summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/SCsub8
-rw-r--r--drivers/convex_decomp/b2Glue.h3
-rw-r--r--drivers/convex_decomp/b2Polygon.cpp4
-rw-r--r--drivers/convex_decomp/b2Polygon.h2
-rw-r--r--drivers/etc1/rg_etc1.cpp4908
-rw-r--r--drivers/etc1/rg_etc1.h152
-rw-r--r--drivers/gles2/rasterizer_gles2.cpp15
-rw-r--r--drivers/gles2/rasterizer_gles2.h2
-rw-r--r--drivers/gles2/shader_compiler_gles2.cpp22
-rw-r--r--drivers/mpc/audio_stream_mpc.cpp128
-rw-r--r--drivers/mpc/audio_stream_mpc.h45
-rw-r--r--drivers/nedmalloc/malloc.c.h11628
-rw-r--r--drivers/nedmalloc/nedmalloc.cpp2934
-rw-r--r--drivers/nedmalloc/nedmalloc.h604
-rw-r--r--drivers/openssl/register_openssl.cpp38
-rw-r--r--drivers/openssl/register_openssl.h22
-rw-r--r--drivers/opus/SCsub200
-rw-r--r--drivers/opus/analysis.c645
-rw-r--r--drivers/opus/analysis.h90
-rw-r--r--drivers/opus/audio_stream_opus.cpp376
-rw-r--r--drivers/opus/audio_stream_opus.h141
-rw-r--r--drivers/opus/celt/_kiss_fft_guts.h183
-rw-r--r--drivers/opus/celt/arch.h214
-rwxr-xr-xdrivers/opus/celt/arm/arm2gnu.pl316
-rw-r--r--drivers/opus/celt/arm/arm_celt_map.c49
-rw-r--r--drivers/opus/celt/arm/armcpu.c174
-rw-r--r--drivers/opus/celt/arm/armcpu.h71
-rw-r--r--drivers/opus/celt/arm/armopts.s37
-rw-r--r--drivers/opus/celt/arm/armopts.s.in37
-rw-r--r--drivers/opus/celt/arm/celt_pitch_xcorr_arm.s545
-rw-r--r--drivers/opus/celt/arm/fixed_armv4.h76
-rw-r--r--drivers/opus/celt/arm/fixed_armv5e.h116
-rw-r--r--drivers/opus/celt/arm/kiss_fft_armv4.h121
-rw-r--r--drivers/opus/celt/arm/kiss_fft_armv5e.h118
-rw-r--r--drivers/opus/celt/arm/pitch_arm.h57
-rw-r--r--drivers/opus/celt/bands.c1518
-rw-r--r--drivers/opus/celt/bands.h114
-rw-r--r--drivers/opus/celt/celt.c223
-rw-r--r--drivers/opus/celt/celt.h218
-rw-r--r--drivers/opus/celt/celt_decoder.c1195
-rw-r--r--drivers/opus/celt/celt_encoder.c2353
-rw-r--r--drivers/opus/celt/celt_lpc.c309
-rw-r--r--drivers/opus/celt/celt_lpc.h54
-rw-r--r--drivers/opus/celt/cpu_support.h54
-rw-r--r--drivers/opus/celt/cwrs.c697
-rw-r--r--drivers/opus/celt/cwrs.h48
-rw-r--r--drivers/opus/celt/ecintrin.h87
-rw-r--r--drivers/opus/celt/entcode.c93
-rw-r--r--drivers/opus/celt/entcode.h117
-rw-r--r--drivers/opus/celt/entdec.c245
-rw-r--r--drivers/opus/celt/entdec.h100
-rw-r--r--drivers/opus/celt/entenc.c294
-rw-r--r--drivers/opus/celt/entenc.h110
-rw-r--r--drivers/opus/celt/fixed_debug.h773
-rw-r--r--drivers/opus/celt/fixed_generic.h134
-rw-r--r--drivers/opus/celt/float_cast.h140
-rw-r--r--drivers/opus/celt/kiss_fft.c719
-rw-r--r--drivers/opus/celt/kiss_fft.h139
-rw-r--r--drivers/opus/celt/laplace.c134
-rw-r--r--drivers/opus/celt/laplace.h48
-rw-r--r--drivers/opus/celt/mathops.c208
-rw-r--r--drivers/opus/celt/mathops.h258
-rw-r--r--drivers/opus/celt/mdct.c311
-rw-r--r--drivers/opus/celt/mdct.h70
-rw-r--r--drivers/opus/celt/mfrngcod.h48
-rw-r--r--drivers/opus/celt/modes.c438
-rw-r--r--drivers/opus/celt/opus_custom_demo.c210
-rw-r--r--drivers/opus/celt/opus_modes.h83
-rw-r--r--drivers/opus/celt/os_support.h92
-rw-r--r--drivers/opus/celt/pitch.c537
-rw-r--r--drivers/opus/celt/pitch.h173
-rw-r--r--drivers/opus/celt/quant_bands.c556
-rw-r--r--drivers/opus/celt/quant_bands.h66
-rw-r--r--drivers/opus/celt/rate.c638
-rw-r--r--drivers/opus/celt/rate.h101
-rw-r--r--drivers/opus/celt/stack_alloc.h182
-rw-r--r--drivers/opus/celt/static_modes_fixed.h595
-rw-r--r--drivers/opus/celt/static_modes_float.h599
-rw-r--r--drivers/opus/celt/tests/test_unit_cwrs32.c161
-rw-r--r--drivers/opus/celt/tests/test_unit_dft.c164
-rw-r--r--drivers/opus/celt/tests/test_unit_entropy.c382
-rw-r--r--drivers/opus/celt/tests/test_unit_laplace.c92
-rw-r--r--drivers/opus/celt/tests/test_unit_mathops.c275
-rw-r--r--drivers/opus/celt/tests/test_unit_mdct.c210
-rw-r--r--drivers/opus/celt/tests/test_unit_rotation.c90
-rw-r--r--drivers/opus/celt/tests/test_unit_types.c50
-rw-r--r--drivers/opus/celt/vq.c415
-rw-r--r--drivers/opus/celt/vq.h70
-rw-r--r--drivers/opus/celt/x86/pitch_sse.h156
-rw-r--r--drivers/opus/http.c3391
-rw-r--r--drivers/opus/info.c687
-rw-r--r--drivers/opus/internal.c42
-rw-r--r--drivers/opus/internal.h249
-rw-r--r--drivers/opus/mlp.c140
-rw-r--r--drivers/opus/mlp.h41
-rw-r--r--drivers/opus/mlp_data.c105
-rw-r--r--drivers/opus/opus.c329
-rw-r--r--drivers/opus/opus.h978
-rw-r--r--drivers/opus/opus_compare.c379
-rw-r--r--drivers/opus/opus_config.h121
-rw-r--r--drivers/opus/opus_custom.h342
-rw-r--r--drivers/opus/opus_decoder.c970
-rw-r--r--drivers/opus/opus_defines.h726
-rw-r--r--drivers/opus/opus_demo.c885
-rw-r--r--drivers/opus/opus_encoder.c2488
-rw-r--r--drivers/opus/opus_multistream.c92
-rw-r--r--drivers/opus/opus_multistream.h660
-rw-r--r--drivers/opus/opus_multistream_decoder.c537
-rw-r--r--drivers/opus/opus_multistream_encoder.c1174
-rw-r--r--drivers/opus/opus_private.h129
-rw-r--r--drivers/opus/opus_types.h159
-rw-r--r--drivers/opus/opusfile.c3158
-rw-r--r--drivers/opus/opusfile.h2102
-rw-r--r--drivers/opus/repacketizer.c345
-rw-r--r--drivers/opus/repacketizer_demo.c217
-rw-r--r--drivers/opus/silk/A2NLSF.c252
-rw-r--r--drivers/opus/silk/API.h133
-rw-r--r--drivers/opus/silk/CNG.c172
-rw-r--r--drivers/opus/silk/HP_variable_cutoff.c77
-rw-r--r--drivers/opus/silk/Inlines.h188
-rw-r--r--drivers/opus/silk/LPC_analysis_filter.c106
-rw-r--r--drivers/opus/silk/LPC_inv_pred_gain.c154
-rw-r--r--drivers/opus/silk/LP_variable_cutoff.c135
-rw-r--r--drivers/opus/silk/MacroCount.h718
-rw-r--r--drivers/opus/silk/MacroDebug.h952
-rw-r--r--drivers/opus/silk/NLSF2A.c178
-rw-r--r--drivers/opus/silk/NLSF_VQ.c68
-rw-r--r--drivers/opus/silk/NLSF_VQ_weights_laroia.c80
-rw-r--r--drivers/opus/silk/NLSF_decode.c101
-rw-r--r--drivers/opus/silk/NLSF_del_dec_quant.c207
-rw-r--r--drivers/opus/silk/NLSF_encode.c136
-rw-r--r--drivers/opus/silk/NLSF_stabilize.c142
-rw-r--r--drivers/opus/silk/NLSF_unpack.c55
-rw-r--r--drivers/opus/silk/NSQ.c446
-rw-r--r--drivers/opus/silk/NSQ_del_dec.c719
-rw-r--r--drivers/opus/silk/PLC.c423
-rw-r--r--drivers/opus/silk/PLC.h61
-rw-r--r--drivers/opus/silk/SigProc_FIX.h594
-rw-r--r--drivers/opus/silk/VAD.c357
-rw-r--r--drivers/opus/silk/VQ_WMat_EC.c120
-rw-r--r--drivers/opus/silk/ana_filt_bank_1.c74
-rw-r--r--drivers/opus/silk/arm/SigProc_FIX_armv4.h47
-rw-r--r--drivers/opus/silk/arm/SigProc_FIX_armv5e.h61
-rw-r--r--drivers/opus/silk/arm/macros_armv4.h103
-rw-r--r--drivers/opus/silk/arm/macros_armv5e.h213
-rw-r--r--drivers/opus/silk/biquad_alt.c78
-rw-r--r--drivers/opus/silk/bwexpander.c51
-rw-r--r--drivers/opus/silk/bwexpander_32.c50
-rw-r--r--drivers/opus/silk/check_control_input.c106
-rw-r--r--drivers/opus/silk/code_signs.c115
-rw-r--r--drivers/opus/silk/control.h142
-rw-r--r--drivers/opus/silk/control_SNR.c81
-rw-r--r--drivers/opus/silk/control_audio_bandwidth.c126
-rw-r--r--drivers/opus/silk/control_codec.c422
-rw-r--r--drivers/opus/silk/debug.c170
-rw-r--r--drivers/opus/silk/debug.h279
-rw-r--r--drivers/opus/silk/dec_API.c397
-rw-r--r--drivers/opus/silk/decode_core.c238
-rw-r--r--drivers/opus/silk/decode_frame.c128
-rw-r--r--drivers/opus/silk/decode_indices.c151
-rw-r--r--drivers/opus/silk/decode_parameters.c115
-rw-r--r--drivers/opus/silk/decode_pitch.c77
-rw-r--r--drivers/opus/silk/decode_pulses.c115
-rw-r--r--drivers/opus/silk/decoder_set_fs.c108
-rw-r--r--drivers/opus/silk/define.h235
-rw-r--r--drivers/opus/silk/enc_API.c556
-rw-r--r--drivers/opus/silk/encode_indices.c181
-rw-r--r--drivers/opus/silk/encode_pulses.c206
-rw-r--r--drivers/opus/silk/errors.h98
-rw-r--r--drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c85
-rw-r--r--drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c53
-rw-r--r--drivers/opus/silk/fixed/apply_sine_window_FIX.c101
-rw-r--r--drivers/opus/silk/fixed/autocorr_FIX.c48
-rw-r--r--drivers/opus/silk/fixed/burg_modified_FIX.c279
-rw-r--r--drivers/opus/silk/fixed/corrMatrix_FIX.c156
-rw-r--r--drivers/opus/silk/fixed/encode_frame_FIX.c385
-rw-r--r--drivers/opus/silk/fixed/find_LPC_FIX.c151
-rw-r--r--drivers/opus/silk/fixed/find_LTP_FIX.c244
-rw-r--r--drivers/opus/silk/fixed/find_pitch_lags_FIX.c145
-rw-r--r--drivers/opus/silk/fixed/find_pred_coefs_FIX.c147
-rw-r--r--drivers/opus/silk/fixed/k2a_FIX.c53
-rw-r--r--drivers/opus/silk/fixed/k2a_Q16_FIX.c53
-rw-r--r--drivers/opus/silk/fixed/main_FIX.h257
-rw-r--r--drivers/opus/silk/fixed/noise_shape_analysis_FIX.c445
-rw-r--r--drivers/opus/silk/fixed/pitch_analysis_core_FIX.c744
-rw-r--r--drivers/opus/silk/fixed/prefilter_FIX.c209
-rw-r--r--drivers/opus/silk/fixed/process_gains_FIX.c117
-rw-r--r--drivers/opus/silk/fixed/regularize_correlations_FIX.c47
-rw-r--r--drivers/opus/silk/fixed/residual_energy16_FIX.c103
-rw-r--r--drivers/opus/silk/fixed/residual_energy_FIX.c97
-rw-r--r--drivers/opus/silk/fixed/schur64_FIX.c92
-rw-r--r--drivers/opus/silk/fixed/schur_FIX.c106
-rw-r--r--drivers/opus/silk/fixed/solve_LS_FIX.c249
-rw-r--r--drivers/opus/silk/fixed/structs_FIX.h133
-rw-r--r--drivers/opus/silk/fixed/vector_ops_FIX.c96
-rw-r--r--drivers/opus/silk/fixed/warped_autocorrelation_FIX.c88
-rw-r--r--drivers/opus/silk/float/LPC_analysis_filter_FLP.c249
-rw-r--r--drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c76
-rw-r--r--drivers/opus/silk/float/LTP_analysis_filter_FLP.c75
-rw-r--r--drivers/opus/silk/float/LTP_scale_ctrl_FLP.c52
-rw-r--r--drivers/opus/silk/float/SigProc_FLP.h204
-rw-r--r--drivers/opus/silk/float/apply_sine_window_FLP.c81
-rw-r--r--drivers/opus/silk/float/autocorrelation_FLP.c52
-rw-r--r--drivers/opus/silk/float/burg_modified_FLP.c186
-rw-r--r--drivers/opus/silk/float/bwexpander_FLP.c49
-rw-r--r--drivers/opus/silk/float/corrMatrix_FLP.c93
-rw-r--r--drivers/opus/silk/float/encode_frame_FLP.c372
-rw-r--r--drivers/opus/silk/float/energy_FLP.c60
-rw-r--r--drivers/opus/silk/float/find_LPC_FLP.c104
-rw-r--r--drivers/opus/silk/float/find_LTP_FLP.c132
-rw-r--r--drivers/opus/silk/float/find_pitch_lags_FLP.c132
-rw-r--r--drivers/opus/silk/float/find_pred_coefs_FLP.c117
-rw-r--r--drivers/opus/silk/float/inner_product_FLP.c60
-rw-r--r--drivers/opus/silk/float/k2a_FLP.c53
-rw-r--r--drivers/opus/silk/float/levinsondurbin_FLP.c81
-rw-r--r--drivers/opus/silk/float/main_FLP.h312
-rw-r--r--drivers/opus/silk/float/noise_shape_analysis_FLP.c365
-rw-r--r--drivers/opus/silk/float/pitch_analysis_core_FLP.c630
-rw-r--r--drivers/opus/silk/float/prefilter_FLP.c206
-rw-r--r--drivers/opus/silk/float/process_gains_FLP.c103
-rw-r--r--drivers/opus/silk/float/regularize_correlations_FLP.c48
-rw-r--r--drivers/opus/silk/float/residual_energy_FLP.c117
-rw-r--r--drivers/opus/silk/float/scale_copy_vector_FLP.c57
-rw-r--r--drivers/opus/silk/float/scale_vector_FLP.c56
-rw-r--r--drivers/opus/silk/float/schur_FLP.c70
-rw-r--r--drivers/opus/silk/float/solve_LS_FLP.c207
-rw-r--r--drivers/opus/silk/float/sort_FLP.c83
-rw-r--r--drivers/opus/silk/float/structs_FLP.h131
-rw-r--r--drivers/opus/silk/float/warped_autocorrelation_FLP.c73
-rw-r--r--drivers/opus/silk/float/wrappers_FLP.c201
-rw-r--r--drivers/opus/silk/gain_quant.c141
-rw-r--r--drivers/opus/silk/init_decoder.c56
-rw-r--r--drivers/opus/silk/init_encoder.c64
-rw-r--r--drivers/opus/silk/inner_prod_aligned.c47
-rw-r--r--drivers/opus/silk/interpolate.c51
-rw-r--r--drivers/opus/silk/lin2log.c46
-rw-r--r--drivers/opus/silk/log2lin.c58
-rw-r--r--drivers/opus/silk/macros.h113
-rw-r--r--drivers/opus/silk/pitch_est_defines.h88
-rw-r--r--drivers/opus/silk/pitch_est_tables.c99
-rw-r--r--drivers/opus/silk/process_NLSFs.c105
-rw-r--r--drivers/opus/silk/quant_LTP_gains.c128
-rw-r--r--drivers/opus/silk/resampler.c215
-rw-r--r--drivers/opus/silk/resampler_down2.c74
-rw-r--r--drivers/opus/silk/resampler_down2_3.c103
-rw-r--r--drivers/opus/silk/resampler_private.h88
-rw-r--r--drivers/opus/silk/resampler_private_AR2.c55
-rw-r--r--drivers/opus/silk/resampler_private_IIR_FIR.c107
-rw-r--r--drivers/opus/silk/resampler_private_down_FIR.c194
-rw-r--r--drivers/opus/silk/resampler_private_up2_HQ.c113
-rw-r--r--drivers/opus/silk/resampler_rom.c96
-rw-r--r--drivers/opus/silk/resampler_rom.h68
-rw-r--r--drivers/opus/silk/resampler_structs.h60
-rw-r--r--drivers/opus/silk/shell_coder.c151
-rw-r--r--drivers/opus/silk/sigm_Q15.c76
-rw-r--r--drivers/opus/silk/silk_main.h438
-rw-r--r--drivers/opus/silk/sort.c154
-rw-r--r--drivers/opus/silk/stereo_LR_to_MS.c229
-rw-r--r--drivers/opus/silk/stereo_MS_to_LR.c85
-rw-r--r--drivers/opus/silk/stereo_decode_pred.c73
-rw-r--r--drivers/opus/silk/stereo_encode_pred.c62
-rw-r--r--drivers/opus/silk/stereo_find_predictor.c79
-rw-r--r--drivers/opus/silk/stereo_quant_pred.c73
-rw-r--r--drivers/opus/silk/structs.h327
-rw-r--r--drivers/opus/silk/sum_sqr_shift.c85
-rw-r--r--drivers/opus/silk/table_LSF_cos.c70
-rw-r--r--drivers/opus/silk/tables.h122
-rw-r--r--drivers/opus/silk/tables_LTP.c296
-rw-r--r--drivers/opus/silk/tables_NLSF_CB_NB_MB.c159
-rw-r--r--drivers/opus/silk/tables_NLSF_CB_WB.c198
-rw-r--r--drivers/opus/silk/tables_gain.c63
-rw-r--r--drivers/opus/silk/tables_other.c138
-rw-r--r--drivers/opus/silk/tables_pitch_lag.c69
-rw-r--r--drivers/opus/silk/tables_pulses_per_block.c264
-rw-r--r--drivers/opus/silk/tuning_parameters.h171
-rw-r--r--drivers/opus/silk/typedef.h78
-rw-r--r--drivers/opus/stream.c366
-rw-r--r--drivers/opus/tansig_table.h45
-rw-r--r--drivers/opus/wincerts.c171
-rw-r--r--drivers/opus/winerrno.h90
-rw-r--r--drivers/pulseaudio/audio_driver_pulseaudio.cpp13
-rw-r--r--drivers/pulseaudio/audio_driver_pulseaudio.h5
-rw-r--r--drivers/register_driver_types.cpp40
-rw-r--r--drivers/rtaudio/RtAudio.cpp20468
-rw-r--r--drivers/speex/audio_stream_speex.cpp178
-rw-r--r--drivers/speex/audio_stream_speex.h56
-rw-r--r--drivers/speex/config.h104
-rw-r--r--drivers/speex/lsp.h128
-rw-r--r--drivers/speex/speex_bind.cpp128
-rw-r--r--drivers/speex/speex_bind.h96
-rw-r--r--drivers/theora/video_stream_theora.cpp449
-rw-r--r--drivers/theora/video_stream_theora.h58
-rw-r--r--drivers/theoraplayer/SCsub106
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraAsync.h51
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h51
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h48
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h89
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraException.h46
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraExport.h38
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h95
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h18
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h17
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraTimer.h69
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraUtil.h32
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h282
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h56
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h110
-rw-r--r--drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h32
-rw-r--r--drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h47
-rw-r--r--drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm457
-rw-r--r--drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp439
-rw-r--r--drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h53
-rw-r--r--drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp703
-rw-r--r--drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h64
-rw-r--r--drivers/theoraplayer/src/TheoraAsync.cpp253
-rw-r--r--drivers/theoraplayer/src/TheoraAudioInterface.cpp21
-rw-r--r--drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp126
-rw-r--r--drivers/theoraplayer/src/TheoraDataSource.cpp128
-rw-r--r--drivers/theoraplayer/src/TheoraException.cpp37
-rw-r--r--drivers/theoraplayer/src/TheoraFrameQueue.cpp174
-rw-r--r--drivers/theoraplayer/src/TheoraTimer.cpp70
-rw-r--r--drivers/theoraplayer/src/TheoraUtil.cpp59
-rw-r--r--drivers/theoraplayer/src/TheoraVideoClip.cpp496
-rw-r--r--drivers/theoraplayer/src/TheoraVideoFrame.cpp159
-rw-r--r--drivers/theoraplayer/src/TheoraVideoManager.cpp485
-rw-r--r--drivers/theoraplayer/src/TheoraWorkerThread.cpp49
-rw-r--r--drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c56
-rw-r--r--drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c358
-rw-r--r--drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c86
-rw-r--r--drivers/theoraplayer/src/YUV/android/cpu-features.c1095
-rw-r--r--drivers/theoraplayer/src/YUV/android/cpu-features.h212
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/LICENSE29
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY8
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv.h33
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h118
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h73
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h254
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h225
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h173
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h168
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h81
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h168
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h201
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h434
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h117
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h33
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h1694
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h85
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h57
-rw-r--r--drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h301
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h16
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h182
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt15
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/compare.cc325
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc42
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc64
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc158
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc232
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/convert.cc1491
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc901
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc1196
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc1096
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc392
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc327
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc383
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc300
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc552
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc558
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc47
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc2238
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/rotate.cc1301
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc209
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc486
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc412
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/row_any.cc542
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/row_common.cc2247
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc991
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc2847
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc6443
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/row_win.cc7284
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm146
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/scale.cc926
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc809
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc145
-rw-r--r--drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc772
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc653
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc699
-rw-r--r--drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc1315
-rw-r--r--drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc1320
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/video_common.cc64
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm1136
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c72
-rwxr-xr-xdrivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h14
-rw-r--r--drivers/theoraplayer/src/YUV/yuv_util.c39
-rw-r--r--drivers/theoraplayer/src/YUV/yuv_util.h17
-rw-r--r--drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj2606
-rw-r--r--drivers/theoraplayer/video_stream_theoraplayer.cpp556
-rw-r--r--drivers/theoraplayer/video_stream_theoraplayer.h66
-rw-r--r--drivers/unix/file_access_unix.cpp6
-rw-r--r--drivers/unix/file_access_unix.h9
-rw-r--r--drivers/unix/os_unix.cpp5
-rw-r--r--drivers/vorbis/audio_stream_ogg_vorbis.cpp206
-rw-r--r--drivers/vorbis/audio_stream_ogg_vorbis.h51
-rw-r--r--drivers/webp/dsp/dsp.h2
404 files changed, 89717 insertions, 76777 deletions
diff --git a/drivers/SCsub b/drivers/SCsub
index 3028139f50..bc46bf2cec 100644
--- a/drivers/SCsub
+++ b/drivers/SCsub
@@ -31,15 +31,17 @@ SConscript("rtaudio/SCsub");
SConscript("nedmalloc/SCsub");
SConscript("nrex/SCsub");
SConscript("chibi/SCsub");
-if (env["vorbis"]=="yes" or env["speex"]=="yes" or env["theora"]=="yes"):
+if (env["vorbis"]=="yes" or env["speex"]=="yes" or env["theora"]=="yes" or env["opus"]=="yes"):
SConscript("ogg/SCsub");
if (env["vorbis"]=="yes"):
SConscript("vorbis/SCsub");
+if (env["opus"]=="yes"):
+ SConscript('opus/SCsub');
if (env["tools"]=="yes"):
SConscript("convex_decomp/SCsub");
-if env["theora"]=="yes":
- SConscript("theoraplayer/SCsub")
+#if env["theora"]=="yes":
+# SConscript("theoraplayer/SCsub")
if (env["theora"]=="yes"):
SConscript("theora/SCsub");
if (env['speex']=='yes'):
diff --git a/drivers/convex_decomp/b2Glue.h b/drivers/convex_decomp/b2Glue.h
index db765f7eb9..7ec6d7f181 100644
--- a/drivers/convex_decomp/b2Glue.h
+++ b/drivers/convex_decomp/b2Glue.h
@@ -20,7 +20,8 @@
#define B2GLUE_H
#include "math_2d.h"
-#include <limits>
+#include <limits.h>
+
namespace b2ConvexDecomp {
typedef real_t float32;
diff --git a/drivers/convex_decomp/b2Polygon.cpp b/drivers/convex_decomp/b2Polygon.cpp
index 668313967e..775f2adfe2 100644
--- a/drivers/convex_decomp/b2Polygon.cpp
+++ b/drivers/convex_decomp/b2Polygon.cpp
@@ -21,8 +21,8 @@
#include "b2Triangle.h"
#include "b2Polygon.h"
-#include <cmath>
-#include <climits>
+#include <math.h>
+#include <limits.h>
#include <assert.h>
#define b2Assert assert
diff --git a/drivers/convex_decomp/b2Polygon.h b/drivers/convex_decomp/b2Polygon.h
index 82cdc56804..36af2fd9d0 100644
--- a/drivers/convex_decomp/b2Polygon.h
+++ b/drivers/convex_decomp/b2Polygon.h
@@ -22,7 +22,7 @@
#include "b2Triangle.h"
#include "stdio.h"
#include <string.h>
-#include <limits>
+#include <limits.h>
namespace b2ConvexDecomp {
static bool B2_POLYGON_REPORT_ERRORS = false;
diff --git a/drivers/etc1/rg_etc1.cpp b/drivers/etc1/rg_etc1.cpp
index fd109f003c..47dcb57e6b 100644
--- a/drivers/etc1/rg_etc1.cpp
+++ b/drivers/etc1/rg_etc1.cpp
@@ -1,2454 +1,2454 @@
-// File: rg_etc1.cpp - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
-// Please see ZLIB license at the end of rg_etc1.h.
-//
-// For more information Ericsson Texture Compression (ETC/ETC1), see:
-// http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
-//
-// v1.03 - 5/12/13 - Initial public release
-#include "rg_etc1.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-//#include <stdio.h>
-#include <math.h>
-#include <stdio.h>
-#pragma warning (disable: 4201) // nonstandard extension used : nameless struct/union
-
-#if defined(_DEBUG) || defined(DEBUG)
-#define RG_ETC1_BUILD_DEBUG
-#endif
-
-#define RG_ETC1_ASSERT assert
-
-namespace rg_etc1
-{
-
- inline long labs(long val) {
- return val < 0 ? -val : val;
- }
-
- inline int intabs(int val) {
-
- return val<0?-val:val;
- }
-
- typedef unsigned char uint8;
- typedef unsigned short uint16;
- typedef unsigned int uint;
- typedef unsigned int uint32;
- typedef long long int64;
- typedef unsigned long long uint64;
-
- const uint32 cUINT32_MAX = 0xFFFFFFFFU;
- const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64;
-
- template<typename T> inline T minimum(T a, T b) { return (a < b) ? a : b; }
- template<typename T> inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); }
- template<typename T> inline T maximum(T a, T b) { return (a > b) ? a : b; }
- template<typename T> inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); }
- template<typename T> inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); }
- template<typename T> inline T square(T value) { return value * value; }
- template<typename T> inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); }
- template<typename T> inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); }
-
- template<class T, size_t N> T decay_array_to_subtype(T (&a)[N]);
-
-#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X)))
-
- enum eNoClamp { cNoClamp };
-
- struct color_quad_u8
- {
- static inline int clamp(int v) { if (v & 0xFFFFFF00U) v = (~(static_cast<int>(v) >> 31)) & 0xFF; return v; }
-
- struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; };
-
- public:
- typedef unsigned char component_t;
- typedef int parameter_t;
-
- enum { cNumComps = 4 };
-
- union
- {
- struct
- {
- component_t r;
- component_t g;
- component_t b;
- component_t a;
- };
-
- component_t c[cNumComps];
-
- uint32 m_u32;
- };
-
- inline color_quad_u8()
- {
- }
-
- inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32)
- {
- }
-
- explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax)
- {
- set(y, alpha);
- }
-
- inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
- {
- set(red, green, blue, alpha);
- }
-
- explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax)
- {
- set_noclamp_y_alpha(y, alpha);
- }
-
- inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
- {
- set_noclamp_rgba(red, green, blue, alpha);
- }
-
- inline void clear()
- {
- m_u32 = 0;
- }
-
- inline color_quad_u8& operator= (const color_quad_u8& other)
- {
- m_u32 = other.m_u32;
- return *this;
- }
-
- inline color_quad_u8& set_rgb(const color_quad_u8& other)
- {
- r = other.r;
- g = other.g;
- b = other.b;
- return *this;
- }
-
- inline color_quad_u8& operator= (parameter_t y)
- {
- set(y, component_traits::cMax);
- return *this;
- }
-
- inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax)
- {
- y = clamp(y);
- alpha = clamp(alpha);
- r = static_cast<component_t>(y);
- g = static_cast<component_t>(y);
- b = static_cast<component_t>(y);
- a = static_cast<component_t>(alpha);
- return *this;
- }
-
- inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax)
- {
- RG_ETC1_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) );
- RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
-
- r = static_cast<component_t>(y);
- g = static_cast<component_t>(y);
- b = static_cast<component_t>(y);
- a = static_cast<component_t>(alpha);
- return *this;
- }
-
- inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
- {
- r = static_cast<component_t>(clamp(red));
- g = static_cast<component_t>(clamp(green));
- b = static_cast<component_t>(clamp(blue));
- a = static_cast<component_t>(clamp(alpha));
- return *this;
- }
-
- inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha)
- {
- RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
- RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
- RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
- RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
-
- r = static_cast<component_t>(red);
- g = static_cast<component_t>(green);
- b = static_cast<component_t>(blue);
- a = static_cast<component_t>(alpha);
- return *this;
- }
-
- inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue)
- {
- RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
- RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
- RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
-
- r = static_cast<component_t>(red);
- g = static_cast<component_t>(green);
- b = static_cast<component_t>(blue);
- return *this;
- }
-
- static inline parameter_t get_min_comp() { return component_traits::cMin; }
- static inline parameter_t get_max_comp() { return component_traits::cMax; }
- static inline bool get_comps_are_signed() { return component_traits::cSigned; }
-
- inline component_t operator[] (uint i) const { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
- inline component_t& operator[] (uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
-
- inline color_quad_u8& set_component(uint i, parameter_t f)
- {
- RG_ETC1_ASSERT(i < cNumComps);
-
- c[i] = static_cast<component_t>(clamp(f));
-
- return *this;
- }
-
- inline color_quad_u8& set_grayscale(parameter_t l)
- {
- component_t x = static_cast<component_t>(clamp(l));
- c[0] = x;
- c[1] = x;
- c[2] = x;
- return *this;
- }
-
- inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h)
- {
- for (uint i = 0; i < cNumComps; i++)
- c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l[i], h[i]));
- return *this;
- }
-
- inline color_quad_u8& clamp(parameter_t l, parameter_t h)
- {
- for (uint i = 0; i < cNumComps; i++)
- c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l, h));
- return *this;
- }
-
- // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y).
- inline parameter_t get_luma() const
- {
- return static_cast<parameter_t>((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U);
- }
-
- // Returns REC 709 luma.
- inline parameter_t get_luma_rec709() const
- {
- return static_cast<parameter_t>((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U);
- }
-
- inline uint squared_distance_rgb(const color_quad_u8& c) const
- {
- return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b);
- }
-
- inline uint squared_distance_rgba(const color_quad_u8& c) const
- {
- return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a);
- }
-
- inline bool rgb_equals(const color_quad_u8& rhs) const
- {
- return (r == rhs.r) && (g == rhs.g) && (b == rhs.b);
- }
-
- inline bool operator== (const color_quad_u8& rhs) const
- {
- return m_u32 == rhs.m_u32;
- }
-
- color_quad_u8& operator+= (const color_quad_u8& other)
- {
- for (uint i = 0; i < 4; i++)
- c[i] = static_cast<component_t>(clamp(c[i] + other.c[i]));
- return *this;
- }
-
- color_quad_u8& operator-= (const color_quad_u8& other)
- {
- for (uint i = 0; i < 4; i++)
- c[i] = static_cast<component_t>(clamp(c[i] - other.c[i]));
- return *this;
- }
-
- // Binary add built on operator+=: works on a copy of lhs and returns it by value.
- friend color_quad_u8 operator+ (const color_quad_u8& lhs, const color_quad_u8& rhs)
- {
-     return color_quad_u8(lhs) += rhs;
- }
-
- // Binary subtract built on operator-=: works on a copy of lhs and returns it by value.
- friend color_quad_u8 operator- (const color_quad_u8& lhs, const color_quad_u8& rhs)
- {
-     return color_quad_u8(lhs) -= rhs;
- }
- }; // class color_quad_u8
-
- // Minimal three-component float vector with indexing, += and scalar *=.
- struct vec3F
- {
-     float m_s[3];
-
-     // Leaves the components uninitialized.
-     inline vec3F() { }
-
-     // Broadcasts s into all three components.
-     inline vec3F(float s)
-     {
-         for (uint i = 0; i < 3; i++)
-             m_s[i] = s;
-     }
-
-     inline vec3F(float x, float y, float z)
-     {
-         m_s[0] = x;
-         m_s[1] = y;
-         m_s[2] = z;
-     }
-
-     // Read-only component access; i must be 0, 1 or 2 (asserted).
-     inline float operator[] (uint i) const
-     {
-         RG_ETC1_ASSERT(i < 3);
-         return m_s[i];
-     }
-
-     inline vec3F& operator += (const vec3F& other)
-     {
-         m_s[0] += other.m_s[0];
-         m_s[1] += other.m_s[1];
-         m_s[2] += other.m_s[2];
-         return *this;
-     }
-
-     inline vec3F& operator *= (float s)
-     {
-         m_s[0] *= s;
-         m_s[1] *= s;
-         m_s[2] *= s;
-         return *this;
-     }
- };
-
- enum etc_constants // Sizes and bit offsets of the packed 64-bit ETC1 block; the *BitOffset values are the bit positions passed to etc1_block::get_byte_bits/set_byte_bits.
- {
- cETC1BytesPerBlock = 8U, // one packed block is 8 bytes (64 bits)
-
- cETC1SelectorBits = 2U, // bits per selector
- cETC1SelectorValues = 1U << cETC1SelectorBits, // 4 selector values
- cETC1SelectorMask = cETC1SelectorValues - 1U, // 0x3
-
- cETC1BlockShift = 2U,
- cETC1BlockSize = 1U << cETC1BlockShift, // 4 (presumably the block edge length in pixels - confirm against callers)
-
- cETC1LSBSelectorIndicesBitOffset = 0,
- cETC1MSBSelectorIndicesBitOffset = 16,
-
- cETC1FlipBitOffset = 32,
- cETC1DiffBitOffset = 33,
-
- cETC1IntenModifierNumBits = 3,
- cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, // 8 intensity modifier tables
- cETC1RightIntenModifierTableBitOffset = 34,
- cETC1LeftIntenModifierTableBitOffset = 37,
-
- // Base+Delta encoding (5 bit bases, 3 bit delta)
- cETC1BaseColorCompNumBits = 5,
- cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, // 32: a count, i.e. one past the largest 5-bit value
-
- cETC1DeltaColorCompNumBits = 3,
- cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, // 8 (same value as cETC1DeltaColorCompMax)
- cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, // 8: a count, not the largest encodable delta
-
- cETC1BaseColor5RBitOffset = 59,
- cETC1BaseColor5GBitOffset = 51,
- cETC1BaseColor5BBitOffset = 43,
-
- cETC1DeltaColor3RBitOffset = 56,
- cETC1DeltaColor3GBitOffset = 48,
- cETC1DeltaColor3BBitOffset = 40,
-
- // Absolute (non-delta) encoding (two 4-bit per component bases)
- cETC1AbsColorCompNumBits = 4,
- cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, // 16: a count, i.e. one past the largest 4-bit value
-
- cETC1AbsColor4R1BitOffset = 60, // first base color: high nibble of each color byte
- cETC1AbsColor4G1BitOffset = 52,
- cETC1AbsColor4B1BitOffset = 44,
-
- cETC1AbsColor4R2BitOffset = 56, // second base color: low nibble of each color byte
- cETC1AbsColor4G2BitOffset = 48,
- cETC1AbsColor4B2BitOffset = 40,
-
- cETC1ColorDeltaMin = -4, // smallest signed 3-bit delta
- cETC1ColorDeltaMax = 3, // largest signed 3-bit delta
-
- // Delta3 (3-bit two's complement): stored code, its bit pattern, and the signed delta it represents:
- // 0 1 2 3 4 5 6 7
- // 000 001 010 011 100 101 110 111
- // 0 1 2 3 -4 -3 -2 -1
- };
-
- static uint8 g_quant5_tab[256+16]; // No initializer here; presumably filled elsewhere at runtime (not visible in this chunk).
- // Intensity modifier tables: one row per 3-bit table index, one column per selector index; each row runs from the most negative to the most positive offset.
-
- static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] =
- {
- { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 },
- { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 }
- };
- // The two tables below are inverse permutations of each other: raw 2-bit ETC1 selector code (lsb | msb << 1) <-> column index into a g_etc1_inten_tables row.
- static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
- static const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };
-
- // For a given ETC1 diff/inten_table/selector and an 8-bit desired color, each entry holds the best packed_color in the low byte and the absolute error in the high byte.
- static uint16 g_etc1_inverse_lookup[2*8*4][256]; // [diff/inten_table/selector][desired_color]; no initializer here, presumably built at init time (not visible in this chunk)
-
- // g_color8_to_etc_block_config_*[color][i]: for each 8-bit color value, a list of packed ETC1 configurations (diff flag, intensity table, selector, packed color) that map to that color. Every row ends with 0xFFFF.
- // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8). Example: 0x0F20 = packed_c 0x0F, selector 2, inten 0, diff 0.
- static const uint16 g_color8_to_etc_block_config_0_255[2][33] = // two rows of 32 entries + 0xFFFF; going by the name, row 0 is presumably color 0 and row 1 color 255
- {
- { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E,
- 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF },
- { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E,
- 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF },
- };
-
- // Declared as [254][12], but only [254][11] is really needed: the longest rows hold 10 entries plus the trailing 0xFFFF.
- static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] =
- {
- { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E,
- 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, {
- 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306,
- 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112,
- 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707,
- 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B,
- 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605,
- 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF
- }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214,
- 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A,
- 0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, {
- 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B,
- 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D,
- 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805,
- 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F,
- 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, {
- 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521,
- 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523,
- 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F,
- 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B,
- 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, {
- 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F,
- 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D,
- 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529,
- 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917,
- 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E,
- 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725,
- 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139,
- 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, {
- 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A,
- 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437,
- 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500,
- 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, {
- 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19,
- 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D,
- 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, {
- 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F,
- 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D,
- 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, {
- 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05,
- 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434,
- 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01,
- 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21,
- 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27,
- 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E,
- 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D,
- 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, {
- 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, {
- 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307,
- 0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33,
- 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B,
- 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, {
- 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103,
- 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B,
- 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536,
- 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A,
- 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115,
- 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, {
- 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF
- }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820,
- 0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031,
- 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, {
- 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35,
- 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F,
- 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D,
- 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029,
- 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832,
- 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D,
- 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133,
- 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF
- }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, {
- 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331,
- 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D,
- 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513,
- 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF
- }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, {
- 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, {
- 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905,
- 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09,
- 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D,
- 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621,
- 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18,
- 0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919,
- 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625,
- 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F,
- 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936,
- 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A,
- 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, {
- 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913,
- 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, {
- 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20,
- 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C,
- 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, {
- 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06,
- 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, {
- 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26,
- 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18,
- 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03,
- 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929,
- 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23,
- 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF
- }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B,
- 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E,
- 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18,
- 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01,
- 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16,
- 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B,
- 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01,
- 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34,
- 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11,
- 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF },
- };
-
- // One packed ETC1 block plus typed accessors for its bit fields.
- // All bit offsets are positions in the 64-bit block viewed as a big-endian integer,
- // so bit offset `ofs` lives in m_bytes[7 - (ofs >> 3)] at bit (ofs & 7).
- struct etc1_block
- {
-     // big endian uint64:
-     // bit ofs: 56 48 40 32 24 16 8 0
-     // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
-     union
-     {
-         uint64 m_uint64;
-         uint8 m_bytes[8];
-     };
-
-     // NOTE(review): none of the accessors in this struct read or write the three
-     // members below; their purpose is not visible from here.
-     uint8 m_low_color[2];
-     uint8 m_high_color[2];
-
-     enum { cNumSelectorBytes = 4 };
-     uint8 m_selectors[cNumSelectorBytes];
-
-     // Resets the object through zero_this() (presumably zero-fills the whole struct).
-     inline void clear()
-     {
-         zero_this(this);
-     }
-
-     // Reads a field of `num` bits (1-8) starting at bit offset `ofs`.
-     // The field must not straddle a byte boundary (asserted).
-     inline uint get_byte_bits(uint ofs, uint num) const
-     {
-         RG_ETC1_ASSERT((ofs + num) <= 64U);
-         RG_ETC1_ASSERT(num && (num <= 8U));
-         RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
-         const uint byte_ofs = 7 - (ofs >> 3);
-         const uint byte_bit_ofs = ofs & 7;
-         return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
-     }
-
-     // Writes `bits` into a field of `num` bits (1-8) starting at bit offset `ofs`.
-     // The field must not straddle a byte boundary and `bits` must fit in `num` bits (asserted).
-     inline void set_byte_bits(uint ofs, uint num, uint bits)
-     {
-         RG_ETC1_ASSERT((ofs + num) <= 64U);
-         // A field confined to one byte can be at most 8 bits wide; same bound as get_byte_bits().
-         RG_ETC1_ASSERT(num && (num <= 8U));
-         RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
-         RG_ETC1_ASSERT(bits < (1U << num));
-         const uint byte_ofs = 7 - (ofs >> 3);
-         const uint byte_bit_ofs = ofs & 7;
-         const uint mask = (1 << num) - 1;
-         m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
-         m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
-     }
-
-     // false = left/right subblocks
-     // true = upper/lower subblocks
-     // Stored in bit 0 of m_bytes[3] (bit offset 32).
-     inline bool get_flip_bit() const
-     {
-         return (m_bytes[3] & 1) != 0;
-     }
-
-     inline void set_flip_bit(bool flip)
-     {
-         m_bytes[3] &= ~1;
-         m_bytes[3] |= static_cast<uint8>(flip);
-     }
-
-     // Differential-mode flag, stored in bit 1 of m_bytes[3] (bit offset 33).
-     inline bool get_diff_bit() const
-     {
-         return (m_bytes[3] & 2) != 0;
-     }
-
-     inline void set_diff_bit(bool diff)
-     {
-         m_bytes[3] &= ~2;
-         m_bytes[3] |= (static_cast<uint>(diff) << 1);
-     }
-
-     // Returns intensity modifier table (0-7) used by subblock subblock_id.
-     // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2)
-     // Subblock 0 uses bits 5-7 of m_bytes[3], subblock 1 uses bits 2-4.
-     inline uint get_inten_table(uint subblock_id) const
-     {
-         RG_ETC1_ASSERT(subblock_id < 2);
-         const uint ofs = subblock_id ? 2 : 5;
-         return (m_bytes[3] >> ofs) & 7;
-     }
-
-     // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
-     inline void set_inten_table(uint subblock_id, uint t)
-     {
-         RG_ETC1_ASSERT(subblock_id < 2);
-         RG_ETC1_ASSERT(t < 8);
-         const uint ofs = subblock_id ? 2 : 5;
-         m_bytes[3] &= ~(7 << ofs);
-         m_bytes[3] |= (t << ofs);
-     }
-
-     // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
-     // The pixel's selector is split across two bit planes: the low bit lives in
-     // m_bytes[6..7] and the high bit two bytes earlier, in m_bytes[4..5].
-     inline uint get_selector(uint x, uint y) const
-     {
-         RG_ETC1_ASSERT((x | y) < 4);
-
-         const uint bit_index = x * 4 + y;
-         const uint byte_bit_ofs = bit_index & 7;
-         const uint8 *p = &m_bytes[7 - (bit_index >> 3)];
-         const uint lsb = (p[0] >> byte_bit_ofs) & 1;
-         const uint msb = (p[-2] >> byte_bit_ofs) & 1;
-         const uint val = lsb | (msb << 1);
-
-         // Translate the raw 2-bit code into an intensity table column.
-         return g_etc1_to_selector_index[val];
-     }
-
-     // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
-     inline void set_selector(uint x, uint y, uint val)
-     {
-         RG_ETC1_ASSERT((x | y | val) < 4);
-         const uint bit_index = x * 4 + y;
-
-         uint8 *p = &m_bytes[7 - (bit_index >> 3)];
-
-         const uint byte_bit_ofs = bit_index & 7;
-         const uint mask = 1 << byte_bit_ofs;
-
-         // Translate the intensity table column back into the raw 2-bit code.
-         const uint etc1_val = g_selector_index_to_etc1[val];
-
-         const uint lsb = etc1_val & 1;
-         const uint msb = etc1_val >> 1;
-
-         p[0] &= ~mask;
-         p[0] |= (lsb << byte_bit_ofs);
-
-         p[-2] &= ~mask;
-         p[-2] |= (msb << byte_bit_ofs);
-     }
-
-     // Stores a packed 4:4:4 color (r in bits 8-11, g in bits 4-7, b in bits 0-3) as
-     // absolute base color idx: 0 = first base color, non-zero = second.
-     inline void set_base4_color(uint idx, uint16 c)
-     {
-         if (idx)
-         {
-             set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
-             set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
-             set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
-         }
-         else
-         {
-             set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
-             set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
-             set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
-         }
-     }
-
-     // Reads absolute base color idx back in the packed 4:4:4 layout used by set_base4_color().
-     inline uint16 get_base4_color(uint idx) const
-     {
-         uint r, g, b;
-         if (idx)
-         {
-             r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
-             g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
-             b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
-         }
-         else
-         {
-             r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
-             g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
-             b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
-         }
-         return static_cast<uint16>(b | (g << 4U) | (r << 8U));
-     }
-
-     // Stores a packed 5:5:5 base color (r in bits 10-14, g in bits 5-9, b in bits 0-4).
-     inline void set_base5_color(uint16 c)
-     {
-         set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
-         set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
-         set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
-     }
-
-     // Reads the 5:5:5 base color back in the layout used by set_base5_color().
-     inline uint16 get_base5_color() const
-     {
-         const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
-         const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
-         const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
-         return static_cast<uint16>(b | (g << 5U) | (r << 10U));
-     }
-
-     // Stores a packed 3:3:3 delta (r in bits 6-8, g in bits 3-5, b in bits 0-2).
-     void set_delta3_color(uint16 c)
-     {
-         set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
-         set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
-         set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
-     }
-
-     // Reads the 3:3:3 delta back in the layout used by set_delta3_color().
-     inline uint16 get_delta3_color() const
-     {
-         const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
-         const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
-         const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
-         return static_cast<uint16>(b | (g << 3U) | (r << 6U));
-     }
-
-     // Base color 5
-     static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U);
-     static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U);
-
-     static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U);
-     static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled);
-
-     // Base + delta variants: return false when base + delta leaves the 0-31 range (the result is then clamped).
-     static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
-     static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
-
-     // Delta color 3
-     // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
-     static uint16 pack_delta3(int r, int g, int b);
-
-     // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
-     static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3);
-
-     // Abs color 4
-     static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U);
-     static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U);
-
-     static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U);
-     static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled);
-
-     // subblock colors
-     static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx);
-     static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx);
-     static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx);
-
-     // Widens the r/g/b components of src to 8 bits by bit replication and copies alpha unchanged.
-     // color4 selects 4-bit inputs (v | v << 4); otherwise 5-bit inputs ((v >> 2) | (v << 3)).
-     static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4)
-     {
-         if (color4)
-         {
-             dst.r = src.r | (src.r << 4);
-             dst.g = src.g | (src.g << 4);
-             dst.b = src.b | (src.b << 4);
-         }
-         else
-         {
-             dst.r = (src.r >> 2) | (src.r << 3);
-             dst.g = (src.g >> 2) | (src.g << 3);
-             dst.b = (src.b >> 2) | (src.b << 3);
-         }
-         dst.a = src.a;
-     }
- };
-
- // Stable least-significant-byte-first radix sort of an index array.
- //
- // Indices are ordered ascending by the low key_size bytes of the uint read at byte
- // offset key_ofs inside pKeys[index]. One counting-sort pass runs per key byte,
- // ping-ponging between the two index buffers.
- //
- // num_indices  - number of indices to sort.
- // pIndices0    - input indices (filled with 0..num_indices-1 first when init_indices is true).
- // pIndices1    - scratch buffer of the same length.
- // pKeys        - key array addressed by the indices.
- // key_ofs      - byte offset of the key inside one Q element.
- // key_size     - number of key bytes to sort on, 1-4.
- //
- // Returns pointer to sorted array: pIndices1 for key_size 1 or 3, pIndices0 for
- // key_size 2 or 4 (the buffers swap once per pass). Returns NULL for any other key_size.
- //
- // NOTE: the key is always fetched as a whole uint, whatever key_size is, so
- // sizeof(uint) bytes must be readable at pKeys[index] + key_ofs.
- template<typename T, typename Q>
- T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices)
- {
-     // key_ofs addresses bytes inside a key element, so it is bounded by sizeof(Q), not by the index type.
-     RG_ETC1_ASSERT(key_ofs < sizeof(Q));
-     RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4));
-
-     if (init_indices)
-     {
-         // Identity permutation, written two entries per iteration.
-         T* p = pIndices0;
-         T* q = pIndices0 + (num_indices >> 1) * 2;
-         uint i;
-         for (i = 0; p != q; p += 2, i += 2)
-         {
-             p[0] = static_cast<T>(i);
-             p[1] = static_cast<T>(i + 1);
-         }
-
-         if (num_indices & 1)
-             *p = static_cast<T>(i);
-     }
-
-     // One 256-bin histogram per key byte; only the first key_size histograms are used.
-     uint hist[256 * 4];
-
-     memset(hist, 0, sizeof(hist[0]) * 256 * key_size);
-
-#define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs))
-#define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs))
-
-     // Build every per-byte histogram in a single sweep over the indices.
-     if (key_size == 4)
-     {
-         T* p = pIndices0;
-         T* q = pIndices0 + num_indices;
-         for ( ; p != q; p++)
-         {
-             const uint key = RG_ETC1_GET_KEY(p);
-
-             hist[ key & 0xFF]++;
-             hist[256 + ((key >> 8) & 0xFF)]++;
-             hist[512 + ((key >> 16) & 0xFF)]++;
-             hist[768 + ((key >> 24) & 0xFF)]++;
-         }
-     }
-     else if (key_size == 3)
-     {
-         T* p = pIndices0;
-         T* q = pIndices0 + num_indices;
-         for ( ; p != q; p++)
-         {
-             const uint key = RG_ETC1_GET_KEY(p);
-
-             hist[ key & 0xFF]++;
-             hist[256 + ((key >> 8) & 0xFF)]++;
-             hist[512 + ((key >> 16) & 0xFF)]++;
-         }
-     }
-     else if (key_size == 2)
-     {
-         T* p = pIndices0;
-         T* q = pIndices0 + (num_indices >> 1) * 2;
-
-         for ( ; p != q; p += 2)
-         {
-             const uint key0 = RG_ETC1_GET_KEY(p);
-             const uint key1 = RG_ETC1_GET_KEY(p+1);
-
-             hist[ key0 & 0xFF]++;
-             hist[256 + ((key0 >> 8) & 0xFF)]++;
-
-             hist[ key1 & 0xFF]++;
-             hist[256 + ((key1 >> 8) & 0xFF)]++;
-         }
-
-         if (num_indices & 1)
-         {
-             const uint key = RG_ETC1_GET_KEY(p);
-
-             hist[ key & 0xFF]++;
-             hist[256 + ((key >> 8) & 0xFF)]++;
-         }
-     }
-     else
-     {
-         RG_ETC1_ASSERT(key_size == 1);
-         // Release-build guard: an unsupported key_size is rejected instead of sorting garbage.
-         if (key_size != 1)
-             return NULL;
-
-         T* p = pIndices0;
-         T* q = pIndices0 + (num_indices >> 1) * 2;
-
-         for ( ; p != q; p += 2)
-         {
-             const uint key0 = RG_ETC1_GET_KEY(p);
-             const uint key1 = RG_ETC1_GET_KEY(p+1);
-
-             hist[key0 & 0xFF]++;
-             hist[key1 & 0xFF]++;
-         }
-
-         if (num_indices & 1)
-         {
-             const uint key = RG_ETC1_GET_KEY(p);
-
-             hist[key & 0xFF]++;
-         }
-     }
-
-     T* pCur = pIndices0;
-     T* pNew = pIndices1;
-
-     // One scatter pass per key byte, least significant byte first.
-     for (uint pass = 0; pass < key_size; pass++)
-     {
-         const uint* pHist = &hist[pass << 8];
-
-         // Exclusive prefix sum of the histogram gives each bin's first output slot.
-         uint offsets[256];
-
-         uint cur_ofs = 0;
-         for (uint i = 0; i < 256; i += 2)
-         {
-             offsets[i] = cur_ofs;
-             cur_ofs += pHist[i];
-
-             offsets[i+1] = cur_ofs;
-             cur_ofs += pHist[i+1];
-         }
-
-         const uint pass_shift = pass << 3;
-
-         T* p = pCur;
-         T* q = pCur + (num_indices >> 1) * 2;
-
-         for ( ; p != q; p += 2)
-         {
-             uint index0 = p[0];
-             uint index1 = p[1];
-
-             uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF;
-             uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF;
-
-             if (c0 == c1)
-             {
-                 // Same bin: emit both in input order, which keeps the sort stable.
-                 uint dst_offset0 = offsets[c0];
-
-                 offsets[c0] = dst_offset0 + 2;
-
-                 pNew[dst_offset0] = static_cast<T>(index0);
-                 pNew[dst_offset0 + 1] = static_cast<T>(index1);
-             }
-             else
-             {
-                 uint dst_offset0 = offsets[c0]++;
-                 uint dst_offset1 = offsets[c1]++;
-
-                 pNew[dst_offset0] = static_cast<T>(index0);
-                 pNew[dst_offset1] = static_cast<T>(index1);
-             }
-         }
-
-         // Odd element left over by the two-at-a-time loop.
-         if (num_indices & 1)
-         {
-             uint index = *p;
-             uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF;
-
-             uint dst_offset = offsets[c];
-             offsets[c] = dst_offset + 1;
-
-             pNew[dst_offset] = static_cast<T>(index);
-         }
-
-         // Output of this pass becomes the input of the next.
-         T* t = pCur;
-         pCur = pNew;
-         pNew = t;
-     }
-
-     return pCur;
- }
-
-#undef RG_ETC1_GET_KEY
-#undef RG_ETC1_GET_KEY_FROM_INDEX
-
- // Convenience overload: forwards the quad's r, g and b to the component-wise pack_color5().
- uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias)
- {
-     const uint red = color.r;
-     const uint green = color.g;
-     const uint blue = color.b;
-     return pack_color5(red, green, blue, scaled, bias);
- }
-
- // Packs r/g/b into 5:5:5 (r in bits 10-14, g in bits 5-9, b in bits 0-4).
- // When scaled is true each input is first mapped with (v * 31 + bias) / 255.
- // Every component is capped at 31 before packing.
- uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias)
- {
-     uint comps[3] = { r, g, b };
-
-     for (uint i = 0; i < 3; i++)
-     {
-         if (scaled)
-             comps[i] = (comps[i] * 31U + bias) / 255U;
-
-         comps[i] = rg_etc1::minimum(comps[i], 31U);
-     }
-
-     return static_cast<uint16>(comps[2] | (comps[1] << 5U) | (comps[0] << 10U));
- }
-
- // Splits a packed 5:5:5 color into a quad. When scaled is true each 5-bit value is
- // widened to 8 bits by replicating its top three bits into the low bits.
- // alpha is capped at 255.
- color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha)
- {
-     uint r = (packed_color5 >> 10U) & 31U;
-     uint g = (packed_color5 >> 5U) & 31U;
-     uint b = packed_color5 & 31U;
-
-     if (scaled)
-     {
-         r = (r >> 2U) | (r << 3U);
-         g = (g >> 2U) | (g << 3U);
-         b = (b >> 2U) | (b << 3U);
-     }
-
-     const uint a = rg_etc1::minimum(alpha, 255U);
-     return color_quad_u8(cNoClamp, r, g, b, a);
- }
-
- // Same as the quad-returning overload, but hands the components back through r, g and b.
- void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled)
- {
-     const color_quad_u8 unpacked = unpack_color5(packed_color5, scaled, 0);
-     b = unpacked.b;
-     g = unpacked.g;
-     r = unpacked.r;
- }
-
- // Decodes base (5:5:5) + delta (3:3:3) into result.
- // Returns false when any summed component leaves 0-31; the components are then clamped to that range.
- // When scaled is true the 5-bit results are widened to 8 bits by bit replication.
- bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
- {
-     int delta_r, delta_g, delta_b;
-     unpack_delta3(delta_r, delta_g, delta_b, packed_delta3);
-
-     int r = ((packed_color5 >> 10U) & 31U) + delta_r;
-     int g = ((packed_color5 >> 5U) & 31U) + delta_g;
-     int b = (packed_color5 & 31U) + delta_b;
-
-     // A negative or too-large component sets bits above bit 4 in the OR of all three.
-     const bool success = !(static_cast<uint>(r | g | b) > 31U);
-     if (!success)
-     {
-         r = rg_etc1::clamp<int>(r, 0, 31);
-         g = rg_etc1::clamp<int>(g, 0, 31);
-         b = rg_etc1::clamp<int>(b, 0, 31);
-     }
-
-     if (scaled)
-     {
-         r = (r >> 2U) | (r << 3U);
-         g = (g >> 2U) | (g << 3U);
-         b = (b >> 2U) | (b << 3U);
-     }
-
-     result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U));
-     return success;
- }
-
- // Base + delta decode that hands the components back through r, g and b; returns the quad overload's success flag.
- bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
- {
-     color_quad_u8 decoded;
-     const bool in_range = unpack_color5(decoded, packed_color5, packed_delta3, scaled, alpha);
-     b = decoded.b;
-     g = decoded.g;
-     r = decoded.r;
-     return in_range;
- }
-
- // Packs three signed deltas (each expected in -4..3, asserted) into 3:3:3.
- // Negative values are stored as value + 8, i.e. 3-bit two's complement.
- uint16 etc1_block::pack_delta3(int r, int g, int b)
- {
-     RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
-     RG_ETC1_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
-     RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
-
-     const int code_r = (r < 0) ? (r + 8) : r;
-     const int code_g = (g < 0) ? (g + 8) : g;
-     const int code_b = (b < 0) ? (b + 8) : b;
-
-     return static_cast<uint16>(code_b | (code_g << 3) | (code_r << 6));
- }
-
- // Splits a packed 3:3:3 delta and sign-extends each 3-bit code (codes 4-7 become -4..-1).
- void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3)
- {
-     const int code_r = (packed_delta3 >> 6) & 7;
-     const int code_g = (packed_delta3 >> 3) & 7;
-     const int code_b = packed_delta3 & 7;
-
-     r = (code_r >= 4) ? (code_r - 8) : code_r;
-     g = (code_g >= 4) ? (code_g - 8) : code_g;
-     b = (code_b >= 4) ? (code_b - 8) : code_b;
- }
-
- // Convenience overload: forwards the quad's r, g and b to the component-wise pack_color4().
- uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias)
- {
-     const uint red = color.r;
-     const uint green = color.g;
-     const uint blue = color.b;
-     return pack_color4(red, green, blue, scaled, bias);
- }
-
- // Packs r/g/b into 4:4:4 (r in bits 8-11, g in bits 4-7, b in bits 0-3).
- // When scaled is true each input is first mapped with (v * 15 + bias) / 255.
- // Every component is capped at 15 before packing.
- uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias)
- {
-     uint comps[3] = { r, g, b };
-
-     for (uint i = 0; i < 3; i++)
-     {
-         if (scaled)
-             comps[i] = (comps[i] * 15U + bias) / 255U;
-
-         comps[i] = rg_etc1::minimum(comps[i], 15U);
-     }
-
-     return static_cast<uint16>(comps[2] | (comps[1] << 4U) | (comps[0] << 8U));
- }
-
- // Splits a packed 4:4:4 color into a quad. When scaled is true each nibble is
- // widened to 8 bits by duplicating it into both halves of the byte.
- // alpha is capped at 255.
- color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha)
- {
-     uint r = (packed_color4 >> 8U) & 15U;
-     uint g = (packed_color4 >> 4U) & 15U;
-     uint b = packed_color4 & 15U;
-
-     if (scaled)
-     {
-         r = r | (r << 4U);
-         g = g | (g << 4U);
-         b = b | (b << 4U);
-     }
-
-     const uint a = rg_etc1::minimum(alpha, 255U);
-     return color_quad_u8(cNoClamp, r, g, b, a);
- }
-
- void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled)
- {
- color_quad_u8 c(unpack_color4(packed_color4, scaled, 0));
- r = c.r;
- g = c.g;
- b = c.b;
- }
-
- void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx)
- {
- RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
- const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
-
- uint r, g, b;
- unpack_color5(r, g, b, packed_color5, true);
-
- const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
-
- const int y0 = pInten_modifer_table[0];
- pDst[0].set(ir + y0, ig + y0, ib + y0);
-
- const int y1 = pInten_modifer_table[1];
- pDst[1].set(ir + y1, ig + y1, ib + y1);
-
- const int y2 = pInten_modifer_table[2];
- pDst[2].set(ir + y2, ig + y2, ib + y2);
-
- const int y3 = pInten_modifer_table[3];
- pDst[3].set(ir + y3, ig + y3, ib + y3);
- }
-
- bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx)
- {
- RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
- const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
-
- uint r, g, b;
- bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true);
-
- const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
-
- const int y0 = pInten_modifer_table[0];
- pDst[0].set(ir + y0, ig + y0, ib + y0);
-
- const int y1 = pInten_modifer_table[1];
- pDst[1].set(ir + y1, ig + y1, ib + y1);
-
- const int y2 = pInten_modifer_table[2];
- pDst[2].set(ir + y2, ig + y2, ib + y2);
-
- const int y3 = pInten_modifer_table[3];
- pDst[3].set(ir + y3, ig + y3, ib + y3);
-
- return success;
- }
-
- void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx)
- {
- RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
- const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
-
- uint r, g, b;
- unpack_color4(r, g, b, packed_color4, true);
-
- const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
-
- const int y0 = pInten_modifer_table[0];
- pDst[0].set(ir + y0, ig + y0, ib + y0);
-
- const int y1 = pInten_modifer_table[1];
- pDst[1].set(ir + y1, ig + y1, ib + y1);
-
- const int y2 = pInten_modifer_table[2];
- pDst[2].set(ir + y2, ig + y2, ib + y2);
-
- const int y3 = pInten_modifer_table[3];
- pDst[3].set(ir + y3, ig + y3, ib + y3);
- }
-
- bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha)
- {
- color_quad_u8* pDst = reinterpret_cast<color_quad_u8*>(pDst_pixels_rgba);
- const etc1_block& block = *static_cast<const etc1_block*>(pETC1_block);
-
- const bool diff_flag = block.get_diff_bit();
- const bool flip_flag = block.get_flip_bit();
- const uint table_index0 = block.get_inten_table(0);
- const uint table_index1 = block.get_inten_table(1);
-
- color_quad_u8 subblock_colors0[4];
- color_quad_u8 subblock_colors1[4];
- bool success = true;
-
- if (diff_flag)
- {
- const uint16 base_color5 = block.get_base5_color();
- const uint16 delta_color3 = block.get_delta3_color();
- etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0);
-
- if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1))
- success = false;
- }
- else
- {
- const uint16 base_color4_0 = block.get_base4_color(0);
- etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0);
-
- const uint16 base_color4_1 = block.get_base4_color(1);
- etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1);
- }
-
- if (preserve_alpha)
- {
- if (flip_flag)
- {
- for (uint y = 0; y < 2; y++)
- {
- pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]);
- pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]);
- pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]);
- pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]);
- pDst += 4;
- }
-
- for (uint y = 2; y < 4; y++)
- {
- pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]);
- pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]);
- pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]);
- pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]);
- pDst += 4;
- }
- }
- else
- {
- for (uint y = 0; y < 4; y++)
- {
- pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]);
- pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]);
- pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]);
- pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]);
- pDst += 4;
- }
- }
- }
- else
- {
- if (flip_flag)
- {
- // 0000
- // 0000
- // 1111
- // 1111
- for (uint y = 0; y < 2; y++)
- {
- pDst[0] = subblock_colors0[block.get_selector(0, y)];
- pDst[1] = subblock_colors0[block.get_selector(1, y)];
- pDst[2] = subblock_colors0[block.get_selector(2, y)];
- pDst[3] = subblock_colors0[block.get_selector(3, y)];
- pDst += 4;
- }
-
- for (uint y = 2; y < 4; y++)
- {
- pDst[0] = subblock_colors1[block.get_selector(0, y)];
- pDst[1] = subblock_colors1[block.get_selector(1, y)];
- pDst[2] = subblock_colors1[block.get_selector(2, y)];
- pDst[3] = subblock_colors1[block.get_selector(3, y)];
- pDst += 4;
- }
- }
- else
- {
- // 0011
- // 0011
- // 0011
- // 0011
- for (uint y = 0; y < 4; y++)
- {
- pDst[0] = subblock_colors0[block.get_selector(0, y)];
- pDst[1] = subblock_colors0[block.get_selector(1, y)];
- pDst[2] = subblock_colors1[block.get_selector(2, y)];
- pDst[3] = subblock_colors1[block.get_selector(3, y)];
- pDst += 4;
- }
- }
- }
-
- return success;
- }
-
- struct etc1_solution_coordinates
- {
- inline etc1_solution_coordinates() :
- m_unscaled_color(0, 0, 0, 0),
- m_inten_table(0),
- m_color4(false)
- {
- }
-
- inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) :
- m_unscaled_color(r, g, b, 255),
- m_inten_table(inten_table),
- m_color4(color4)
- {
- }
-
- inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) :
- m_unscaled_color(c),
- m_inten_table(inten_table),
- m_color4(color4)
- {
- }
-
- inline etc1_solution_coordinates(const etc1_solution_coordinates& other)
- {
- *this = other;
- }
-
- inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs)
- {
- m_unscaled_color = rhs.m_unscaled_color;
- m_inten_table = rhs.m_inten_table;
- m_color4 = rhs.m_color4;
- return *this;
- }
-
- inline void clear()
- {
- m_unscaled_color.clear();
- m_inten_table = 0;
- m_color4 = false;
- }
-
- inline color_quad_u8 get_scaled_color() const
- {
- int br, bg, bb;
- if (m_color4)
- {
- br = m_unscaled_color.r | (m_unscaled_color.r << 4);
- bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
- bb = m_unscaled_color.b | (m_unscaled_color.b << 4);
- }
- else
- {
- br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3);
- bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3);
- bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3);
- }
- return color_quad_u8(br, bg, bb);
- }
-
- inline void get_block_colors(color_quad_u8* pBlock_colors)
- {
- int br, bg, bb;
- if (m_color4)
- {
- br = m_unscaled_color.r | (m_unscaled_color.r << 4);
- bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
- bb = m_unscaled_color.b | (m_unscaled_color.b << 4);
- }
- else
- {
- br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3);
- bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3);
- bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3);
- }
- const int* pInten_table = g_etc1_inten_tables[m_inten_table];
- pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]);
- pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]);
- pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]);
- pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]);
- }
-
- color_quad_u8 m_unscaled_color;
- uint m_inten_table;
- bool m_color4;
- };
-
- class etc1_optimizer
- {
- etc1_optimizer(const etc1_optimizer&);
- etc1_optimizer& operator= (const etc1_optimizer&);
-
- public:
- etc1_optimizer()
- {
- clear();
- }
-
- void clear()
- {
- m_pParams = NULL;
- m_pResult = NULL;
- m_pSorted_luma = NULL;
- m_pSorted_luma_indices = NULL;
- }
-
- struct params : etc1_pack_params
- {
- params()
- {
- clear();
- }
-
- params(const etc1_pack_params& base_params) :
- etc1_pack_params(base_params)
- {
- clear_optimizer_params();
- }
-
- void clear()
- {
- etc1_pack_params::clear();
- clear_optimizer_params();
- }
-
- void clear_optimizer_params()
- {
- m_num_src_pixels = 0;
- m_pSrc_pixels = 0;
-
- m_use_color4 = false;
- static const int s_default_scan_delta[] = { 0 };
- m_pScan_deltas = s_default_scan_delta;
- m_scan_delta_size = 1;
-
- m_base_color5.clear();
- m_constrain_against_base_color5 = false;
- }
-
- uint m_num_src_pixels;
- const color_quad_u8* m_pSrc_pixels;
-
- bool m_use_color4;
- const int* m_pScan_deltas;
- uint m_scan_delta_size;
-
- color_quad_u8 m_base_color5;
- bool m_constrain_against_base_color5;
- };
-
- struct results
- {
- uint64 m_error;
- color_quad_u8 m_block_color_unscaled;
- uint m_block_inten_table;
- uint m_n;
- uint8* m_pSelectors;
- bool m_block_color4;
-
- inline results& operator= (const results& rhs)
- {
- m_block_color_unscaled = rhs.m_block_color_unscaled;
- m_block_color4 = rhs.m_block_color4;
- m_block_inten_table = rhs.m_block_inten_table;
- m_error = rhs.m_error;
- RG_ETC1_ASSERT(m_n == rhs.m_n);
- memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n);
- return *this;
- }
- };
-
- void init(const params& params, results& result);
- bool compute();
-
- private:
- struct potential_solution
- {
- potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false)
- {
- }
-
- etc1_solution_coordinates m_coords;
- uint8 m_selectors[8];
- uint64 m_error;
- bool m_valid;
-
- void clear()
- {
- m_coords.clear();
- m_error = cUINT64_MAX;
- m_valid = false;
- }
- };
-
- const params* m_pParams;
- results* m_pResult;
-
- int m_limit;
-
- vec3F m_avg_color;
- int m_br, m_bg, m_bb;
- uint16 m_luma[8];
- uint32 m_sorted_luma[2][8];
- const uint32* m_pSorted_luma_indices;
- uint32* m_pSorted_luma;
-
- uint8 m_selectors[8];
- uint8 m_best_selectors[8];
-
- potential_solution m_best_solution;
- potential_solution m_trial_solution;
- uint8 m_temp_selectors[8];
-
- bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
- bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
- };
-
- bool etc1_optimizer::compute()
- {
- const uint n = m_pParams->m_num_src_pixels;
- const int scan_delta_size = m_pParams->m_scan_delta_size;
-
- // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color.
- // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index.
- for (int zdi = 0; zdi < scan_delta_size; zdi++)
- {
- const int zd = m_pParams->m_pScan_deltas[zdi];
- const int mbb = m_bb + zd;
- if (mbb < 0) continue; else if (mbb > m_limit) break;
-
- for (int ydi = 0; ydi < scan_delta_size; ydi++)
- {
- const int yd = m_pParams->m_pScan_deltas[ydi];
- const int mbg = m_bg + yd;
- if (mbg < 0) continue; else if (mbg > m_limit) break;
-
- for (int xdi = 0; xdi < scan_delta_size; xdi++)
- {
- const int xd = m_pParams->m_pScan_deltas[xdi];
- const int mbr = m_br + xd;
- if (mbr < 0) continue; else if (mbr > m_limit) break;
-
- etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4);
- if (m_pParams->m_quality == cHighQuality)
- {
- if (!evaluate_solution(coords, m_trial_solution, &m_best_solution))
- continue;
- }
- else
- {
- if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution))
- continue;
- }
-
- // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index.
- // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors:
- // The goal is:
- // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0
- // Rearranging this:
- // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0
- // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0
- // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0
- // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0
- // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0
- // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4
- // So what this means:
- // optimal_block_color = avg_input - avg_inten_delta
- // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta.
- // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula.
- // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping.
-
- const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2);
- for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++)
- {
- const uint8* pSelectors = m_best_solution.m_selectors;
- const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table];
-
- int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0;
- const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color());
- for (uint r = 0; r < n; r++)
- {
- const uint s = *pSelectors++;
- const int yd = pInten_table[s];
- // Compute actual delta being applied to each pixel, taking into account clamping.
- delta_sum_r += rg_etc1::clamp<int>(base_color.r + yd, 0, 255) - base_color.r;
- delta_sum_g += rg_etc1::clamp<int>(base_color.g + yd, 0, 255) - base_color.g;
- delta_sum_b += rg_etc1::clamp<int>(base_color.b + yd, 0, 255) - base_color.b;
- }
- if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b))
- break;
- const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n;
- const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n;
- const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n;
- const int br1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit);
- const int bg1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit);
- const int bb1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit);
-
- bool skip = false;
-
- if ((mbr == br1) && (mbg == bg1) && (mbb == bb1))
- skip = true;
- else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b))
- skip = true;
- else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1))
- skip = true;
-
- if (skip)
- break;
-
- etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4);
- if (m_pParams->m_quality == cHighQuality)
- {
- if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution))
- break;
- }
- else
- {
- if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution))
- break;
- }
-
- } // refinement_trial
-
- } // xdi
- } // ydi
- } // zdi
-
- if (!m_best_solution.m_valid)
- {
- m_pResult->m_error = cUINT32_MAX;
- return false;
- }
-
- const uint8* pSelectors = m_best_solution.m_selectors;
-
-#ifdef RG_ETC1_BUILD_DEBUG
- {
- color_quad_u8 block_colors[4];
- m_best_solution.m_coords.get_block_colors(block_colors);
-
- const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
- uint64 actual_error = 0;
- for (uint i = 0; i < n; i++)
- actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]);
-
- RG_ETC1_ASSERT(actual_error == m_best_solution.m_error);
- }
-#endif
-
- m_pResult->m_error = m_best_solution.m_error;
-
- m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color;
- m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4;
-
- m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table;
- memcpy(m_pResult->m_pSelectors, pSelectors, n);
- m_pResult->m_n = n;
-
- return true;
- }
-
- void etc1_optimizer::init(const params& p, results& r)
- {
- // This version is hardcoded for 8 pixel subblocks.
- RG_ETC1_ASSERT(p.m_num_src_pixels == 8);
-
- m_pParams = &p;
- m_pResult = &r;
-
- const uint n = 8;
-
- m_limit = m_pParams->m_use_color4 ? 15 : 31;
-
- vec3F avg_color(0.0f);
-
- for (uint i = 0; i < n; i++)
- {
- const color_quad_u8& c = m_pParams->m_pSrc_pixels[i];
- const vec3F fc(c.r, c.g, c.b);
-
- avg_color += fc;
-
- m_luma[i] = static_cast<uint16>(c.r + c.g + c.b);
- m_sorted_luma[0][i] = i;
- }
- avg_color *= (1.0f / static_cast<float>(n));
- m_avg_color = avg_color;
-
- m_br = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit);
- m_bg = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit);
- m_bb = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit);
-
- if (m_pParams->m_quality <= cMediumQuality)
- {
- m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false);
- m_pSorted_luma = m_sorted_luma[0];
- if (m_pSorted_luma_indices == m_sorted_luma[0])
- m_pSorted_luma = m_sorted_luma[1];
-
- for (uint i = 0; i < n; i++)
- m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]];
- }
-
- m_best_solution.m_coords.clear();
- m_best_solution.m_valid = false;
- m_best_solution.m_error = cUINT64_MAX;
- }
-
- bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
- {
- trial_solution.m_valid = false;
-
- if (m_pParams->m_constrain_against_base_color5)
- {
- const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
- const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
- const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
-
- if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax))
- return false;
- }
-
- const color_quad_u8 base_color(coords.get_scaled_color());
-
- const uint n = 8;
-
- trial_solution.m_error = cUINT64_MAX;
-
- for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++)
- {
- const int* pInten_table = g_etc1_inten_tables[inten_table];
-
- color_quad_u8 block_colors[4];
- for (uint s = 0; s < 4; s++)
- {
- const int yd = pInten_table[s];
- block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
- }
-
- uint64 total_error = 0;
-
- const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
- for (uint c = 0; c < n; c++)
- {
- const color_quad_u8& src_pixel = *pSrc_pixels++;
-
- uint best_selector_index = 0;
- uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b);
-
- uint trial_error = rg_etc1::square(src_pixel.r - block_colors[1].r) + rg_etc1::square(src_pixel.g - block_colors[1].g) + rg_etc1::square(src_pixel.b - block_colors[1].b);
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_selector_index = 1;
- }
-
- trial_error = rg_etc1::square(src_pixel.r - block_colors[2].r) + rg_etc1::square(src_pixel.g - block_colors[2].g) + rg_etc1::square(src_pixel.b - block_colors[2].b);
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_selector_index = 2;
- }
-
- trial_error = rg_etc1::square(src_pixel.r - block_colors[3].r) + rg_etc1::square(src_pixel.g - block_colors[3].g) + rg_etc1::square(src_pixel.b - block_colors[3].b);
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_selector_index = 3;
- }
-
- m_temp_selectors[c] = static_cast<uint8>(best_selector_index);
-
- total_error += best_error;
- if (total_error >= trial_solution.m_error)
- break;
- }
-
- if (total_error < trial_solution.m_error)
- {
- trial_solution.m_error = total_error;
- trial_solution.m_coords.m_inten_table = inten_table;
- memcpy(trial_solution.m_selectors, m_temp_selectors, 8);
- trial_solution.m_valid = true;
- }
- }
- trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
- trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
-
- bool success = false;
- if (pBest_solution)
- {
- if (trial_solution.m_error < pBest_solution->m_error)
- {
- *pBest_solution = trial_solution;
- success = true;
- }
- }
-
- return success;
- }
-
- bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
- {
- if (m_pParams->m_constrain_against_base_color5)
- {
- const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
- const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
- const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
-
- if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax))
- {
- trial_solution.m_valid = false;
- return false;
- }
- }
-
- const color_quad_u8 base_color(coords.get_scaled_color());
-
- const uint n = 8;
-
- trial_solution.m_error = cUINT64_MAX;
-
- for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table)
- {
- const int* pInten_table = g_etc1_inten_tables[inten_table];
-
- uint block_inten[4];
- color_quad_u8 block_colors[4];
- for (uint s = 0; s < 4; s++)
- {
- const int yd = pInten_table[s];
- color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
- block_colors[s] = block_color;
- block_inten[s] = block_color.r + block_color.g + block_color.b;
- }
-
- // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors.
- // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast.
- // 0 1 2 3
- // 01 12 23
- const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] };
-
- uint64 total_error = 0;
- const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
- if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0])
- {
- if (block_inten[0] > m_pSorted_luma[n - 1])
- {
- const uint min_error = intabs(block_inten[0] - m_pSorted_luma[n - 1]);
- if (min_error >= trial_solution.m_error)
- continue;
- }
-
- memset(&m_temp_selectors[0], 0, n);
-
- for (uint c = 0; c < n; c++)
- total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]);
- }
- else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2])
- {
- if (m_pSorted_luma[0] > block_inten[3])
- {
- const uint min_error = intabs(m_pSorted_luma[0] - block_inten[3]);
- if (min_error >= trial_solution.m_error)
- continue;
- }
-
- memset(&m_temp_selectors[0], 3, n);
-
- for (uint c = 0; c < n; c++)
- total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]);
- }
- else
- {
- uint cur_selector = 0, c;
- for (c = 0; c < n; c++)
- {
- const uint y = m_pSorted_luma[c];
- while ((y * 2) >= block_inten_midpoints[cur_selector])
- if (++cur_selector > 2)
- goto done;
- const uint sorted_pixel_index = m_pSorted_luma_indices[c];
- m_temp_selectors[sorted_pixel_index] = static_cast<uint8>(cur_selector);
- total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
- }
-done:
- while (c < n)
- {
- const uint sorted_pixel_index = m_pSorted_luma_indices[c];
- m_temp_selectors[sorted_pixel_index] = 3;
- total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
- ++c;
- }
- }
-
- if (total_error < trial_solution.m_error)
- {
- trial_solution.m_error = total_error;
- trial_solution.m_coords.m_inten_table = inten_table;
- memcpy(trial_solution.m_selectors, m_temp_selectors, n);
- trial_solution.m_valid = true;
- if (!total_error)
- break;
- }
- }
- trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
- trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
-
- bool success = false;
- if (pBest_solution)
- {
- if (trial_solution.m_error < pBest_solution->m_error)
- {
- *pBest_solution = trial_solution;
- success = true;
- }
- }
-
- return success;
- }
-
- static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c)
- {
- const uint limit = diff ? 32 : 16; limit;
- RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit));
- int c;
- if (diff)
- c = (packed_c >> 2) | (packed_c << 3);
- else
- c = packed_c | (packed_c << 4);
- c += g_etc1_inten_tables[inten][selector];
- c = rg_etc1::clamp<int>(c, 0, 255);
- return c;
- }
-
// Returns a*b/255 rounded to nearest, computed with shifts instead of a division.
static inline int mul_8bit(int a, int b)
{
   const int biased = a * b + 128;
   return (biased + (biased >> 8)) >> 8;
}
-
- void pack_etc1_block_init()
- {
- for (uint diff = 0; diff < 2; diff++)
- {
- const uint limit = diff ? 32 : 16;
-
- for (uint inten = 0; inten < 8; inten++)
- {
- for (uint selector = 0; selector < 4; selector++)
- {
- const uint inverse_table_index = diff + (inten << 1) + (selector << 4);
- for (uint color = 0; color < 256; color++)
- {
- uint best_error = cUINT32_MAX, best_packed_c = 0;
- for (uint packed_c = 0; packed_c < limit; packed_c++)
- {
- int v = etc1_decode_value(diff, inten, selector, packed_c);
- uint err = labs(v - static_cast<int>(color));
- //printf("err: %d - %u = %u\n",v,color,err);
- if (err < best_error)
- {
- best_error = err;
- best_packed_c = packed_c;
- if (!best_error)
- break;
- }
- }
- RG_ETC1_ASSERT(best_error <= 255);
- g_etc1_inverse_lookup[inverse_table_index][color] = static_cast<uint16>(best_packed_c | (best_error << 8));
- }
- }
- }
- }
-
- uint expand5[32];
- for(int i = 0; i < 32; i++)
- expand5[i] = (i << 3) | (i >> 2);
-
- for(int i = 0; i < 256 + 16; i++)
- {
- int v = clamp<int>(i - 8, 0, 255);
- g_quant5_tab[i] = static_cast<uint8>(expand5[mul_8bit(v,31)]);
- }
- }
-
- // Packs solid color blocks efficiently using a set of small precomputed tables.
- // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time.
- static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, etc1_pack_params& pack_params)
- {
- pack_params;
- RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]);
-
- static uint s_next_comp[4] = { 1, 2, 0, 1 };
-
- uint best_error = cUINT32_MAX, best_i = 0;
- int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
-
- // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
- for (uint i = 0; i < 3; i++)
- {
- const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
-
- const int delta_range = 1;
- for (int delta = -delta_range; delta <= delta_range; delta++)
- {
- const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
-
- const uint16* pTable;
- if (!c_plus_delta)
- pTable = g_color8_to_etc_block_config_0_255[0];
- else if (c_plus_delta == 255)
- pTable = g_color8_to_etc_block_config_0_255[1];
- else
- pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
-
- do
- {
- const uint x = *pTable++;
-
-#ifdef RG_ETC1_BUILD_DEBUG
- const uint diff = x & 1;
- const uint inten = (x >> 1) & 7;
- const uint selector = (x >> 4) & 3;
- const uint p0 = (x >> 8) & 255;
- RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
-#endif
-
- const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
- uint16 p1 = pInverse_table[c1];
- uint16 p2 = pInverse_table[c2];
- const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_x = x;
- best_packed_c1 = p1 & 0xFF;
- best_packed_c2 = p2 & 0xFF;
- best_i = i;
- if (!best_error)
- goto found_perfect_match;
- }
- } while (*pTable != 0xFFFF);
- }
- }
-found_perfect_match:
-
- const uint diff = best_x & 1;
- const uint inten = (best_x >> 1) & 7;
-
- block.m_bytes[3] = static_cast<uint8>(((inten | (inten << 3)) << 2) | (diff << 1));
-
- const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3];
- *reinterpret_cast<uint16*>(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0;
- *reinterpret_cast<uint16*>(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0;
-
- const uint best_packed_c0 = (best_x >> 8) & 255;
- if (diff)
- {
- block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 << 3);
- block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 << 3);
- block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 << 3);
- }
- else
- {
- block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 | (best_packed_c0 << 4));
- block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 | (best_packed_c1 << 4));
- block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 | (best_packed_c2 << 4));
- }
-
- return best_error;
- }
- // Finds the lowest-error encoding of one solid color restricted to diff or abs mode (and, optionally, to the delta range around a base color) and stores it in results. Returns the per-pixel squared error times num_colors, or cUINT32_MAX when no candidate qualified.
- static uint pack_etc1_block_solid_color_constrained(
- etc1_optimizer::results& results, // out: receives the winning encoding; left untouched when nothing qualifies
- uint num_colors, const uint8* pColor, // num_colors: number of selectors to fill; pColor: components 0..2 are read
- etc1_pack_params& pack_params, // not used by this function
- bool use_diff, // only table entries whose diff bit (bit 0) equals this are considered
- const color_quad_u8* pBase_color5_unscaled) // optional; in diff mode every packed component must lie within [cETC1ColorDeltaMin, cETC1ColorDeltaMax] of it
- {
- RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); // presumably guards against use before the inverse lookup table has been filled in
-
- pack_params; // no-op reference to the otherwise unused parameter
- static uint s_next_comp[4] = { 1, 2, 0, 1 }; // s_next_comp[i] and s_next_comp[i + 1] are the two components other than i
-
- uint best_error = cUINT32_MAX, best_i = 0;
- int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
-
- // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
- for (uint i = 0; i < 3; i++)
- {
- const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
-
- const int delta_range = 1; // besides the exact value of component i, also try value - 1 and value + 1
- for (int delta = -delta_range; delta <= delta_range; delta++)
- {
- const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
-
- const uint16* pTable; // list of configurations for c_plus_delta, terminated by 0xFFFF
- if (!c_plus_delta)
- pTable = g_color8_to_etc_block_config_0_255[0];
- else if (c_plus_delta == 255)
- pTable = g_color8_to_etc_block_config_0_255[1];
- else
- pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
-
- do
- {
- const uint x = *pTable++; // pTable now points at the following entry
- const uint diff = x & 1;
- if (static_cast<uint>(use_diff) != diff)
- {
- if (*pTable == 0xFFFF) // rejected entry was the last one
- break;
- continue;
- }
-
- if ((diff) && (pBase_color5_unscaled))
- {
- const int p0 = (x >> 8) & 255; // packed color for component i
- int delta = p0 - static_cast<int>(pBase_color5_unscaled->c[i]); // note: shadows the loop variable delta
- if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax))
- {
- if (*pTable == 0xFFFF)
- break;
- continue;
- }
- }
-
-#ifdef RG_ETC1_BUILD_DEBUG
- {
- const uint inten = (x >> 1) & 7;
- const uint selector = (x >> 4) & 3;
- const uint p0 = (x >> 8) & 255;
- RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
- }
-#endif
-
- const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; // low byte of x selects the diff/inten/selector row
- uint16 p1 = pInverse_table[c1]; // low byte: packed color, high byte: abs error
- uint16 p2 = pInverse_table[c2];
-
- if ((diff) && (pBase_color5_unscaled))
- {
- int delta1 = (p1 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i]]);
- int delta2 = (p2 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i + 1]]);
- if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax))
- {
- if (*pTable == 0xFFFF)
- break;
- continue;
- }
- }
-
- const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); // sum of squared per-component errors
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_x = x;
- best_packed_c1 = p1 & 0xFF;
- best_packed_c2 = p2 & 0xFF;
- best_i = i;
- if (!best_error) // zero error cannot be improved on
- goto found_perfect_match;
- }
- } while (*pTable != 0xFFFF);
- }
- }
-found_perfect_match:
-
- if (best_error == cUINT32_MAX) // every candidate was rejected
- return best_error;
-
- best_error *= num_colors; // the same error applies to each of the num_colors pixels
-
- results.m_n = num_colors;
- results.m_block_color4 = !(best_x & 1); // color4 is set when the diff bit is clear
- results.m_block_inten_table = (best_x >> 1) & 7;
- memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors); // every pixel gets the same selector
-
- const uint best_packed_c0 = (best_x >> 8) & 255;
- results.m_block_color_unscaled[best_i] = static_cast<uint8>(best_packed_c0);
- results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1);
- results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast<uint8>(best_packed_c2);
- results.m_error = best_error;
-
- return best_error;
- }
- // Quantizes the first three channels of 16 pixels (4 rows of 4) through g_quant5_tab while diffusing each pixel's quantization error to its neighbors with weights 7, 3, 5 and 1 over 16. Every byte of dest is first set to 0xFF, so the fourth channel stays 0xFF.
- // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555.
- static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block)
- {
- int err[8],*ep1 = err,*ep2 = err+4; // two rows of four errors: ep1 is written for the current row, ep2 holds the previous row
- uint8 *quant = g_quant5_tab+8; // table is declared with 256+16 entries; presumably the offset of 8 leaves room for indices slightly below 0 or above 255
-
- memset(dest, 0xFF, sizeof(color_quad_u8)*16);
-
- // process channels separately
- for(int ch=0;ch<3;ch++)
- {
- uint8* bp = (uint8*)block;
- uint8* dp = (uint8*)dest;
-
- bp += ch; dp += ch; // byte view: channel ch of pixel k in a row sits at offset 4*k, rows are 16 bytes apart
-
- memset(err,0, sizeof(err)); // no error is carried in above the first row
- for(int y = 0; y < 4; y++)
- {
- // pixel 0
- dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
- ep1[0] = bp[ 0] - dp[ 0];
-
- // pixel 1
- dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
- ep1[1] = bp[ 4] - dp[ 4];
-
- // pixel 2
- dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
- ep1[2] = bp[ 8] - dp[ 8];
-
- // pixel 3
- dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
- ep1[3] = bp[12] - dp[12];
-
- // advance to next line
- int* tmp = ep1; ep1 = ep2; ep2 = tmp; // the row just written becomes the "previous row" for the next iteration
- bp += 16;
- dp += 16;
- }
- }
- }
- // Packs 16 RGBA source pixels into the 8-byte ETC1 block at pETC1_block and returns the error of the chosen encoding. Both flip settings and both color modes (diff and color4) are tried, each as two 8-pixel subblocks, and the combination with the lowest summed error is written out.
- unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params)
- {
- const color_quad_u8* pSrc_pixels = reinterpret_cast<const color_quad_u8*>(pSrc_pixels_rgba);
- etc1_block& dst_block = *static_cast<etc1_block*>(pETC1_block);
-
-#ifdef RG_ETC1_BUILD_DEBUG
- // Ensure all alpha values are 0xFF.
- for (uint i = 0; i < 16; i++)
- {
- RG_ETC1_ASSERT(pSrc_pixels[i].a == 255);
- }
-#endif
-
- color_quad_u8 src_pixel0(pSrc_pixels[0]); // not referenced again in this function
-
- // Check for solid block.
- const uint32 first_pixel_u32 = pSrc_pixels->m_u32;
- int r;
- for (r = 15; r >= 1; --r)
- if (pSrc_pixels[r].m_u32 != first_pixel_u32)
- break;
- if (!r) // loop ran down to 0: all 16 pixels are identical
- return static_cast<unsigned int>(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params));
-
- color_quad_u8 dithered_pixels[16];
- if (pack_params.m_dithering)
- {
- dither_block_555(dithered_pixels, pSrc_pixels);
- pSrc_pixels = dithered_pixels; // from here on the dithered copy is what gets encoded
- }
-
- etc1_optimizer optimizer;
-
- uint64 best_error = cUINT64_MAX;
- uint best_flip = false, best_use_color4 = false;
-
- uint8 best_selectors[2][8];
- etc1_optimizer::results best_results[2]; // best encoding found so far, one entry per subblock
- for (uint i = 0; i < 2; i++)
- {
- best_results[i].m_n = 8;
- best_results[i].m_pSelectors = best_selectors[i];
- }
-
- uint8 selectors[3][8];
- etc1_optimizer::results results[3]; // [0] and [1]: subblocks of the current trial; [2]: solid-color candidate
-
- for (uint i = 0; i < 3; i++)
- {
- results[i].m_n = 8;
- results[i].m_pSelectors = selectors[i];
- }
-
- color_quad_u8 subblock_pixels[8];
-
- etc1_optimizer::params params(pack_params);
- params.m_num_src_pixels = 8;
- params.m_pSrc_pixels = subblock_pixels;
-
- for (uint flip = 0; flip < 2; flip++)
- {
- for (uint use_color4 = 0; use_color4 < 2; use_color4++)
- {
- uint64 trial_error = 0;
-
- uint subblock;
- for (subblock = 0; subblock < 2; subblock++)
- {
- if (flip)
- memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); // flipped: source pixels 0..7, then 8..15
- else
- {
- const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; // non-flipped: two adjacent columns, gathered one column at a time
- subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12];
- subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13];
- }
-
- results[2].m_error = cUINT64_MAX; // sentinel: no solid-color candidate for this subblock yet
- if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4)))
- {
- const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32;
- for (r = 7; r >= 1; --r)
- if (subblock_pixels[r].m_u32 != subblock_pixel0_u32)
- break;
- if (!r) // all 8 subblock pixels are identical
- {
- pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, pack_params, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL);
- }
- }
-
- params.m_use_color4 = (use_color4 != 0);
- params.m_constrain_against_base_color5 = false;
-
- if ((!use_color4) && (subblock)) // second subblock in diff mode: constrain against the first subblock's base color
- {
- params.m_constrain_against_base_color5 = true;
- params.m_base_color5 = results[0].m_block_color_unscaled;
- }
-
- if (params.m_quality == cHighQuality)
- {
- static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 };
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4);
- params.m_pScan_deltas = s_scan_delta_0_to_4;
- }
- else if (params.m_quality == cMediumQuality)
- {
- static const int s_scan_delta_0_to_1[] = { -1, 0, 1 };
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1);
- params.m_pScan_deltas = s_scan_delta_0_to_1;
- }
- else
- {
- static const int s_scan_delta_0[] = { 0 };
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0);
- params.m_pScan_deltas = s_scan_delta_0;
- }
-
- optimizer.init(params, results[subblock]);
- if (!optimizer.compute())
- break; // leaves subblock < 2, so this flip/use_color4 trial is discarded below
-
- if (params.m_quality >= cMediumQuality)
- {
- // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions.
- const uint refinement_error_thresh0 = 3000;
- const uint refinement_error_thresh1 = 6000;
- if (results[subblock].m_error > refinement_error_thresh0)
- {
- if (params.m_quality == cMediumQuality)
- {
- static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 }; // only deltas not covered by the first pass
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3);
- params.m_pScan_deltas = s_scan_delta_2_to_3;
- }
- else
- {
- static const int s_scan_delta_5_to_5[] = { -5, 5 };
- static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 };
- if (results[subblock].m_error > refinement_error_thresh1)
- {
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8);
- params.m_pScan_deltas = s_scan_delta_5_to_8;
- }
- else
- {
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5);
- params.m_pScan_deltas = s_scan_delta_5_to_5;
- }
- }
-
- if (!optimizer.compute()) // second pass with the wider scan deltas, without calling init() again
- break;
- }
-
- if (results[2].m_error < results[subblock].m_error) // prefer the solid-color candidate when it is strictly better
- results[subblock] = results[2];
- }
-
- trial_error += results[subblock].m_error;
- if (trial_error >= best_error) // this trial cannot beat the best so far
- break;
- }
-
- if (subblock < 2) // the subblock loop exited early via break
- continue;
-
- best_error = trial_error;
- best_results[0] = results[0];
- best_results[1] = results[1];
- best_flip = flip;
- best_use_color4 = use_color4;
-
- } // use_color4
-
- } // flip
-
- int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r;
- int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g;
- int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b;
- RG_ETC1_ASSERT(best_use_color4 || ((rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax)));
-
- if (best_use_color4)
- {
- dst_block.m_bytes[0] = static_cast<uint8>(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); // subblock 0 in the high nibble, subblock 1 in the low nibble
- dst_block.m_bytes[1] = static_cast<uint8>(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4));
- dst_block.m_bytes[2] = static_cast<uint8>(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4));
- }
- else
- {
- if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast<uint8>((best_results[0].m_block_color_unscaled.r << 3) | dr); // only "dr += 8" is conditional (negative delta mapped into 4..7); the store always runs
- if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast<uint8>((best_results[0].m_block_color_unscaled.g << 3) | dg);
- if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast<uint8>((best_results[0].m_block_color_unscaled.b << 3) | db);
- }
-
- dst_block.m_bytes[3] = static_cast<uint8>( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip ); // subblock 0 table from bit 5, subblock 1 table from bit 2, diff flag at bit 1, flip at bit 0
-
- uint selector0 = 0, selector1 = 0; // selector0 collects the low bit of each ETC1 selector, selector1 the high bit
- if (best_flip)
- {
- // flipped:
- // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
- // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }
- //
- // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
- // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
- const uint8* pSelectors0 = best_results[0].m_pSelectors;
- const uint8* pSelectors1 = best_results[1].m_pSelectors;
- for (int x = 3; x >= 0; --x)
- {
- uint b;
- b = g_selector_index_to_etc1[pSelectors1[4 + x]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors1[x]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors0[4 + x]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors0[x]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
- }
- }
- else
- {
- // non-flipped:
- // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
- // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
- //
- // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
- // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
- for (int subblock = 1; subblock >= 0; --subblock)
- {
- const uint8* pSelectors = best_results[subblock].m_pSelectors + 4; // start with the second group of four, then step back to the first
- for (uint i = 0; i < 2; i++)
- {
- uint b;
- b = g_selector_index_to_etc1[pSelectors[3]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors[2]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors[1]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors[0]];
- selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1);
-
- pSelectors -= 4;
- }
- }
- }
-
- dst_block.m_bytes[4] = static_cast<uint8>(selector1 >> 8); dst_block.m_bytes[5] = static_cast<uint8>(selector1 & 0xFF); // high selector bits, most significant byte first
- dst_block.m_bytes[6] = static_cast<uint8>(selector0 >> 8); dst_block.m_bytes[7] = static_cast<uint8>(selector0 & 0xFF); // low selector bits, most significant byte first
-
- return static_cast<unsigned int>(best_error); // note: narrows the 64-bit accumulated error to unsigned int
- }
-
-} // namespace rg_etc1
+// File: rg_etc1.cpp - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
+// Please see ZLIB license at the end of rg_etc1.h.
+//
+// For more information on Ericsson Texture Compression (ETC/ETC1), see:
+// http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
+//
+// v1.03 - 5/12/13 - Initial public release
+#include "rg_etc1.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+//#include <stdio.h>
+#include <math.h>
+#include <stdio.h>
+#pragma warning (disable: 4201) // nonstandard extension used : nameless struct/union
+
+#if defined(_DEBUG) || defined(DEBUG)
+#define RG_ETC1_BUILD_DEBUG
+#endif
+
+#define RG_ETC1_ASSERT assert
+
+namespace rg_etc1
+{
+ // Absolute value of a long. Declared inside namespace rg_etc1, so unqualified labs() calls made within this namespace find this overload before the global one.
+ inline long labs(long val) {
+ return val < 0 ? -val : val; // note: negating the most negative long overflows
+ }
+ // Absolute value of an int.
+ inline int intabs(int val) {
+
+ return val<0?-val:val; // note: negating the most negative int overflows
+ }
+ // Integer aliases used throughout the file; the widths in the names assume 8-bit char, 16-bit short, 32-bit int and 64-bit long long.
+ typedef unsigned char uint8;
+ typedef unsigned short uint16;
+ typedef unsigned int uint;
+ typedef unsigned int uint32;
+ typedef long long int64;
+ typedef unsigned long long uint64;
+ // Largest values of the 32- and 64-bit unsigned aliases.
+ const uint32 cUINT32_MAX = 0xFFFFFFFFU;
+ const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64;
+ // Small generic helpers; all arguments of one call must have the same type T.
+ template<typename T> inline T minimum(T a, T b) { return (a < b) ? a : b; } // returns b when the two compare equal
+ template<typename T> inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); }
+ template<typename T> inline T maximum(T a, T b) { return (a > b) ? a : b; } // returns b when the two compare equal
+ template<typename T> inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); }
+ template<typename T> inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); } // the low bound is tested first
+ template<typename T> inline T square(T value) { return value * value; }
+ template<typename T> inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); } // overwrites every byte of obj with 0
+ template<typename T> inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); } // pointer form of zero_object
+ // Declaration only: it appears inside sizeof in RG_ETC1_ARRAY_SIZE, where it is not evaluated, to name the element type of an array.
+ template<class T, size_t N> T decay_array_to_subtype(T (&a)[N]);
+ // Element count of an array; the helper above only accepts a reference to an array, so a pointer argument does not compile.
+#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X)))
+
+ enum eNoClamp { cNoClamp }; // tag passed as first argument to select the color_quad_u8 constructors that assert instead of clamping
+ // Four 8-bit components (r, g, b, a) that can also be accessed as the array c[] or as one 32-bit value m_u32.
+ struct color_quad_u8
+ {
+ static inline int clamp(int v) { if (v & 0xFFFFFF00U) v = (~(static_cast<int>(v) >> 31)) & 0xFF; return v; } // limits v to 0..255: out-of-range negatives become 0 and large values 255 (relies on >> replicating the sign bit of a 32-bit int)
+
+ struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; };
+
+ public:
+ typedef unsigned char component_t;
+ typedef int parameter_t;
+
+ enum { cNumComps = 4 };
+ // The three members of this union share the same storage.
+ union
+ {
+ struct
+ {
+ component_t r;
+ component_t g;
+ component_t b;
+ component_t a;
+ };
+
+ component_t c[cNumComps];
+
+ uint32 m_u32;
+ };
+ // Default constructor: leaves the components uninitialized.
+ inline color_quad_u8()
+ {
+ }
+
+ inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32)
+ {
+ }
+ // Gray color: r, g and b are all set to the clamped y.
+ explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax)
+ {
+ set(y, alpha);
+ }
+
+ inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
+ {
+ set(red, green, blue, alpha);
+ }
+ // The eNoClamp constructors assert that the arguments are in range instead of clamping them.
+ explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax)
+ {
+ set_noclamp_y_alpha(y, alpha);
+ }
+
+ inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
+ {
+ set_noclamp_rgba(red, green, blue, alpha);
+ }
+ // Sets all four components, including alpha, to 0.
+ inline void clear()
+ {
+ m_u32 = 0;
+ }
+
+ inline color_quad_u8& operator= (const color_quad_u8& other)
+ {
+ m_u32 = other.m_u32;
+ return *this;
+ }
+ // Copies r, g and b from other; a is left as it was.
+ inline color_quad_u8& set_rgb(const color_quad_u8& other)
+ {
+ r = other.r;
+ g = other.g;
+ b = other.b;
+ return *this;
+ }
+ // Assigning an integer sets a gray color with alpha cMax.
+ inline color_quad_u8& operator= (parameter_t y)
+ {
+ set(y, component_traits::cMax);
+ return *this;
+ }
+ // Clamps y and alpha to 0..255, then stores y in r, g and b.
+ inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax)
+ {
+ y = clamp(y);
+ alpha = clamp(alpha);
+ r = static_cast<component_t>(y);
+ g = static_cast<component_t>(y);
+ b = static_cast<component_t>(y);
+ a = static_cast<component_t>(alpha);
+ return *this;
+ }
+ // Same as set(y, alpha) but only asserts the range; out-of-range values are truncated by the cast when asserts are disabled.
+ inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax)
+ {
+ RG_ETC1_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) );
+ RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
+
+ r = static_cast<component_t>(y);
+ g = static_cast<component_t>(y);
+ b = static_cast<component_t>(y);
+ a = static_cast<component_t>(alpha);
+ return *this;
+ }
+ // Stores each argument after clamping it to 0..255.
+ inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
+ {
+ r = static_cast<component_t>(clamp(red));
+ g = static_cast<component_t>(clamp(green));
+ b = static_cast<component_t>(clamp(blue));
+ a = static_cast<component_t>(clamp(alpha));
+ return *this;
+ }
+ // Stores the arguments without clamping; the range is only asserted. Unlike the other setters, alpha has no default here.
+ inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha)
+ {
+ RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
+ RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
+ RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
+ RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
+
+ r = static_cast<component_t>(red);
+ g = static_cast<component_t>(green);
+ b = static_cast<component_t>(blue);
+ a = static_cast<component_t>(alpha);
+ return *this;
+ }
+ // As set_noclamp_rgba, but leaves a unchanged.
+ inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue)
+ {
+ RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
+ RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
+ RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
+
+ r = static_cast<component_t>(red);
+ g = static_cast<component_t>(green);
+ b = static_cast<component_t>(blue);
+ return *this;
+ }
+ // Accessors for the component_traits constants.
+ static inline parameter_t get_min_comp() { return component_traits::cMin; }
+ static inline parameter_t get_max_comp() { return component_traits::cMax; }
+ static inline bool get_comps_are_signed() { return component_traits::cSigned; }
+ // Component access by index (0 = r, 1 = g, 2 = b, 3 = a); the index is only checked by assert.
+ inline component_t operator[] (uint i) const { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
+ inline component_t& operator[] (uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
+ // Stores f, clamped to 0..255, into component i.
+ inline color_quad_u8& set_component(uint i, parameter_t f)
+ {
+ RG_ETC1_ASSERT(i < cNumComps);
+
+ c[i] = static_cast<component_t>(clamp(f));
+
+ return *this;
+ }
+ // Sets c[0..2] to the clamped l; c[3] is left unchanged.
+ inline color_quad_u8& set_grayscale(parameter_t l)
+ {
+ component_t x = static_cast<component_t>(clamp(l));
+ c[0] = x;
+ c[1] = x;
+ c[2] = x;
+ return *this;
+ }
+ // Clamps every component, alpha included, to the matching components of l and h.
+ inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h)
+ {
+ for (uint i = 0; i < cNumComps; i++)
+ c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l[i], h[i]));
+ return *this;
+ }
+ // Clamps every component, alpha included, to the range l..h.
+ inline color_quad_u8& clamp(parameter_t l, parameter_t h)
+ {
+ for (uint i = 0; i < cNumComps; i++)
+ c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l, h));
+ return *this;
+ }
+
+ // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y).
+ inline parameter_t get_luma() const
+ {
+ return static_cast<parameter_t>((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); // weights sum to 65536; adding 32768 rounds to nearest
+ }
+
+ // Returns REC 709 luma.
+ inline parameter_t get_luma_rec709() const
+ {
+ return static_cast<parameter_t>((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); // weights sum to 65536; adding 32768 rounds to nearest
+ }
+ // Sum of squared differences over r, g and b. The parameter named c hides the member array c inside the body.
+ inline uint squared_distance_rgb(const color_quad_u8& c) const
+ {
+ return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b);
+ }
+ // Sum of squared differences over r, g, b and a.
+ inline uint squared_distance_rgba(const color_quad_u8& c) const
+ {
+ return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a);
+ }
+ // Compares r, g and b only.
+ inline bool rgb_equals(const color_quad_u8& rhs) const
+ {
+ return (r == rhs.r) && (g == rhs.g) && (b == rhs.b);
+ }
+ // Compares all four components at once through m_u32.
+ inline bool operator== (const color_quad_u8& rhs) const
+ {
+ return m_u32 == rhs.m_u32;
+ }
+ // Per-component addition, each result clamped to 0..255.
+ color_quad_u8& operator+= (const color_quad_u8& other)
+ {
+ for (uint i = 0; i < 4; i++)
+ c[i] = static_cast<component_t>(clamp(c[i] + other.c[i]));
+ return *this;
+ }
+ // Per-component subtraction, each result clamped to 0..255.
+ color_quad_u8& operator-= (const color_quad_u8& other)
+ {
+ for (uint i = 0; i < 4; i++)
+ c[i] = static_cast<component_t>(clamp(c[i] - other.c[i]));
+ return *this;
+ }
+ // Binary + and - are implemented through += and -= and clamp the same way.
+ friend color_quad_u8 operator+ (const color_quad_u8& lhs, const color_quad_u8& rhs)
+ {
+ color_quad_u8 result(lhs);
+ result += rhs;
+ return result;
+ }
+
+ friend color_quad_u8 operator- (const color_quad_u8& lhs, const color_quad_u8& rhs)
+ {
+ color_quad_u8 result(lhs);
+ result -= rhs;
+ return result;
+ }
+ }; // class color_quad_u8
+ // Minimal three-component float vector: construction, read-only indexing, += and scaling by a float.
+ struct vec3F
+ {
+ float m_s[3];
+ // The default constructor leaves m_s uninitialized; the one-argument form copies s into all three components.
+ inline vec3F() { }
+ inline vec3F(float s) { m_s[0] = s; m_s[1] = s; m_s[2] = s; }
+ inline vec3F(float x, float y, float z) { m_s[0] = x; m_s[1] = y; m_s[2] = z; }
+ // Read-only component access; the index is only checked by assert.
+ inline float operator[] (uint i) const { RG_ETC1_ASSERT(i < 3); return m_s[i]; }
+ // Component-wise addition.
+ inline vec3F& operator += (const vec3F& other) { for (uint i = 0; i < 3; i++) m_s[i] += other.m_s[i]; return *this; }
+ // Multiplies every component by s.
+ inline vec3F& operator *= (float s) { for (uint i = 0; i < 3; i++) m_s[i] *= s; return *this; }
+ };
+ // Sizes, field widths and field positions of an ETC1 block. The *BitOffset values are presumably bit positions within the 64-bit (8-byte) block - confirm against the code that reads them.
+ enum etc_constants
+ {
+ cETC1BytesPerBlock = 8U,
+
+ cETC1SelectorBits = 2U,
+ cETC1SelectorValues = 1U << cETC1SelectorBits, // 4
+ cETC1SelectorMask = cETC1SelectorValues - 1U, // 3
+
+ cETC1BlockShift = 2U,
+ cETC1BlockSize = 1U << cETC1BlockShift, // 4
+
+ cETC1LSBSelectorIndicesBitOffset = 0,
+ cETC1MSBSelectorIndicesBitOffset = 16,
+
+ cETC1FlipBitOffset = 32,
+ cETC1DiffBitOffset = 33,
+
+ cETC1IntenModifierNumBits = 3,
+ cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, // 8, the number of rows in g_etc1_inten_tables
+ cETC1RightIntenModifierTableBitOffset = 34,
+ cETC1LeftIntenModifierTableBitOffset = 37,
+
+ // Base+Delta encoding (5 bit bases, 3 bit delta)
+ cETC1BaseColorCompNumBits = 5,
+ cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, // 32: the number of values, one more than the largest value
+
+ cETC1DeltaColorCompNumBits = 3,
+ cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, // 8, same value as cETC1DeltaColorCompMax
+ cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,
+
+ cETC1BaseColor5RBitOffset = 59,
+ cETC1BaseColor5GBitOffset = 51,
+ cETC1BaseColor5BBitOffset = 43,
+
+ cETC1DeltaColor3RBitOffset = 56,
+ cETC1DeltaColor3GBitOffset = 48,
+ cETC1DeltaColor3BBitOffset = 40,
+
+ // Absolute (non-delta) encoding (two 4-bit per component bases)
+ cETC1AbsColorCompNumBits = 4,
+ cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, // 16: the number of values, one more than the largest value
+
+ cETC1AbsColor4R1BitOffset = 60,
+ cETC1AbsColor4G1BitOffset = 52,
+ cETC1AbsColor4B1BitOffset = 44,
+
+ cETC1AbsColor4R2BitOffset = 56,
+ cETC1AbsColor4G2BitOffset = 48,
+ cETC1AbsColor4B2BitOffset = 40,
+
+ cETC1ColorDeltaMin = -4, // range of a 3-bit two's complement delta, see the Delta3 table below
+ cETC1ColorDeltaMax = 3,
+
+ // Delta3:
+ // 0 1 2 3 4 5 6 7
+ // 000 001 010 011 100 101 110 111
+ // 0 1 2 3 -4 -3 -2 -1
+ };
+ // Quantization table; not const and not initialized here, so it is presumably filled in by an init routine before use - confirm.
+ static uint8 g_quant5_tab[256+16];
+
+ // Intensity modifier tables: one row per intensity table index, one column per selector value, columns in ascending order.
+ static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] =
+ {
+ { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 },
+ { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 }
+ };
+ // These two tables are inverse permutations of each other.
+ static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
+ static const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };
+
+ // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte.
+ static uint16 g_etc1_inverse_lookup[2*8*4][256]; // [diff/inten_table/selector][desired_color]
+ // Configurations for the two extreme colors: row 0 is used for color 0 and row 1 for color 255. Each row holds 32 entries followed by the 0xFFFF terminator.
+ // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color.
+ // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8)
+ static const uint16 g_color8_to_etc_block_config_0_255[2][33] =
+ {
+ { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E,
+ 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF },
+ { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E,
+ 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF },
+ };
+
+ // Really only [254][11].
+ static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] =
+ {
+ { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E,
+ 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, {
+ 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306,
+ 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112,
+ 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707,
+ 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B,
+ 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605,
+ 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF
+ }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214,
+ 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A,
+ 0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, {
+ 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B,
+ 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D,
+ 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805,
+ 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F,
+ 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, {
+ 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521,
+ 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523,
+ 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F,
+ 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B,
+ 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, {
+ 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F,
+ 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D,
+ 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529,
+ 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917,
+ 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E,
+ 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725,
+ 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139,
+ 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, {
+ 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A,
+ 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437,
+ 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500,
+ 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, {
+ 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19,
+ 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D,
+ 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, {
+ 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F,
+ 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D,
+ 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, {
+ 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05,
+ 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434,
+ 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01,
+ 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21,
+ 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27,
+ 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E,
+ 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D,
+ 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, {
+ 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, {
+ 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307,
+ 0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33,
+ 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B,
+ 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, {
+ 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103,
+ 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B,
+ 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536,
+ 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A,
+ 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115,
+ 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, {
+ 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF
+ }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820,
+ 0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031,
+ 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, {
+ 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35,
+ 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F,
+ 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D,
+ 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029,
+ 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832,
+ 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D,
+ 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133,
+ 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF
+ }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, {
+ 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331,
+ 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D,
+ 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513,
+ 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF
+ }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, {
+ 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, {
+ 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905,
+ 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09,
+ 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D,
+ 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621,
+ 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18,
+ 0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919,
+ 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625,
+ 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F,
+ 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936,
+ 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A,
+ 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, {
+ 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913,
+ 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, {
+ 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20,
+ 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C,
+ 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, {
+ 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06,
+ 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, {
+ 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26,
+ 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18,
+ 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03,
+ 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929,
+ 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23,
+ 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF
+ }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B,
+ 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E,
+ 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18,
+ 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01,
+ 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16,
+ 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B,
+ 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01,
+ 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34,
+ 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11,
+ 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF },
+ };
+
+ // View of one 64-bit ETC1 block plus accessors for its bit fields, and static
+ // helpers that pack/unpack the 5:5:5, 3:3:3 (delta) and 4:4:4 colors stored in it.
+ // Every accessor below reads or writes m_bytes only.
+ struct etc1_block
+ {
+    // big endian uint64:
+    // bit ofs:  56  48  40  32  24  16   8   0
+    // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
+    union
+    {
+       uint64 m_uint64;
+       uint8 m_bytes[8];
+    };
+
+    uint8 m_low_color[2];
+    uint8 m_high_color[2];
+
+    enum { cNumSelectorBytes = 4 };
+    uint8 m_selectors[cNumSelectorBytes];
+
+    // Resets the structure through zero_this().
+    inline void clear()
+    {
+       zero_this(this);
+    }
+
+    // Returns the num-bit field (1-8 bits) whose lowest bit sits at bit offset ofs
+    // of the block, where bit 0 is the least significant bit of m_bytes[7].
+    // The field must lie entirely inside one byte.
+    inline uint get_byte_bits(uint ofs, uint num) const
+    {
+       RG_ETC1_ASSERT((ofs + num) <= 64U);
+       RG_ETC1_ASSERT(num && (num <= 8U));
+       RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
+       const uint byte_ofs = 7 - (ofs >> 3);
+       const uint byte_bit_ofs = ofs & 7;
+       return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
+    }
+
+    // Stores bits into the num-bit field (1-8 bits) at bit offset ofs; same
+    // addressing rules as get_byte_bits().
+    inline void set_byte_bits(uint ofs, uint num, uint bits)
+    {
+       RG_ETC1_ASSERT((ofs + num) <= 64U);
+       // A field may not cross a byte boundary (next assert), so 8 bits is the
+       // real upper limit - the same limit get_byte_bits() enforces.
+       RG_ETC1_ASSERT(num && (num <= 8U));
+       RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
+       RG_ETC1_ASSERT(bits < (1U << num));
+       const uint byte_ofs = 7 - (ofs >> 3);
+       const uint byte_bit_ofs = ofs & 7;
+       const uint mask = (1U << num) - 1U;
+       m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
+       // Mask the value so that, when asserts are compiled out, an oversized
+       // input cannot spill into the neighbouring fields of the same byte.
+       m_bytes[byte_ofs] |= ((bits & mask) << byte_bit_ofs);
+    }
+
+    // false = left/right subblocks
+    // true = upper/lower subblocks
+    inline bool get_flip_bit() const
+    {
+       return (m_bytes[3] & 1) != 0;
+    }
+
+    inline void set_flip_bit(bool flip)
+    {
+       m_bytes[3] &= ~1;
+       m_bytes[3] |= static_cast<uint8>(flip);
+    }
+
+    // Bit 1 of m_bytes[3]; unpack_etc1_block() decodes the block as base color5
+    // plus delta3 when it is set and as two color4 values when it is clear.
+    inline bool get_diff_bit() const
+    {
+       return (m_bytes[3] & 2) != 0;
+    }
+
+    inline void set_diff_bit(bool diff)
+    {
+       m_bytes[3] &= ~2;
+       m_bytes[3] |= (static_cast<uint>(diff) << 1);
+    }
+
+    // Returns intensity modifier table (0-7) used by subblock subblock_id.
+    // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2)
+    // Subblock 0 occupies bits 5-7 of m_bytes[3], subblock 1 bits 2-4.
+    inline uint get_inten_table(uint subblock_id) const
+    {
+       RG_ETC1_ASSERT(subblock_id < 2);
+       const uint ofs = subblock_id ? 2 : 5;
+       return (m_bytes[3] >> ofs) & 7;
+    }
+
+    // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
+    inline void set_inten_table(uint subblock_id, uint t)
+    {
+       RG_ETC1_ASSERT(subblock_id < 2);
+       RG_ETC1_ASSERT(t < 8);
+       const uint ofs = subblock_id ? 2 : 5;
+       m_bytes[3] &= ~(7 << ofs);
+       // Only the low three bits are stored, so an out-of-range t cannot
+       // disturb the other fields packed into this byte.
+       m_bytes[3] |= ((t & 7) << ofs);
+    }
+
+    // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
+    // Pixel (x, y) uses bit x*4+y: the low selector bit lives in m_bytes[6..7] and
+    // the high selector bit two bytes earlier, in m_bytes[4..5].
+    inline uint get_selector(uint x, uint y) const
+    {
+       RG_ETC1_ASSERT((x | y) < 4);
+
+       const uint bit_index = x * 4 + y;
+       const uint byte_bit_ofs = bit_index & 7;
+       const uint8 *p = &m_bytes[7 - (bit_index >> 3)];
+       const uint lsb = (p[0] >> byte_bit_ofs) & 1;
+       const uint msb = (p[-2] >> byte_bit_ofs) & 1;
+       const uint val = lsb | (msb << 1);
+
+       return g_etc1_to_selector_index[val];
+    }
+
+    // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
+    inline void set_selector(uint x, uint y, uint val)
+    {
+       RG_ETC1_ASSERT((x | y | val) < 4);
+       const uint bit_index = x * 4 + y;
+
+       uint8 *p = &m_bytes[7 - (bit_index >> 3)];
+
+       const uint byte_bit_ofs = bit_index & 7;
+       const uint mask = 1 << byte_bit_ofs;
+
+       // Translate the table index into the 2-bit code stored in the block.
+       const uint etc1_val = g_selector_index_to_etc1[val];
+
+       const uint lsb = etc1_val & 1;
+       const uint msb = etc1_val >> 1;
+
+       p[0] &= ~mask;
+       p[0] |= (lsb << byte_bit_ofs);
+
+       p[-2] &= ~mask;
+       p[-2] |= (msb << byte_bit_ofs);
+    }
+
+    // Stores a packed 4:4:4 color (r in bits 8-11, g in bits 4-7, b in bits 0-3)
+    // as the second (idx != 0) or first (idx == 0) absolute color.
+    inline void set_base4_color(uint idx, uint16 c)
+    {
+       if (idx)
+       {
+          set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
+          set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
+          set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
+       }
+       else
+       {
+          set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
+          set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
+          set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
+       }
+    }
+
+    // Inverse of set_base4_color(): returns the packed 4:4:4 color for idx.
+    inline uint16 get_base4_color(uint idx) const
+    {
+       uint r, g, b;
+       if (idx)
+       {
+          r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
+          g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
+          b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
+       }
+       else
+       {
+          r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
+          g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
+          b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
+       }
+       return static_cast<uint16>(b | (g << 4U) | (r << 8U));
+    }
+
+    // Stores a packed 5:5:5 base color (r in bits 10-14, g in bits 5-9, b in bits 0-4).
+    inline void set_base5_color(uint16 c)
+    {
+       set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
+       set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
+       set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
+    }
+
+    // Returns the packed 5:5:5 base color.
+    inline uint16 get_base5_color() const
+    {
+       const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
+       const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
+       const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
+       return static_cast<uint16>(b | (g << 5U) | (r << 10U));
+    }
+
+    // Stores a packed 3:3:3 delta color (r in bits 6-8, g in bits 3-5, b in bits 0-2).
+    inline void set_delta3_color(uint16 c)
+    {
+       set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
+       set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
+       set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
+    }
+
+    // Returns the packed 3:3:3 delta color.
+    inline uint16 get_delta3_color() const
+    {
+       const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
+       const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
+       const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
+       return static_cast<uint16>(b | (g << 3U) | (r << 6U));
+    }
+
+    // Base color 5
+    static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U);
+    static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U);
+
+    static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U);
+    static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled);
+
+    static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
+    static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
+
+    // Delta color 3
+    // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
+    static uint16 pack_delta3(int r, int g, int b);
+
+    // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
+    static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3);
+
+    // Abs color 4
+    static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U);
+    static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U);
+
+    static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U);
+    static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled);
+
+    // subblock colors
+    static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx);
+    static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx);
+    static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx);
+
+    // Expands an unscaled color to 8 bits per component by bit replication:
+    // 4-bit components when color4 is true, 5-bit components otherwise.
+    // Alpha is copied unchanged.
+    static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4)
+    {
+       if (color4)
+       {
+          dst.r = src.r | (src.r << 4);
+          dst.g = src.g | (src.g << 4);
+          dst.b = src.b | (src.b << 4);
+       }
+       else
+       {
+          dst.r = (src.r >> 2) | (src.r << 3);
+          dst.g = (src.g >> 2) | (src.g << 3);
+          dst.b = (src.b >> 2) | (src.b << 3);
+       }
+       dst.a = src.a;
+    }
+ };
+
+ // Helper for indirect_radix_sort(): copies the key_size (1-4) key bytes that
+ // start key_ofs bytes into element pKeys[index] into a zero-initialised uint.
+ // Exactly key_size bytes are read, so a key narrower than a uint never causes a
+ // read past the end of the key array, and the byte-wise copy is safe for keys
+ // that are not uint-aligned.
+ template<typename Q>
+ inline uint indirect_radix_sort_get_key(const Q* pKeys, uint index, uint key_ofs, uint key_size)
+ {
+    uint key = 0;
+    memcpy(&key, reinterpret_cast<const uint8*>(pKeys + index) + key_ofs, key_size);
+    return key;
+ }
+
+ // Returns pointer to sorted array.
+ //
+ // Stable least-significant-byte-first radix sort of indices into pKeys.
+ //   num_indices  - number of entries in pIndices0 / pIndices1.
+ //   pIndices0    - indices to sort; filled with 0..num_indices-1 first when
+ //                  init_indices is true.
+ //   pIndices1    - scratch buffer of the same size.
+ //   pKeys        - key array addressed by the indices.
+ //   key_ofs      - byte offset of the key inside each Q element.
+ //   key_size     - key width in bytes (1-4); one pass is run per key byte.
+ // The result is whichever of the two buffers received the last pass (pIndices1
+ // for an odd key_size, pIndices0 for an even one). NULL is returned when
+ // key_size is outside 1-4.
+ // NOTE(review): pass 0 sorts on the first key byte in memory, i.e. this appears
+ // to assume little-endian keys - confirm before using on big-endian targets.
+ template<typename T, typename Q>
+ T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices)
+ {
+    RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4));
+    // The key must lie inside one element of the key type Q (not the index type T).
+    RG_ETC1_ASSERT((key_ofs + key_size) <= sizeof(Q));
+
+    if (init_indices)
+    {
+       for (uint i = 0; i < num_indices; i++)
+          pIndices0[i] = static_cast<T>(i);
+    }
+
+    // Reject unsupported key sizes before hist is touched: the memset below is
+    // sized by key_size and would overrun the array for anything above 4.
+    if ((key_size < 1) || (key_size > 4))
+       return NULL;
+
+    // One 256-entry histogram per key byte.
+    uint hist[256 * 4];
+
+    memset(hist, 0, sizeof(hist[0]) * 256 * key_size);
+
+    for (uint i = 0; i < num_indices; i++)
+    {
+       const uint key = indirect_radix_sort_get_key(pKeys, static_cast<uint>(pIndices0[i]), key_ofs, key_size);
+
+       for (uint pass = 0; pass < key_size; pass++)
+          hist[(pass << 8) + ((key >> (pass << 3)) & 0xFF)]++;
+    }
+
+    T* pCur = pIndices0;
+    T* pNew = pIndices1;
+
+    for (uint pass = 0; pass < key_size; pass++)
+    {
+       const uint* pHist = &hist[pass << 8];
+
+       // Exclusive prefix sum: first output slot for every byte value.
+       uint offsets[256];
+
+       uint cur_ofs = 0;
+       for (uint i = 0; i < 256; i++)
+       {
+          offsets[i] = cur_ofs;
+          cur_ofs += pHist[i];
+       }
+
+       const uint pass_shift = pass << 3;
+
+       // Scatter in input order so entries with equal bytes keep their order.
+       for (uint i = 0; i < num_indices; i++)
+       {
+          const uint index = pCur[i];
+          const uint c = (indirect_radix_sort_get_key(pKeys, index, key_ofs, key_size) >> pass_shift) & 0xFF;
+
+          pNew[offsets[c]++] = static_cast<T>(index);
+       }
+
+       T* t = pCur;
+       pCur = pNew;
+       pNew = t;
+    }
+
+    return pCur;
+ }
+
+ // Convenience overload: packs the r, g and b members of color as 5:5:5.
+ // Only those three members are forwarded to the component overload.
+ uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias)
+ {
+    const uint red = color.r;
+    const uint green = color.g;
+    const uint blue = color.b;
+    return pack_color5(red, green, blue, scaled, bias);
+ }
+
+ // Packs three components into a 5:5:5 value (r in bits 10-14, g in bits 5-9,
+ // b in bits 0-4). When scaled is true each input is first mapped with
+ // (c * 31 + bias) / 255. Every component is limited to at most 31.
+ uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias)
+ {
+    uint comps[3] = { r, g, b };
+
+    for (uint i = 0; i < 3; i++)
+    {
+       uint c = comps[i];
+       if (scaled)
+          c = (c * 31U + bias) / 255U;
+       comps[i] = rg_etc1::minimum(c, 31U);
+    }
+
+    return static_cast<uint16>(comps[2] | (comps[1] << 5U) | (comps[0] << 10U));
+ }
+
+ // Splits a packed 5:5:5 color into components. With scaled set, each 5-bit
+ // component is widened to 8 bits by repeating its top three bits in the low
+ // bits. The alpha of the result is alpha limited to 255.
+ color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha)
+ {
+    uint r = (packed_color5 >> 10U) & 31U;
+    uint g = (packed_color5 >> 5U) & 31U;
+    uint b = packed_color5 & 31U;
+
+    if (scaled)
+    {
+       r = (r >> 2U) | (r << 3U);
+       g = (g >> 2U) | (g << 3U);
+       b = (b >> 2U) | (b << 3U);
+    }
+
+    const uint a = rg_etc1::minimum(alpha, 255U);
+    return color_quad_u8(cNoClamp, r, g, b, a);
+ }
+
+ // Same as the color_quad_u8-returning overload, but writes the three color
+ // components to r, g and b. Alpha is requested as 0 and then discarded.
+ void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled)
+ {
+    const color_quad_u8 unpacked(unpack_color5(packed_color5, scaled, 0));
+
+    r = unpacked.r;
+    g = unpacked.g;
+    b = unpacked.b;
+ }
+
+ // Adds the signed 3:3:3 delta to the 5:5:5 base color and stores the sum in
+ // result. Returns false when any summed component left the 0-31 range; the
+ // components are then clamped to 0-31 before being stored. With scaled set the
+ // 5-bit sums are widened to 8 bits. The stored alpha is alpha limited to 255.
+ bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
+ {
+    int delta_r, delta_g, delta_b;
+    unpack_delta3(delta_r, delta_g, delta_b, packed_delta3);
+
+    int r = static_cast<int>((packed_color5 >> 10U) & 31U) + delta_r;
+    int g = static_cast<int>((packed_color5 >> 5U) & 31U) + delta_g;
+    int b = static_cast<int>(packed_color5 & 31U) + delta_b;
+
+    // OR-ing the components and comparing as unsigned catches both a negative
+    // value (sign bit set) and a value above 31 (bit 5 or higher set) in one test.
+    const bool in_range = static_cast<uint>(r | g | b) <= 31U;
+    if (!in_range)
+    {
+       r = rg_etc1::clamp<int>(r, 0, 31);
+       g = rg_etc1::clamp<int>(g, 0, 31);
+       b = rg_etc1::clamp<int>(b, 0, 31);
+    }
+
+    if (scaled)
+    {
+       r = (r >> 2U) | (r << 3U);
+       g = (g >> 2U) | (g << 3U);
+       b = (b >> 2U) | (b << 3U);
+    }
+
+    result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U));
+    return in_range;
+ }
+
+ // Base-plus-delta unpack that writes the color components to r, g and b.
+ // The return value is passed through from the color_quad_u8 overload.
+ bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
+ {
+    color_quad_u8 unpacked;
+    const bool ok = unpack_color5(unpacked, packed_color5, packed_delta3, scaled, alpha);
+
+    r = unpacked.r;
+    g = unpacked.g;
+    b = unpacked.b;
+
+    return ok;
+ }
+
+ // Packs three signed deltas (each cETC1ColorDeltaMin..cETC1ColorDeltaMax) into
+ // a 3:3:3 value: r in bits 6-8, g in bits 3-5, b in bits 0-2. Negative deltas
+ // are stored as delta + 8.
+ uint16 etc1_block::pack_delta3(int r, int g, int b)
+ {
+    RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
+    RG_ETC1_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
+    RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
+
+    const int enc_r = (r < 0) ? (r + 8) : r;
+    const int enc_g = (g < 0) ? (g + 8) : g;
+    const int enc_b = (b < 0) ? (b + 8) : b;
+
+    return static_cast<uint16>(enc_b | (enc_g << 3) | (enc_r << 6));
+ }
+
+ // Inverse of pack_delta3(): extracts the three 3-bit fields and maps the
+ // stored values 4-7 back to -4..-1, so each result lies in -4..3.
+ void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3)
+ {
+    const int raw_r = (packed_delta3 >> 6) & 7;
+    const int raw_g = (packed_delta3 >> 3) & 7;
+    const int raw_b = packed_delta3 & 7;
+
+    r = (raw_r >= 4) ? (raw_r - 8) : raw_r;
+    g = (raw_g >= 4) ? (raw_g - 8) : raw_g;
+    b = (raw_b >= 4) ? (raw_b - 8) : raw_b;
+ }
+
+ // Convenience overload: packs the r, g and b members of color as 4:4:4.
+ // Only those three members are forwarded to the component overload.
+ uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias)
+ {
+    const uint red = color.r;
+    const uint green = color.g;
+    const uint blue = color.b;
+    return pack_color4(red, green, blue, scaled, bias);
+ }
+
+ // Packs three components into a 4:4:4 value (r in bits 8-11, g in bits 4-7,
+ // b in bits 0-3). When scaled is true each input is first mapped with
+ // (c * 15 + bias) / 255. Every component is limited to at most 15.
+ uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias)
+ {
+    uint comps[3] = { r, g, b };
+
+    for (uint i = 0; i < 3; i++)
+    {
+       uint c = comps[i];
+       if (scaled)
+          c = (c * 15U + bias) / 255U;
+       comps[i] = rg_etc1::minimum(c, 15U);
+    }
+
+    return static_cast<uint16>(comps[2] | (comps[1] << 4U) | (comps[0] << 8U));
+ }
+
+ // Splits a packed 4:4:4 color into components. With scaled set, each 4-bit
+ // component is widened to 8 bits by duplicating the nibble. The alpha of the
+ // result is alpha limited to 255.
+ color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha)
+ {
+    uint r = (packed_color4 >> 8U) & 15U;
+    uint g = (packed_color4 >> 4U) & 15U;
+    uint b = packed_color4 & 15U;
+
+    if (scaled)
+    {
+       r = r | (r << 4U);
+       g = g | (g << 4U);
+       b = b | (b << 4U);
+    }
+
+    const uint a = rg_etc1::minimum(alpha, 255U);
+    return color_quad_u8(cNoClamp, r, g, b, a);
+ }
+
+ // Same as the color_quad_u8-returning overload, but writes the three color
+ // components to r, g and b. Alpha is requested as 0 and then discarded.
+ void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled)
+ {
+    const color_quad_u8 unpacked(unpack_color4(packed_color4, scaled, 0));
+
+    r = unpacked.r;
+    g = unpacked.g;
+    b = unpacked.b;
+ }
+
+ // Writes the four colors of a subblock to pDst[0..3]: the scaled 5:5:5 base
+ // color with each of the four modifiers of intensity table table_idx added to
+ // r, g and b. The sums are handed to color_quad_u8::set() as ints and may lie
+ // outside 0-255.
+ void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx)
+ {
+    RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
+    const int* pModifiers = &g_etc1_inten_tables[table_idx][0];
+
+    uint r, g, b;
+    unpack_color5(r, g, b, packed_color5, true);
+
+    const int base_r = static_cast<int>(r);
+    const int base_g = static_cast<int>(g);
+    const int base_b = static_cast<int>(b);
+
+    for (uint i = 0; i < 4; i++)
+    {
+       const int modifier = pModifiers[i];
+       pDst[i].set(base_r + modifier, base_g + modifier, base_b + modifier);
+    }
+ }
+
+ // Writes the four colors of a subblock whose base is packed_color5 plus
+ // packed_delta3 to pDst[0..3], using intensity table table_idx. Returns the
+ // status of the base-plus-delta unpack (false when the sum left the 5-bit
+ // range); the four colors are written either way.
+ bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx)
+ {
+    RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
+    const int* pModifiers = &g_etc1_inten_tables[table_idx][0];
+
+    uint r, g, b;
+    const bool base_ok = unpack_color5(r, g, b, packed_color5, packed_delta3, true);
+
+    const int base_r = static_cast<int>(r);
+    const int base_g = static_cast<int>(g);
+    const int base_b = static_cast<int>(b);
+
+    for (uint i = 0; i < 4; i++)
+    {
+       const int modifier = pModifiers[i];
+       pDst[i].set(base_r + modifier, base_g + modifier, base_b + modifier);
+    }
+
+    return base_ok;
+ }
+
+ // Writes the four colors of a subblock to pDst[0..3]: the scaled 4:4:4 base
+ // color with each of the four modifiers of intensity table table_idx added to
+ // r, g and b. The sums are handed to color_quad_u8::set() as ints and may lie
+ // outside 0-255.
+ void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx)
+ {
+    RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
+    const int* pModifiers = &g_etc1_inten_tables[table_idx][0];
+
+    uint r, g, b;
+    unpack_color4(r, g, b, packed_color4, true);
+
+    const int base_r = static_cast<int>(r);
+    const int base_g = static_cast<int>(g);
+    const int base_b = static_cast<int>(b);
+
+    for (uint i = 0; i < 4; i++)
+    {
+       const int modifier = pModifiers[i];
+       pDst[i].set(base_r + modifier, base_g + modifier, base_b + modifier);
+    }
+ }
+
+ // Decodes one ETC1 block into 16 pixels written to pDst_pixels_rgba in
+ // row-major order (four rows of four pixels).
+ //   pETC1_block     - the block to decode; only its first 8 bytes (m_bytes) are read.
+ //   preserve_alpha  - true: only the r, g, b of each destination pixel are
+ //                     replaced (set_rgb); false: the whole pixel is overwritten
+ //                     with the decoded color.
+ // Returns false when the block is in differential mode and base + delta left
+ // the valid range; the pixels are still written in that case.
+ bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha)
+ {
+    color_quad_u8* pOut = reinterpret_cast<color_quad_u8*>(pDst_pixels_rgba);
+    const etc1_block& block = *static_cast<const etc1_block*>(pETC1_block);
+
+    const bool differential = block.get_diff_bit();
+    const bool flipped = block.get_flip_bit();
+    const uint inten_table0 = block.get_inten_table(0);
+    const uint inten_table1 = block.get_inten_table(1);
+
+    // palette[s] holds the four selectable colors of subblock s.
+    color_quad_u8 palette[2][4];
+    bool success = true;
+
+    if (differential)
+    {
+       const uint16 base_color5 = block.get_base5_color();
+       const uint16 delta_color3 = block.get_delta3_color();
+
+       etc1_block::get_diff_subblock_colors(palette[0], base_color5, inten_table0);
+       if (!etc1_block::get_diff_subblock_colors(palette[1], base_color5, delta_color3, inten_table1))
+          success = false;
+    }
+    else
+    {
+       const uint16 first_color4 = block.get_base4_color(0);
+       etc1_block::get_abs_subblock_colors(palette[0], first_color4, inten_table0);
+
+       const uint16 second_color4 = block.get_base4_color(1);
+       etc1_block::get_abs_subblock_colors(palette[1], second_color4, inten_table1);
+    }
+
+    // Subblock layout:
+    //   flipped      not flipped
+    //    0000           0011
+    //    0000           0011
+    //    1111           0011
+    //    1111           0011
+    for (uint y = 0; y < 4; y++)
+    {
+       for (uint x = 0; x < 4; x++)
+       {
+          const uint subblock = flipped ? (y >> 1) : (x >> 1);
+          const color_quad_u8& decoded = palette[subblock][block.get_selector(x, y)];
+
+          if (preserve_alpha)
+             pOut->set_rgb(decoded);
+          else
+             *pOut = decoded;
+
+          pOut++;
+       }
+    }
+
+    return success;
+ }
+
+ // A candidate encoding for one subblock: an unscaled base color (4 bits per
+ // component when m_color4 is set, otherwise 5 bits) together with the index of
+ // the intensity table applied to it.
+ struct etc1_solution_coordinates
+ {
+    inline etc1_solution_coordinates() :
+       m_unscaled_color(0, 0, 0, 0),
+       m_inten_table(0),
+       m_color4(false)
+    {
+    }
+
+    // Builds coordinates from separate components; alpha is set to 255.
+    inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) :
+       m_unscaled_color(r, g, b, 255),
+       m_inten_table(inten_table),
+       m_color4(color4)
+    {
+    }
+
+    inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) :
+       m_unscaled_color(c),
+       m_inten_table(inten_table),
+       m_color4(color4)
+    {
+    }
+
+    inline etc1_solution_coordinates(const etc1_solution_coordinates& other) :
+       m_unscaled_color(other.m_unscaled_color),
+       m_inten_table(other.m_inten_table),
+       m_color4(other.m_color4)
+    {
+    }
+
+    inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs)
+    {
+       m_color4 = rhs.m_color4;
+       m_inten_table = rhs.m_inten_table;
+       m_unscaled_color = rhs.m_unscaled_color;
+       return *this;
+    }
+
+    // Returns to the default-constructed state.
+    inline void clear()
+    {
+       m_color4 = false;
+       m_inten_table = 0;
+       m_unscaled_color.clear();
+    }
+
+    // Widens one unscaled component to 8 bits by bit replication: the nibble is
+    // duplicated for 4-bit colors, the top three bits are repeated for 5-bit ones.
+    static inline int expand_component(int c, bool color4)
+    {
+       return color4 ? (c | (c << 4)) : ((c >> 2) | (c << 3));
+    }
+
+    // Returns the base color widened to 8 bits per component.
+    inline color_quad_u8 get_scaled_color() const
+    {
+       const int scaled_r = expand_component(m_unscaled_color.r, m_color4);
+       const int scaled_g = expand_component(m_unscaled_color.g, m_color4);
+       const int scaled_b = expand_component(m_unscaled_color.b, m_color4);
+       return color_quad_u8(scaled_r, scaled_g, scaled_b);
+    }
+
+    // Writes the four block colors to pBlock_colors[0..3]: the widened base
+    // color plus each of the four modifiers of intensity table m_inten_table.
+    inline void get_block_colors(color_quad_u8* pBlock_colors)
+    {
+       const int scaled_r = expand_component(m_unscaled_color.r, m_color4);
+       const int scaled_g = expand_component(m_unscaled_color.g, m_color4);
+       const int scaled_b = expand_component(m_unscaled_color.b, m_color4);
+
+       const int* pModifiers = g_etc1_inten_tables[m_inten_table];
+       for (uint i = 0; i < 4; i++)
+       {
+          const int modifier = pModifiers[i];
+          pBlock_colors[i].set(scaled_r + modifier, scaled_g + modifier, scaled_b + modifier);
+       }
+    }
+
+    color_quad_u8 m_unscaled_color;
+    uint m_inten_table;
+    bool m_color4;
+ };
+
+ // Searches for the base color / intensity table / selectors that best encode a
+ // set of source pixels. Usage as declared here: fill in a params object, call
+ // init() with it and a results object, then call compute().
+ class etc1_optimizer
+ {
+    // Not copyable: declared but intentionally left without a definition.
+    etc1_optimizer(const etc1_optimizer&);
+    etc1_optimizer& operator= (const etc1_optimizer&);
+
+ public:
+    etc1_optimizer()
+    {
+       clear();
+    }
+
+    // Nulls the pointer members. The remaining members are not touched here.
+    // NOTE(review): presumably init() sets them before compute() runs - confirm.
+    void clear()
+    {
+       m_pParams = NULL;
+       m_pResult = NULL;
+       m_pSorted_luma = NULL;
+       m_pSorted_luma_indices = NULL;
+    }
+
+    // Packing parameters extended with the optimizer's own inputs.
+    struct params : etc1_pack_params
+    {
+       params()
+       {
+          clear();
+       }
+
+       // Copies the base packing parameters and resets the optimizer-specific ones.
+       params(const etc1_pack_params& base_params) :
+          etc1_pack_params(base_params)
+       {
+          clear_optimizer_params();
+       }
+
+       void clear()
+       {
+          etc1_pack_params::clear();
+          clear_optimizer_params();
+       }
+
+       // Resets only the members declared in this struct. The default scan list
+       // is the single delta 0, i.e. only the starting lattice point is tried.
+       void clear_optimizer_params()
+       {
+          m_num_src_pixels = 0;
+          m_pSrc_pixels = 0;
+
+          m_use_color4 = false;
+          static const int s_default_scan_delta[] = { 0 };
+          m_pScan_deltas = s_default_scan_delta;
+          m_scan_delta_size = 1;
+
+          m_base_color5.clear();
+          m_constrain_against_base_color5 = false;
+       }
+
+       uint m_num_src_pixels;
+       const color_quad_u8* m_pSrc_pixels;
+
+       bool m_use_color4;
+       const int* m_pScan_deltas;   // per-axis offsets scanned by compute()
+       uint m_scan_delta_size;      // number of entries in m_pScan_deltas
+
+       color_quad_u8 m_base_color5;
+       bool m_constrain_against_base_color5;
+    };
+
+    // Output of the optimizer. m_pSelectors points at a caller-owned buffer of
+    // m_n bytes; neither the pointer nor m_n is changed by assignment.
+    struct results
+    {
+       uint64 m_error;
+       color_quad_u8 m_block_color_unscaled;
+       uint m_block_inten_table;
+       uint m_n;
+       uint8* m_pSelectors;
+       bool m_block_color4;
+
+       // Copies the solution and the selector bytes from rhs into this
+       // object's own selector buffer. Both objects must have the same m_n.
+       inline results& operator= (const results& rhs)
+       {
+          m_block_color_unscaled = rhs.m_block_color_unscaled;
+          m_block_color4 = rhs.m_block_color4;
+          m_block_inten_table = rhs.m_block_inten_table;
+          m_error = rhs.m_error;
+          RG_ETC1_ASSERT(m_n == rhs.m_n);
+          // memcpy must not be given overlapping ranges, so skip the copy when
+          // both objects refer to the same buffer (self-assignment or sharing).
+          if (m_pSelectors != rhs.m_pSelectors)
+             memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n);
+          return *this;
+       }
+    };
+
+    void init(const params& params, results& result);
+    bool compute();
+
+ private:
+    // One evaluated candidate: its coordinates, selectors and total error.
+    struct potential_solution
+    {
+       potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false)
+       {
+       }
+
+       etc1_solution_coordinates m_coords;
+       uint8 m_selectors[8];
+       uint64 m_error;
+       bool m_valid;
+
+       // Marks the solution as invalid with maximal error; m_selectors is left as is.
+       void clear()
+       {
+          m_coords.clear();
+          m_error = cUINT64_MAX;
+          m_valid = false;
+       }
+    };
+
+    const params* m_pParams;
+    results* m_pResult;
+
+    // Largest component value compute() accepts for a candidate base color.
+    int m_limit;
+
+    vec3F m_avg_color;
+    // Center of the lattice scan in compute(); the scan deltas are added to these.
+    int m_br, m_bg, m_bb;
+    uint16 m_luma[8];
+    uint32 m_sorted_luma[2][8];
+    const uint32* m_pSorted_luma_indices;
+    uint32* m_pSorted_luma;
+
+    uint8 m_selectors[8];
+    uint8 m_best_selectors[8];
+
+    potential_solution m_best_solution;
+    potential_solution m_trial_solution;
+    uint8 m_temp_selectors[8];
+
+    bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
+    bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
+ };
+
+ bool etc1_optimizer::compute()
+ {
+ const uint n = m_pParams->m_num_src_pixels;
+ const int scan_delta_size = m_pParams->m_scan_delta_size;
+
+ // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color.
+ // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index.
+ for (int zdi = 0; zdi < scan_delta_size; zdi++)
+ {
+ const int zd = m_pParams->m_pScan_deltas[zdi];
+ const int mbb = m_bb + zd;
+ if (mbb < 0) continue; else if (mbb > m_limit) break;
+
+ for (int ydi = 0; ydi < scan_delta_size; ydi++)
+ {
+ const int yd = m_pParams->m_pScan_deltas[ydi];
+ const int mbg = m_bg + yd;
+ if (mbg < 0) continue; else if (mbg > m_limit) break;
+
+ for (int xdi = 0; xdi < scan_delta_size; xdi++)
+ {
+ const int xd = m_pParams->m_pScan_deltas[xdi];
+ const int mbr = m_br + xd;
+ if (mbr < 0) continue; else if (mbr > m_limit) break;
+
+ etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4);
+ if (m_pParams->m_quality == cHighQuality)
+ {
+ if (!evaluate_solution(coords, m_trial_solution, &m_best_solution))
+ continue;
+ }
+ else
+ {
+ if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution))
+ continue;
+ }
+
+ // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index.
+ // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors:
+ // The goal is:
+ // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0
+ // Rearranging this:
+ // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0
+ // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0
+ // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0
+ // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0
+ // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0
+ // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4
+ // So what this means:
+ // optimal_block_color = avg_input - avg_inten_delta
+ // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta.
+ // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula.
+ // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping.
+
+ const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2);
+ for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++)
+ {
+ const uint8* pSelectors = m_best_solution.m_selectors;
+ const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table];
+
+ int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0;
+ const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color());
+ for (uint r = 0; r < n; r++)
+ {
+ const uint s = *pSelectors++;
+ const int yd = pInten_table[s];
+ // Compute actual delta being applied to each pixel, taking into account clamping.
+ delta_sum_r += rg_etc1::clamp<int>(base_color.r + yd, 0, 255) - base_color.r;
+ delta_sum_g += rg_etc1::clamp<int>(base_color.g + yd, 0, 255) - base_color.g;
+ delta_sum_b += rg_etc1::clamp<int>(base_color.b + yd, 0, 255) - base_color.b;
+ }
+ if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b))
+ break;
+ const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n;
+ const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n;
+ const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n;
+ const int br1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit);
+ const int bg1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit);
+ const int bb1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit);
+
+ bool skip = false;
+
+ if ((mbr == br1) && (mbg == bg1) && (mbb == bb1))
+ skip = true;
+ else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b))
+ skip = true;
+ else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1))
+ skip = true;
+
+ if (skip)
+ break;
+
+ etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4);
+ if (m_pParams->m_quality == cHighQuality)
+ {
+ if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution))
+ break;
+ }
+ else
+ {
+ if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution))
+ break;
+ }
+
+ } // refinement_trial
+
+ } // xdi
+ } // ydi
+ } // zdi
+
+ if (!m_best_solution.m_valid)
+ {
+ m_pResult->m_error = cUINT32_MAX;
+ return false;
+ }
+
+ const uint8* pSelectors = m_best_solution.m_selectors;
+
+#ifdef RG_ETC1_BUILD_DEBUG
+ {
+ color_quad_u8 block_colors[4];
+ m_best_solution.m_coords.get_block_colors(block_colors);
+
+ const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
+ uint64 actual_error = 0;
+ for (uint i = 0; i < n; i++)
+ actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]);
+
+ RG_ETC1_ASSERT(actual_error == m_best_solution.m_error);
+ }
+#endif
+
+ m_pResult->m_error = m_best_solution.m_error;
+
+ m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color;
+ m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4;
+
+ m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table;
+ memcpy(m_pResult->m_pSelectors, pSelectors, n);
+ m_pResult->m_n = n;
+
+ return true;
+ }
+
+ // Binds the optimizer to a parameter set and a result record and precomputes the per-subblock data compute() relies on:
+ // the average color, its quantized lattice position, the per-pixel luma (r+g+b) and, for low/medium quality, the luma ordering.
+ void etc1_optimizer::init(const params& p, results& r)
+ {
+     // This version is hardcoded for 8 pixel subblocks.
+     RG_ETC1_ASSERT(p.m_num_src_pixels == 8);
+
+     const uint num_pixels = 8;
+
+     m_pResult = &r;
+     m_pParams = &p;
+
+     // 4-bit components in color4 mode, 5-bit components otherwise.
+     m_limit = p.m_use_color4 ? 15 : 31;
+
+     vec3F color_sum(0.0f);
+     for (uint px = 0; px < num_pixels; px++)
+     {
+         const color_quad_u8& src = p.m_pSrc_pixels[px];
+         const vec3F src_f(src.r, src.g, src.b);
+
+         color_sum += src_f;
+
+         // Identity permutation; reordered by the radix sort below when the fast path is used.
+         m_sorted_luma[0][px] = px;
+         m_luma[px] = static_cast<uint16>(src.r + src.g + src.b);
+     }
+     color_sum *= (1.0f / static_cast<float>(num_pixels));
+     m_avg_color = color_sum;
+
+     // Quantize the average color to the block color lattice (round to nearest, clamp to [0, m_limit]).
+     int* const pLattice[3] = { &m_br, &m_bg, &m_bb };
+     for (uint ch = 0; ch < 3; ch++)
+         *pLattice[ch] = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[ch] * m_limit / 255.0f + .5f), 0, m_limit);
+
+     if (p.m_quality <= cMediumQuality)
+     {
+         // The sort returns whichever of the two buffers ends up holding the sorted indices; the other one receives the sorted luma values.
+         m_pSorted_luma_indices = indirect_radix_sort(num_pixels, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false);
+         m_pSorted_luma = (m_pSorted_luma_indices == m_sorted_luma[0]) ? m_sorted_luma[1] : m_sorted_luma[0];
+
+         for (uint px = 0; px < num_pixels; px++)
+             m_pSorted_luma[px] = m_luma[m_pSorted_luma_indices[px]];
+     }
+
+     // Forget any solution left over from a previous subblock.
+     m_best_solution.m_error = cUINT64_MAX;
+     m_best_solution.m_valid = false;
+     m_best_solution.m_coords.clear();
+ }
+
+ // Exhaustively scores a candidate base color: for every intensity table each pixel is matched against all four block colors,
+ // and the table with the lowest total squared RGB error is recorded in trial_solution.
+ // Returns true only when pBest_solution is non-NULL and the trial error is strictly lower, in which case *pBest_solution is overwritten.
+ bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
+ {
+     trial_solution.m_valid = false;
+
+     if (m_pParams->m_constrain_against_base_color5)
+     {
+         // Every component must stay within the permitted delta range relative to the supplied base color.
+         const int delta_r = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
+         const int delta_g = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
+         const int delta_b = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
+
+         const bool below_range = rg_etc1::minimum(delta_r, delta_g, delta_b) < cETC1ColorDeltaMin;
+         const bool above_range = rg_etc1::maximum(delta_r, delta_g, delta_b) > cETC1ColorDeltaMax;
+         if (below_range || above_range)
+             return false;
+     }
+
+     const color_quad_u8 base_color(coords.get_scaled_color());
+
+     const uint num_pixels = 8;
+
+     trial_solution.m_error = cUINT64_MAX;
+
+     for (uint table_index = 0; table_index < cETC1IntenModifierValues; table_index++)
+     {
+         const int* pModifiers = g_etc1_inten_tables[table_index];
+
+         // The four colors this base color can produce with the current intensity table.
+         color_quad_u8 block_colors[4];
+         for (uint s = 0; s < 4; s++)
+         {
+             const int modifier = pModifiers[s];
+             block_colors[s].set(base_color.r + modifier, base_color.g + modifier, base_color.b + modifier, 0);
+         }
+
+         uint64 table_error = 0;
+
+         for (uint px = 0; px < num_pixels; px++)
+         {
+             const color_quad_u8& src_pixel = m_pParams->m_pSrc_pixels[px];
+
+             // Start with selector 0 and keep the first selector that gives a strictly smaller error.
+             uint winner = 0;
+             uint lowest_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b);
+             for (uint s = 1; s < 4; s++)
+             {
+                 const uint candidate_error = rg_etc1::square(src_pixel.r - block_colors[s].r) + rg_etc1::square(src_pixel.g - block_colors[s].g) + rg_etc1::square(src_pixel.b - block_colors[s].b);
+                 if (candidate_error < lowest_error)
+                 {
+                     lowest_error = candidate_error;
+                     winner = s;
+                 }
+             }
+
+             m_temp_selectors[px] = static_cast<uint8>(winner);
+
+             // Abandon this table as soon as it can no longer beat the best table so far.
+             table_error += lowest_error;
+             if (table_error >= trial_solution.m_error)
+                 break;
+         }
+
+         if (table_error < trial_solution.m_error)
+         {
+             trial_solution.m_valid = true;
+             trial_solution.m_error = table_error;
+             trial_solution.m_coords.m_inten_table = table_index;
+             memcpy(trial_solution.m_selectors, m_temp_selectors, 8);
+         }
+     }
+     trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
+     trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
+
+     if ((!pBest_solution) || (trial_solution.m_error >= pBest_solution->m_error))
+         return false;
+
+     *pBest_solution = trial_solution;
+     return true;
+ }
+
+ bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
+ { // Scores a candidate base color by classifying the luma-sorted pixels against the midpoints between adjacent block colors. Returns true only when pBest_solution is non-NULL and was replaced by a strictly better trial.
+ if (m_pParams->m_constrain_against_base_color5)
+ {
+ const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
+ const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
+ const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
+
+ if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) // a component delta falls outside the permitted range
+ {
+ trial_solution.m_valid = false;
+ return false;
+ }
+ }
+
+ const color_quad_u8 base_color(coords.get_scaled_color());
+
+ const uint n = 8;
+
+ trial_solution.m_error = cUINT64_MAX;
+
+ for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) // tables are tried from the highest index down to 0
+ {
+ const int* pInten_table = g_etc1_inten_tables[inten_table];
+
+ uint block_inten[4]; // r+g+b of each block color, comparable with the r+g+b luma values computed in init()
+ color_quad_u8 block_colors[4];
+ for (uint s = 0; s < 4; s++)
+ {
+ const int yd = pInten_table[s];
+ color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
+ block_colors[s] = block_color;
+ block_inten[s] = block_color.r + block_color.g + block_color.b;
+ }
+
+ // evaluate_solution_fast() enforces/assumes a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors.
+ // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast.
+ // 0 1 2 3
+ // 01 12 23
+ const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; // stored as sums (2x the midpoint), hence the "* 2" in the comparisons below
+
+ uint64 total_error = 0;
+ const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
+ if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) // even the last (n-1) sorted pixel is below the 0/1 midpoint: every pixel gets selector 0
+ {
+ if (block_inten[0] > m_pSorted_luma[n - 1])
+ {
+ const uint min_error = intabs(block_inten[0] - m_pSorted_luma[n - 1]);
+ if (min_error >= trial_solution.m_error) // skip this table when this luma distance alone already reaches the best trial error
+ continue;
+ }
+
+ memset(&m_temp_selectors[0], 0, n);
+
+ for (uint c = 0; c < n; c++)
+ total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]);
+ }
+ else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) // even the first sorted pixel is at or above the 2/3 midpoint: every pixel gets selector 3
+ {
+ if (m_pSorted_luma[0] > block_inten[3])
+ {
+ const uint min_error = intabs(m_pSorted_luma[0] - block_inten[3]);
+ if (min_error >= trial_solution.m_error) // same early rejection as above, mirrored for the upper end
+ continue;
+ }
+
+ memset(&m_temp_selectors[0], 3, n);
+
+ for (uint c = 0; c < n; c++)
+ total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]);
+ }
+ else
+ {
+ uint cur_selector = 0, c; // cur_selector never decreases across the sorted pixels; c is shared with the tail loop after "done"
+ for (c = 0; c < n; c++)
+ {
+ const uint y = m_pSorted_luma[c];
+ while ((y * 2) >= block_inten_midpoints[cur_selector]) // advance past every midpoint this pixel lies at or above
+ if (++cur_selector > 2)
+ goto done; // past the last midpoint: this pixel and all remaining ones take selector 3
+ const uint sorted_pixel_index = m_pSorted_luma_indices[c]; // map the sorted position back to the original pixel index
+ m_temp_selectors[sorted_pixel_index] = static_cast<uint8>(cur_selector);
+ total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
+ }
+done:
+ while (c < n) // no-op when the loop above consumed every pixel
+ {
+ const uint sorted_pixel_index = m_pSorted_luma_indices[c];
+ m_temp_selectors[sorted_pixel_index] = 3;
+ total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
+ ++c;
+ }
+ }
+
+ if (total_error < trial_solution.m_error) // this table is the best seen so far for these coordinates
+ {
+ trial_solution.m_error = total_error;
+ trial_solution.m_coords.m_inten_table = inten_table;
+ memcpy(trial_solution.m_selectors, m_temp_selectors, n);
+ trial_solution.m_valid = true;
+ if (!total_error) // a zero-error match cannot be improved, stop trying tables
+ break;
+ }
+ }
+ trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
+ trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
+
+ bool success = false;
+ if (pBest_solution)
+ {
+ if (trial_solution.m_error < pBest_solution->m_error) // strict improvement required
+ {
+ *pBest_solution = trial_solution;
+ success = true;
+ }
+ }
+
+ return success;
+ }
+
+ // Decodes one packed color component to its 8-bit value: expands the 5-bit (diff != 0) or 4-bit (diff == 0)
+ // packed value to 8 bits, adds the intensity modifier for (inten, selector) and clamps the sum to [0, 255].
+ static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c)
+ {
+     const uint limit = diff ? 32 : 16;
+     (void)limit; // only referenced by the assert below
+     RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit));
+
+     // Bit replication: 5 bits -> 8 bits, or 4 bits -> 8 bits.
+     const int expanded = diff ? ((packed_c >> 2) | (packed_c << 3)) : (packed_c | (packed_c << 4));
+
+     return rg_etc1::clamp<int>(expanded + g_etc1_inten_tables[inten][selector], 0, 255);
+ }
+
+ // Returns (t + (t >> 8)) >> 8 with t = a*b + 128: the usual division-free form of a rounded a*b/255.
+ static inline int mul_8bit(int a, int b)
+ {
+     const int biased_product = a * b + 128;
+     return (biased_product + (biased_product >> 8)) >> 8;
+ }
+
+ // Fills the lookup tables the packer relies on:
+ //  - g_etc1_inverse_lookup: for every (diff, intensity table, selector) combination and every 8-bit target value,
+ //    the packed component whose decoded value is closest (low byte) together with the absolute error (high byte).
+ //  - g_quant5_tab: for inputs -8..263 (stored at index input + 8), the input clamped to [0, 255], quantized to 5 bits and expanded back to 8 bits.
+ void pack_etc1_block_init()
+ {
+     for (uint diff = 0; diff < 2; diff++)
+     {
+         // 32 packed values for 5-bit components, 16 for 4-bit components.
+         const uint num_packed_values = diff ? 32 : 16;
+
+         for (uint inten = 0; inten < 8; inten++)
+         {
+             for (uint selector = 0; selector < 4; selector++)
+             {
+                 const uint table_row = diff + (inten << 1) + (selector << 4);
+                 for (uint color = 0; color < 256; color++)
+                 {
+                     uint closest_packed_c = 0;
+                     uint smallest_error = cUINT32_MAX;
+                     for (uint packed_c = 0; packed_c < num_packed_values; packed_c++)
+                     {
+                         const int decoded = etc1_decode_value(diff, inten, selector, packed_c);
+                         const uint err = labs(decoded - static_cast<int>(color));
+                         if (err >= smallest_error)
+                             continue;
+
+                         smallest_error = err;
+                         closest_packed_c = packed_c;
+                         if (!smallest_error)
+                             break; // exact match, nothing better is possible
+                     }
+                     RG_ETC1_ASSERT(smallest_error <= 255);
+                     g_etc1_inverse_lookup[table_row][color] = static_cast<uint16>(closest_packed_c | (smallest_error << 8));
+                 }
+             }
+         }
+     }
+
+     for (int i = 0; i < 256 + 16; i++)
+     {
+         const int v = clamp<int>(i - 8, 0, 255);
+         const int q5 = mul_8bit(v, 31);
+         // Expand the 5-bit value back to 8 bits by bit replication.
+         g_quant5_tab[i] = static_cast<uint8>((q5 << 3) | (q5 >> 2));
+     }
+ }
+
+ // Packs solid color blocks efficiently using a set of small precomputed tables.
+ // For random 888 inputs, MSE results are better than Ericsson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time.
+ //
+ // block       - receives the 8 encoded bytes.
+ // pColor      - the block's color; components 0..2 are read.
+ // pack_params - currently unused.
+ // Returns the squared error of a single pixel (the caller scales it by the pixel count).
+ // pack_etc1_block_init() must have filled the lookup tables beforehand.
+ static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, etc1_pack_params& pack_params)
+ {
+     (void)pack_params;
+     RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]);
+
+     // s_next_comp[i] and s_next_comp[i + 1] are the two components other than component i.
+     static uint s_next_comp[4] = { 1, 2, 0, 1 };
+
+     uint best_error = cUINT32_MAX, best_i = 0;
+     int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
+
+     // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
+     for (uint i = 0; i < 3; i++)
+     {
+         const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
+
+         // Also try the neighbouring values of component i: a small error there may allow a better fit for the other two components.
+         const int delta_range = 1;
+         for (int delta = -delta_range; delta <= delta_range; delta++)
+         {
+             const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
+
+             const uint16* pTable;
+             if (!c_plus_delta)
+                 pTable = g_color8_to_etc_block_config_0_255[0];
+             else if (c_plus_delta == 255)
+                 pTable = g_color8_to_etc_block_config_0_255[1];
+             else
+                 pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
+
+             // Each list is terminated by 0xFFFF.
+             do
+             {
+                 // x packs: bit 0 = diff, bits 1..3 = intensity table, bits 4..5 = selector, bits 8..15 = packed component.
+                 const uint x = *pTable++;
+
+#ifdef RG_ETC1_BUILD_DEBUG
+                 const uint diff = x & 1;
+                 const uint inten = (x >> 1) & 7;
+                 const uint selector = (x >> 4) & 3;
+                 const uint p0 = (x >> 8) & 255;
+                 RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
+#endif
+
+                 // Best packed values (low byte) and their errors (high byte) for the other two components under this configuration.
+                 const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
+                 uint16 p1 = pInverse_table[c1];
+                 uint16 p2 = pInverse_table[c2];
+                 const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
+                 if (trial_error < best_error)
+                 {
+                     best_error = trial_error;
+                     best_x = x;
+                     best_packed_c1 = p1 & 0xFF;
+                     best_packed_c2 = p2 & 0xFF;
+                     best_i = i;
+                     if (!best_error)
+                         goto found_perfect_match;
+                 }
+             } while (*pTable != 0xFFFF);
+         }
+     }
+found_perfect_match:
+
+     const uint diff = best_x & 1;
+     const uint inten = (best_x >> 1) & 7;
+
+     // Both subblocks share the same intensity table.
+     block.m_bytes[3] = static_cast<uint8>(((inten | (inten << 3)) << 2) | (diff << 1));
+
+     // Every pixel uses the same selector, so each selector bit plane is all ones or all zeros.
+     // The bytes are written individually: m_bytes is a byte array with no alignment guarantee, so a 16-bit store
+     // through a reinterpret_cast pointer may be misaligned and breaks the aliasing rules.
+     const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3];
+     memset(&block.m_bytes[4], (etc1_selector & 2) ? 0xFF : 0, 2);
+     memset(&block.m_bytes[6], (etc1_selector & 1) ? 0xFF : 0, 2);
+
+     const uint best_packed_c0 = (best_x >> 8) & 255;
+     if (diff)
+     {
+         // Differential mode: 5-bit base color in the top bits, zero delta in the low 3 bits.
+         block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 << 3);
+         block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 << 3);
+         block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 << 3);
+     }
+     else
+     {
+         // Individual mode: the same 4-bit color in both nibbles.
+         block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 | (best_packed_c0 << 4));
+         block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 | (best_packed_c1 << 4));
+         block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 | (best_packed_c2 << 4));
+     }
+
+     return best_error;
+ }
+
+ // Table-driven solid color search like pack_etc1_block_solid_color(), but restricted to one mode and optionally to a base color.
+ //  results               - receives the best configuration (left untouched when nothing qualifies).
+ //  num_colors            - number of pixels; scales the returned error and sizes the selector fill.
+ //  pColor                - the solid color; components 0..2 are read.
+ //  pack_params           - currently unused.
+ //  use_diff              - only configurations whose diff bit equals this flag are considered.
+ //  pBase_color5_unscaled - when non-NULL and use_diff is set, every packed component must lie within
+ //                          [cETC1ColorDeltaMin, cETC1ColorDeltaMax] of the matching component of this color.
+ // Returns the total squared error, or cUINT32_MAX when no configuration qualified.
+ static uint pack_etc1_block_solid_color_constrained(
+     etc1_optimizer::results& results,
+     uint num_colors, const uint8* pColor,
+     etc1_pack_params& pack_params,
+     bool use_diff,
+     const color_quad_u8* pBase_color5_unscaled)
+ {
+     RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]);
+
+     (void)pack_params;
+     static uint s_next_comp[4] = { 1, 2, 0, 1 };
+
+     // Past the diff filter below, diff always equals use_diff, so the base color check can be decided up front.
+     const bool constrain_to_base = use_diff && (pBase_color5_unscaled != NULL);
+
+     uint best_error = cUINT32_MAX, best_i = 0;
+     int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
+
+     // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
+     for (uint i = 0; i < 3; i++)
+     {
+         const uint comp1 = s_next_comp[i], comp2 = s_next_comp[i + 1];
+         const uint c1 = pColor[comp1], c2 = pColor[comp2];
+
+         const int delta_range = 1;
+         for (int delta = -delta_range; delta <= delta_range; delta++)
+         {
+             const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
+
+             const uint16* pTable;
+             if (c_plus_delta == 0)
+                 pTable = g_color8_to_etc_block_config_0_255[0];
+             else if (c_plus_delta == 255)
+                 pTable = g_color8_to_etc_block_config_0_255[1];
+             else
+                 pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
+
+             // "continue" jumps to the loop condition, so a rejected entry followed by the 0xFFFF terminator still ends the loop.
+             do
+             {
+                 const uint x = *pTable++;
+                 const uint diff = x & 1;
+                 if (diff != static_cast<uint>(use_diff))
+                     continue;
+
+                 if (constrain_to_base)
+                 {
+                     const int packed_c0 = (x >> 8) & 255;
+                     const int base_delta0 = packed_c0 - static_cast<int>(pBase_color5_unscaled->c[i]);
+                     if ((base_delta0 < cETC1ColorDeltaMin) || (base_delta0 > cETC1ColorDeltaMax))
+                         continue;
+                 }
+
+#ifdef RG_ETC1_BUILD_DEBUG
+                 {
+                     const uint inten = (x >> 1) & 7;
+                     const uint selector = (x >> 4) & 3;
+                     const uint p0 = (x >> 8) & 255;
+                     RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
+                 }
+#endif
+
+                 const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
+                 const uint16 p1 = pInverse_table[c1];
+                 const uint16 p2 = pInverse_table[c2];
+
+                 if (constrain_to_base)
+                 {
+                     const int base_delta1 = (p1 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[comp1]);
+                     const int base_delta2 = (p2 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[comp2]);
+                     const bool delta1_ok = (base_delta1 >= cETC1ColorDeltaMin) && (base_delta1 <= cETC1ColorDeltaMax);
+                     const bool delta2_ok = (base_delta2 >= cETC1ColorDeltaMin) && (base_delta2 <= cETC1ColorDeltaMax);
+                     if (!(delta1_ok && delta2_ok))
+                         continue;
+                 }
+
+                 const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
+                 if (trial_error < best_error)
+                 {
+                     best_i = i;
+                     best_x = x;
+                     best_packed_c1 = p1 & 0xFF;
+                     best_packed_c2 = p2 & 0xFF;
+                     best_error = trial_error;
+                     if (!best_error)
+                         goto found_perfect_match;
+                 }
+             } while (*pTable != 0xFFFF);
+         }
+     }
+found_perfect_match:
+
+     // Nothing satisfied the constraints: report failure without touching results.
+     if (best_error == cUINT32_MAX)
+         return best_error;
+
+     // The per-pixel error applies to every pixel of the solid subblock.
+     best_error *= num_colors;
+
+     results.m_n = num_colors;
+     results.m_block_color4 = !(best_x & 1);
+     results.m_block_inten_table = (best_x >> 1) & 7;
+     memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors);
+
+     const uint best_packed_c0 = (best_x >> 8) & 255;
+     results.m_block_color_unscaled[best_i] = static_cast<uint8>(best_packed_c0);
+     results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1);
+     results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast<uint8>(best_packed_c2);
+     results.m_error = best_error;
+
+     return best_error;
+ }
+
+ // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555.
+ // Error-diffusion dithers the 16 pixels of `block` to 5 bits per channel (expanded back to 8 bits) and writes them to `dest`.
+ // Alpha bytes of `dest` are left at 0xFF.
+ static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block)
+ {
+     // Two rows of per-pixel quantization errors; the row pointers are swapped after every scanline.
+     int row_errors[8];
+     int* cur_err = row_errors;
+     int* prev_err = row_errors + 4;
+
+     // Offset by 8 so that slightly negative / above-255 indices stay inside the table.
+     uint8* quant = g_quant5_tab + 8;
+
+     memset(dest, 0xFF, sizeof(color_quad_u8)*16);
+
+     // process channels separately
+     for (int ch = 0; ch < 3; ch++)
+     {
+         const uint8* src = reinterpret_cast<const uint8*>(block) + ch;
+         uint8* dst = reinterpret_cast<uint8*>(dest) + ch;
+
+         memset(row_errors, 0, sizeof(row_errors));
+         for (int y = 0; y < 4; y++)
+         {
+             // pixel 0
+             dst[0] = quant[src[0] + ((3*prev_err[1] + 5*prev_err[0]) >> 4)];
+             cur_err[0] = src[0] - dst[0];
+
+             // pixel 1
+             dst[4] = quant[src[4] + ((7*cur_err[0] + 3*prev_err[2] + 5*prev_err[1] + prev_err[0]) >> 4)];
+             cur_err[1] = src[4] - dst[4];
+
+             // pixel 2
+             dst[8] = quant[src[8] + ((7*cur_err[1] + 3*prev_err[3] + 5*prev_err[2] + prev_err[1]) >> 4)];
+             cur_err[2] = src[8] - dst[8];
+
+             // pixel 3
+             dst[12] = quant[src[12] + ((7*cur_err[2] + 5*prev_err[3] + prev_err[2]) >> 4)];
+             cur_err[3] = src[12] - dst[12];
+
+             // advance to next line
+             int* swap_tmp = cur_err;
+             cur_err = prev_err;
+             prev_err = swap_tmp;
+             src += 16;
+             dst += 16;
+         }
+     }
+ }
+
+ unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params)
+ { // Packs 16 source pixels into the 8-byte block at pETC1_block and returns the squared error of the chosen encoding. Tries both flip settings and both color modes and keeps the combination with the lowest total error.
+ const color_quad_u8* pSrc_pixels = reinterpret_cast<const color_quad_u8*>(pSrc_pixels_rgba);
+ etc1_block& dst_block = *static_cast<etc1_block*>(pETC1_block);
+
+#ifdef RG_ETC1_BUILD_DEBUG
+ // Ensure all alpha values are 0xFF.
+ for (uint i = 0; i < 16; i++)
+ {
+ RG_ETC1_ASSERT(pSrc_pixels[i].a == 255);
+ }
+#endif
+
+ color_quad_u8 src_pixel0(pSrc_pixels[0]); // NOTE(review): not referenced again in this function
+
+ // Check for solid block.
+ const uint32 first_pixel_u32 = pSrc_pixels->m_u32;
+ int r;
+ for (r = 15; r >= 1; --r)
+ if (pSrc_pixels[r].m_u32 != first_pixel_u32)
+ break;
+ if (!r) // r reached 0 only if pixels 1..15 all equal pixel 0
+ return static_cast<unsigned int>(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params));
+
+ color_quad_u8 dithered_pixels[16];
+ if (pack_params.m_dithering)
+ {
+ dither_block_555(dithered_pixels, pSrc_pixels);
+ pSrc_pixels = dithered_pixels; // from here on the dithered copy is what gets encoded
+ }
+
+ etc1_optimizer optimizer;
+
+ uint64 best_error = cUINT64_MAX;
+ uint best_flip = false, best_use_color4 = false;
+
+ uint8 best_selectors[2][8];
+ etc1_optimizer::results best_results[2]; // best encoding found so far, one entry per subblock
+ for (uint i = 0; i < 2; i++)
+ {
+ best_results[i].m_n = 8;
+ best_results[i].m_pSelectors = best_selectors[i];
+ }
+
+ uint8 selectors[3][8];
+ etc1_optimizer::results results[3]; // [0] and [1]: trial results per subblock; [2]: scratch slot for the solid-subblock candidate
+
+ for (uint i = 0; i < 3; i++)
+ {
+ results[i].m_n = 8;
+ results[i].m_pSelectors = selectors[i];
+ }
+
+ color_quad_u8 subblock_pixels[8];
+
+ etc1_optimizer::params params(pack_params);
+ params.m_num_src_pixels = 8;
+ params.m_pSrc_pixels = subblock_pixels;
+
+ for (uint flip = 0; flip < 2; flip++)
+ {
+ for (uint use_color4 = 0; use_color4 < 2; use_color4++)
+ {
+ uint64 trial_error = 0;
+
+ uint subblock; // declared outside the loop so an early break can be detected afterwards
+ for (subblock = 0; subblock < 2; subblock++)
+ {
+ if (flip)
+ memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); // flipped: 8 consecutive pixels (source indices 0-7 or 8-15)
+ else
+ {
+ const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; // non-flipped: gather source indices {0,4,8,12} and {1,5,9,13}, offset by 2 for the second subblock
+ subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12];
+ subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13];
+ }
+
+ results[2].m_error = cUINT64_MAX; // no solid-subblock candidate unless one is found below
+ if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4)))
+ {
+ const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32;
+ for (r = 7; r >= 1; --r)
+ if (subblock_pixels[r].m_u32 != subblock_pixel0_u32)
+ break;
+ if (!r) // all 8 subblock pixels are identical: also try the table-driven solid color packer
+ {
+ pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, pack_params, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL);
+ }
+ }
+
+ params.m_use_color4 = (use_color4 != 0);
+ params.m_constrain_against_base_color5 = false;
+
+ if ((!use_color4) && (subblock)) // second subblock without color4: constrain its base color relative to the first subblock's
+ {
+ params.m_constrain_against_base_color5 = true;
+ params.m_base_color5 = results[0].m_block_color_unscaled;
+ }
+
+ if (params.m_quality == cHighQuality) // the quality level selects how far around the average color the first scan reaches
+ {
+ static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 };
+ params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4);
+ params.m_pScan_deltas = s_scan_delta_0_to_4;
+ }
+ else if (params.m_quality == cMediumQuality)
+ {
+ static const int s_scan_delta_0_to_1[] = { -1, 0, 1 };
+ params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1);
+ params.m_pScan_deltas = s_scan_delta_0_to_1;
+ }
+ else
+ {
+ static const int s_scan_delta_0[] = { 0 };
+ params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0);
+ params.m_pScan_deltas = s_scan_delta_0;
+ }
+
+ optimizer.init(params, results[subblock]);
+ if (!optimizer.compute()) // no valid solution: abandon this flip/use_color4 combination
+ break;
+
+ if (params.m_quality >= cMediumQuality)
+ {
+ // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions.
+ const uint refinement_error_thresh0 = 3000;
+ const uint refinement_error_thresh1 = 6000;
+ if (results[subblock].m_error > refinement_error_thresh0) // error still high: run a second scan over deltas outside the first range
+ {
+ if (params.m_quality == cMediumQuality)
+ {
+ static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 };
+ params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3);
+ params.m_pScan_deltas = s_scan_delta_2_to_3;
+ }
+ else
+ {
+ static const int s_scan_delta_5_to_5[] = { -5, 5 };
+ static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 };
+ if (results[subblock].m_error > refinement_error_thresh1)
+ {
+ params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8);
+ params.m_pScan_deltas = s_scan_delta_5_to_8;
+ }
+ else
+ {
+ params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5);
+ params.m_pScan_deltas = s_scan_delta_5_to_5;
+ }
+ }
+
+ if (!optimizer.compute()) // called again without init(), so the optimizer's current best solution is kept
+ break;
+ }
+
+ if (results[2].m_error < results[subblock].m_error) // prefer the solid-subblock candidate when it is better
+ results[subblock] = results[2];
+ }
+
+ trial_error += results[subblock].m_error;
+ if (trial_error >= best_error) // already no better than the best combination: stop early
+ break;
+ }
+
+ if (subblock < 2) // the subblock loop exited early, so this combination is discarded
+ continue;
+
+ best_error = trial_error;
+ best_results[0] = results[0];
+ best_results[1] = results[1];
+ best_flip = flip;
+ best_use_color4 = use_color4;
+
+ } // use_color4
+
+ } // flip
+
+ int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r;
+ int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g;
+ int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b;
+ RG_ETC1_ASSERT(best_use_color4 || ((rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax)));
+
+ if (best_use_color4) // two independent 4-bit colors per byte: subblock 0 in the high nibble, subblock 1 in the low nibble
+ {
+ dst_block.m_bytes[0] = static_cast<uint8>(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4));
+ dst_block.m_bytes[1] = static_cast<uint8>(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4));
+ dst_block.m_bytes[2] = static_cast<uint8>(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4));
+ }
+ else // subblock 0's color in the top 5 bits, delta to subblock 1 in the low 3 bits (a negative delta gets 8 added before being OR'ed in)
+ {
+ if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast<uint8>((best_results[0].m_block_color_unscaled.r << 3) | dr);
+ if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast<uint8>((best_results[0].m_block_color_unscaled.g << 3) | dg);
+ if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast<uint8>((best_results[0].m_block_color_unscaled.b << 3) | db);
+ }
+
+ dst_block.m_bytes[3] = static_cast<uint8>( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip ); // intensity tables, then bit 1 = set when color4 was NOT used, bit 0 = flip
+
+ uint selector0 = 0, selector1 = 0; // selector0 collects bit 0 of each pixel's ETC1 selector, selector1 collects bit 1
+ if (best_flip)
+ {
+ // flipped:
+ // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
+ // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }
+ //
+ // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
+ // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
+ const uint8* pSelectors0 = best_results[0].m_pSelectors;
+ const uint8* pSelectors1 = best_results[1].m_pSelectors;
+ for (int x = 3; x >= 0; --x) // bits are shifted in from the last pixel to the first
+ {
+ uint b;
+ b = g_selector_index_to_etc1[pSelectors1[4 + x]];
+ selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+ b = g_selector_index_to_etc1[pSelectors1[x]];
+ selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+ b = g_selector_index_to_etc1[pSelectors0[4 + x]];
+ selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+ b = g_selector_index_to_etc1[pSelectors0[x]];
+ selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+ }
+ }
+ else
+ {
+ // non-flipped:
+ // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
+ // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
+ //
+ // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
+ // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
+ for (int subblock = 1; subblock >= 0; --subblock)
+ {
+ const uint8* pSelectors = best_results[subblock].m_pSelectors + 4; // start with the subblock's second group of four selectors
+ for (uint i = 0; i < 2; i++)
+ {
+ uint b;
+ b = g_selector_index_to_etc1[pSelectors[3]];
+ selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+ b = g_selector_index_to_etc1[pSelectors[2]];
+ selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+ b = g_selector_index_to_etc1[pSelectors[1]];
+ selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+ b = g_selector_index_to_etc1[pSelectors[0]];
+ selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1);
+
+ pSelectors -= 4;
+ }
+ }
+ }
+
+ dst_block.m_bytes[4] = static_cast<uint8>(selector1 >> 8); dst_block.m_bytes[5] = static_cast<uint8>(selector1 & 0xFF); // high byte first
+ dst_block.m_bytes[6] = static_cast<uint8>(selector0 >> 8); dst_block.m_bytes[7] = static_cast<uint8>(selector0 & 0xFF);
+
+ return static_cast<unsigned int>(best_error);
+ }
+
+} // namespace rg_etc1
diff --git a/drivers/etc1/rg_etc1.h b/drivers/etc1/rg_etc1.h
index 9a701506fd..9ce89a6cc6 100644
--- a/drivers/etc1/rg_etc1.h
+++ b/drivers/etc1/rg_etc1.h
@@ -1,76 +1,76 @@
-// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
-// Please see ZLIB license at the end of this file.
-#pragma once
-
-namespace rg_etc1
-{
- // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels.
- // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping.
- // This function is thread safe, and does not dynamically allocate any memory.
- // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255.
- bool unpack_etc1_block(const void *pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false);
-
- // Quality setting = the higher the quality, the slower.
- // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality).
- enum etc1_quality
- {
- cLowQuality,
- cMediumQuality,
- cHighQuality,
- };
-
- struct etc1_pack_params
- {
- etc1_quality m_quality;
- bool m_dithering;
-
- inline etc1_pack_params()
- {
- clear();
- }
-
- void clear()
- {
- m_quality = cHighQuality;
- m_dithering = false;
- }
- };
-
- // Important: pack_etc1_block_init() must be called before calling pack_etc1_block().
- void pack_etc1_block_init();
-
- // Packs a 4x4 block of 32bpp RGBA pixels to an 8-byte ETC1 block.
- // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255.
- // Returns squared error of result.
- // This function is thread safe, and does not dynamically allocate any memory.
- // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE.
- unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params);
-
-} // namespace rg_etc1
-
-//------------------------------------------------------------------------------
-//
-// rg_etc1 uses the ZLIB license:
-// http://opensource.org/licenses/Zlib
-//
-// Copyright (c) 2012 Rich Geldreich
-//
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-//
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-//
-// 1. The origin of this software must not be misrepresented; you must not
-// claim that you wrote the original software. If you use this software
-// in a product, an acknowledgment in the product documentation would be
-// appreciated but is not required.
-//
-// 2. Altered source versions must be plainly marked as such, and must not be
-// misrepresented as being the original software.
-//
-// 3. This notice may not be removed or altered from any source distribution.
-//
-//------------------------------------------------------------------------------
+// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
+// Please see ZLIB license at the end of this file.
+#pragma once
+
+namespace rg_etc1
+{
+ // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels.
+ // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping.
+ // This function is thread safe, and does not dynamically allocate any memory.
+ // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255.
+ bool unpack_etc1_block(const void *pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false);
+
+ // Quality setting = the higher the quality, the slower.
+ // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality).
+ enum etc1_quality
+ {
+ cLowQuality,
+ cMediumQuality,
+ cHighQuality,
+ };
+
+ struct etc1_pack_params
+ {
+ etc1_quality m_quality;
+ bool m_dithering;
+
+ inline etc1_pack_params()
+ {
+ clear();
+ }
+
+ void clear()
+ {
+ m_quality = cHighQuality;
+ m_dithering = false;
+ }
+ };
+
+ // Important: pack_etc1_block_init() must be called before calling pack_etc1_block().
+ void pack_etc1_block_init();
+
+ // Packs a 4x4 block of 32bpp RGBA pixels to an 8-byte ETC1 block.
+ // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255.
+ // Returns squared error of result.
+ // This function is thread safe, and does not dynamically allocate any memory.
+ // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE.
+ unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params);
+
+} // namespace rg_etc1
+
+//------------------------------------------------------------------------------
+//
+// rg_etc1 uses the ZLIB license:
+// http://opensource.org/licenses/Zlib
+//
+// Copyright (c) 2012 Rich Geldreich
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source distribution.
+//
+//------------------------------------------------------------------------------
diff --git a/drivers/gles2/rasterizer_gles2.cpp b/drivers/gles2/rasterizer_gles2.cpp
index d3a5f3b5bc..3d75ed29f3 100644
--- a/drivers/gles2/rasterizer_gles2.cpp
+++ b/drivers/gles2/rasterizer_gles2.cpp
@@ -4145,7 +4145,7 @@ void RasterizerGLES2::begin_frame() {
//fragment_lighting=Globals::get_singleton()->get("rasterizer/use_fragment_lighting");
#ifdef TOOLS_ENABLED
- canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("rasterizer/use_pixel_snap",false));
+ canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("display/use_2d_pixel_snap",false));
shadow_filter=ShadowFilterTechnique(int(Globals::get_singleton()->get("rasterizer/shadow_filter")));
#endif
@@ -4160,7 +4160,6 @@ void RasterizerGLES2::begin_frame() {
time_delta=time-last_time;
last_time=time;
frame++;
- clear_viewport(Color(1,0,0.5));
_rinfo.vertex_count=0;
_rinfo.object_count=0;
@@ -5970,6 +5969,10 @@ void RasterizerGLES2::_render(const Geometry *p_geometry,const Material *p_mater
if (element_count==0)
return;
+ if (mm->visible>=0) {
+ element_count=MIN(element_count,mm->visible);
+ }
+
const MultiMesh::Element *elements=&mm->elements[0];
_rinfo.vertex_count+=s->array_len*element_count;
@@ -10804,7 +10807,7 @@ void RasterizerGLES2::init() {
copy_shader.set_conditional(CopyShaderGLES2::USE_8BIT_HDR,!use_fp16_fb);
canvas_shader.set_conditional(CanvasShaderGLES2::USE_DEPTH_SHADOWS,read_depth_supported);
- canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("rasterizer/use_pixel_snap",false));
+ canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("display/use_2d_pixel_snap",false));
npo2_textures_available=true;
//fragment_lighting=false;
@@ -11188,6 +11191,12 @@ RasterizerGLES2::RasterizerGLES2(bool p_compress_arrays,bool p_keep_ram_copy,boo
tc0_idx=0;
};
+void RasterizerGLES2::restore_framebuffer() {
+
+ glBindFramebuffer(GL_FRAMEBUFFER, base_framebuffer);
+
+}
+
RasterizerGLES2::~RasterizerGLES2() {
memdelete_arr(skinned_buffer);
diff --git a/drivers/gles2/rasterizer_gles2.h b/drivers/gles2/rasterizer_gles2.h
index d337ecfb64..f759e84b53 100644
--- a/drivers/gles2/rasterizer_gles2.h
+++ b/drivers/gles2/rasterizer_gles2.h
@@ -1695,6 +1695,8 @@ public:
void reload_vram();
virtual bool has_feature(VS::Features p_feature) const;
+
+ virtual void restore_framebuffer();
static RasterizerGLES2* get_singleton();
diff --git a/drivers/gles2/shader_compiler_gles2.cpp b/drivers/gles2/shader_compiler_gles2.cpp
index 157f2e398b..d57512c936 100644
--- a/drivers/gles2/shader_compiler_gles2.cpp
+++ b/drivers/gles2/shader_compiler_gles2.cpp
@@ -132,18 +132,18 @@ String ShaderCompilerGLES2::dump_node_code(SL::Node *p_node,int p_level,bool p_a
SL::BlockNode *bnode=(SL::BlockNode*)p_node;
//variables
- code+="{"ENDL;
+ code+="{" ENDL;
for(Map<StringName,SL::DataType>::Element *E=bnode->variables.front();E;E=E->next()) {
- code+=_mktab(p_level)+_typestr(E->value())+" "+replace_string(E->key())+";"ENDL;
+ code+=_mktab(p_level)+_typestr(E->value())+" "+replace_string(E->key())+";" ENDL;
}
for(int i=0;i<bnode->statements.size();i++) {
- code+=_mktab(p_level)+dump_node_code(bnode->statements[i],p_level)+";"ENDL;
+ code+=_mktab(p_level)+dump_node_code(bnode->statements[i],p_level)+";" ENDL;
}
- code+="}"ENDL;
+ code+="}" ENDL;
} break;
case SL::Node::TYPE_VARIABLE: {
@@ -489,15 +489,15 @@ String ShaderCompilerGLES2::dump_node_code(SL::Node *p_node,int p_level,bool p_a
SL::ControlFlowNode *cfnode=(SL::ControlFlowNode*)p_node;
if (cfnode->flow_op==SL::FLOW_OP_IF) {
- code+="if ("+dump_node_code(cfnode->statements[0],p_level)+") {"ENDL;
+ code+="if ("+dump_node_code(cfnode->statements[0],p_level)+") {" ENDL;
code+=dump_node_code(cfnode->statements[1],p_level+1);
if (cfnode->statements.size()==3) {
- code+="} else {"ENDL;
+ code+="} else {" ENDL;
code+=dump_node_code(cfnode->statements[2],p_level+1);
}
- code+="}"ENDL;
+ code+="}" ENDL;
} else if (cfnode->flow_op==SL::FLOW_OP_RETURN) {
@@ -560,7 +560,7 @@ Error ShaderCompilerGLES2::compile_node(SL::ProgramNode *p_program) {
ubase=uniforms->size();
for(Map<StringName,SL::Uniform>::Element *E=p_program->uniforms.front();E;E=E->next()) {
- String uline="uniform "+_typestr(E->get().type)+" _"+E->key().operator String()+";"ENDL;
+ String uline="uniform "+_typestr(E->get().type)+" _"+E->key().operator String()+";" ENDL;
global_code+=uline;
if (uniforms) {
@@ -593,10 +593,10 @@ Error ShaderCompilerGLES2::compile_node(SL::ProgramNode *p_program) {
header+=_typestr(fnode->arguments[i].type)+" "+replace_string(fnode->arguments[i].name);
}
- header+=") {"ENDL;
+ header+=") {" ENDL;
String fcode=header;
fcode+=dump_node_code(fnode->body,1);
- fcode+="}"ENDL;
+ fcode+="}" ENDL;
global_code+=fcode;
}
@@ -605,7 +605,7 @@ Error ShaderCompilerGLES2::compile_node(SL::ProgramNode *p_program) {
StringName varname=E->key();
String newvarname=replace_string(varname);
- global_code+="uniform "+_typestr(E->get())+" "+newvarname+";"ENDL;
+ global_code+="uniform "+_typestr(E->get())+" "+newvarname+";" ENDL;
}*/
code=dump_node_code(p_program,0);
diff --git a/drivers/mpc/audio_stream_mpc.cpp b/drivers/mpc/audio_stream_mpc.cpp
index 67f21f922c..fe6aa05d00 100644
--- a/drivers/mpc/audio_stream_mpc.cpp
+++ b/drivers/mpc/audio_stream_mpc.cpp
@@ -1,7 +1,7 @@
#include "audio_stream_mpc.h"
-Error AudioStreamMPC::_open_file() {
+Error AudioStreamPlaybackMPC::_open_file() {
if (f) {
memdelete(f);
@@ -41,7 +41,7 @@ Error AudioStreamMPC::_open_file() {
return OK;
}
-void AudioStreamMPC::_close_file() {
+void AudioStreamPlaybackMPC::_close_file() {
if (f) {
memdelete(f);
@@ -52,7 +52,7 @@ void AudioStreamMPC::_close_file() {
data_ofs=0;
}
-int AudioStreamMPC::_read_file(void *p_dst,int p_bytes) {
+int AudioStreamPlaybackMPC::_read_file(void *p_dst,int p_bytes) {
if (f)
return f->get_buffer((uint8_t*)p_dst,p_bytes);
@@ -68,7 +68,7 @@ int AudioStreamMPC::_read_file(void *p_dst,int p_bytes) {
return p_bytes;
}
-bool AudioStreamMPC::_seek_file(int p_pos){
+bool AudioStreamPlaybackMPC::_seek_file(int p_pos){
if (p_pos<0 || p_pos>streamlen)
return false;
@@ -83,7 +83,7 @@ bool AudioStreamMPC::_seek_file(int p_pos){
return true;
}
-int AudioStreamMPC::_tell_file() const{
+int AudioStreamPlaybackMPC::_tell_file() const{
if (f)
return f->get_pos();
@@ -93,13 +93,13 @@ int AudioStreamMPC::_tell_file() const{
}
-int AudioStreamMPC::_sizeof_file() const{
+int AudioStreamPlaybackMPC::_sizeof_file() const{
//print_line("sizeof file, get: "+itos(streamlen));
return streamlen;
}
-bool AudioStreamMPC::_canseek_file() const{
+bool AudioStreamPlaybackMPC::_canseek_file() const{
//print_line("canseek file, get true");
return true;
@@ -107,51 +107,46 @@ bool AudioStreamMPC::_canseek_file() const{
/////////////////////
-mpc_int32_t AudioStreamMPC::_mpc_read(mpc_reader *p_reader,void *p_dst, mpc_int32_t p_bytes) {
+mpc_int32_t AudioStreamPlaybackMPC::_mpc_read(mpc_reader *p_reader,void *p_dst, mpc_int32_t p_bytes) {
- AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+ AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
return smpc->_read_file(p_dst,p_bytes);
}
-mpc_bool_t AudioStreamMPC::_mpc_seek(mpc_reader *p_reader,mpc_int32_t p_offset) {
+mpc_bool_t AudioStreamPlaybackMPC::_mpc_seek(mpc_reader *p_reader,mpc_int32_t p_offset) {
- AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+ AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
return smpc->_seek_file(p_offset);
}
-mpc_int32_t AudioStreamMPC::_mpc_tell(mpc_reader *p_reader) {
+mpc_int32_t AudioStreamPlaybackMPC::_mpc_tell(mpc_reader *p_reader) {
- AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+ AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
return smpc->_tell_file();
}
-mpc_int32_t AudioStreamMPC::_mpc_get_size(mpc_reader *p_reader) {
+mpc_int32_t AudioStreamPlaybackMPC::_mpc_get_size(mpc_reader *p_reader) {
- AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+ AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
return smpc->_sizeof_file();
}
-mpc_bool_t AudioStreamMPC::_mpc_canseek(mpc_reader *p_reader) {
+mpc_bool_t AudioStreamPlaybackMPC::_mpc_canseek(mpc_reader *p_reader) {
- AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+ AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
return smpc->_canseek_file();
}
-bool AudioStreamMPC::_can_mix() const {
- return /*active &&*/ !paused;
-}
-
-
-void AudioStreamMPC::update() {
+int AudioStreamPlaybackMPC::mix(int16_t* p_bufer,int p_frames) {
if (!active || paused)
- return;
+ return 0;
- int todo=get_todo();
+ int todo=p_frames;
while(todo>MPC_DECODER_BUFFER_LENGTH/si.channels) {
@@ -162,7 +157,7 @@ void AudioStreamMPC::update() {
mpc_status err = mpc_demux_decode(demux, &frame);
if (frame.bits!=-1) {
- int16_t *dst_buff = get_write_buffer();
+ int16_t *dst_buff = p_bufer;
#ifdef MPC_FIXED_POINT
@@ -185,21 +180,21 @@ void AudioStreamMPC::update() {
#endif
int frames = frame.samples;
- write(frames);
+ p_bufer+=si.channels*frames;
todo-=frames;
} else {
if (err != MPC_STATUS_OK) {
stop();
- ERR_EXPLAIN("Error decoding MPC");
- ERR_FAIL();
+ ERR_PRINT("Error decoding MPC");
+ break;
} else {
//finished
if (!loop) {
stop();
- return;
+ break;
} else {
@@ -213,9 +208,11 @@ void AudioStreamMPC::update() {
}
}
}
+
+ return p_frames-todo;
}
-Error AudioStreamMPC::_reload() {
+Error AudioStreamPlaybackMPC::_reload() {
ERR_FAIL_COND_V(demux!=NULL, ERR_FILE_ALREADY_IN_USE);
@@ -224,31 +221,40 @@ Error AudioStreamMPC::_reload() {
demux = mpc_demux_init(&reader);
ERR_FAIL_COND_V(!demux,ERR_CANT_CREATE);
-
mpc_demux_get_info(demux, &si);
- _setup(si.channels,si.sample_freq,MPC_DECODER_BUFFER_LENGTH*2/si.channels);
return OK;
}
-void AudioStreamMPC::set_file(const String& p_file) {
+void AudioStreamPlaybackMPC::set_file(const String& p_file) {
file=p_file;
+ Error err = _open_file();
+ ERR_FAIL_COND(err!=OK);
+ demux = mpc_demux_init(&reader);
+ ERR_FAIL_COND(!demux);
+ mpc_demux_get_info(demux, &si);
+ stream_min_size=MPC_DECODER_BUFFER_LENGTH*2/si.channels;
+ stream_rate=si.sample_freq;
+ stream_channels=si.channels;
+
+ mpc_demux_exit(demux);
+ demux=NULL;
+ _close_file();
+
}
-String AudioStreamMPC::get_file() const {
+String AudioStreamPlaybackMPC::get_file() const {
return file;
}
-void AudioStreamMPC::play() {
+void AudioStreamPlaybackMPC::play(float p_offset) {
- _THREAD_SAFE_METHOD_
-
if (active)
stop();
active=false;
@@ -262,9 +268,9 @@ void AudioStreamMPC::play() {
}
-void AudioStreamMPC::stop() {
+void AudioStreamPlaybackMPC::stop() {
+
- _THREAD_SAFE_METHOD_
if (!active)
return;
if (demux) {
@@ -275,70 +281,58 @@ void AudioStreamMPC::stop() {
active=false;
}
-bool AudioStreamMPC::is_playing() const {
+bool AudioStreamPlaybackMPC::is_playing() const {
- return active || (get_total() - get_todo() -1 > 0);
+ return active;
}
-void AudioStreamMPC::set_paused(bool p_paused) {
- paused=p_paused;
-}
-bool AudioStreamMPC::is_paused(bool p_paused) const {
-
- return paused;
-}
-
-void AudioStreamMPC::set_loop(bool p_enable) {
+void AudioStreamPlaybackMPC::set_loop(bool p_enable) {
loop=p_enable;
}
-bool AudioStreamMPC::has_loop() const {
+bool AudioStreamPlaybackMPC::has_loop() const {
return loop;
}
-float AudioStreamMPC::get_length() const {
+float AudioStreamPlaybackMPC::get_length() const {
return 0;
}
-String AudioStreamMPC::get_stream_name() const {
+String AudioStreamPlaybackMPC::get_stream_name() const {
return "";
}
-int AudioStreamMPC::get_loop_count() const {
+int AudioStreamPlaybackMPC::get_loop_count() const {
return 0;
}
-float AudioStreamMPC::get_pos() const {
+float AudioStreamPlaybackMPC::get_pos() const {
return 0;
}
-void AudioStreamMPC::seek_pos(float p_time) {
+void AudioStreamPlaybackMPC::seek_pos(float p_time) {
}
-AudioStream::UpdateMode AudioStreamMPC::get_update_mode() const {
-
- return UPDATE_THREAD;
-}
-void AudioStreamMPC::_bind_methods() {
+void AudioStreamPlaybackMPC::_bind_methods() {
- ObjectTypeDB::bind_method(_MD("set_file","name"),&AudioStreamMPC::set_file);
- ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamMPC::get_file);
+ ObjectTypeDB::bind_method(_MD("set_file","name"),&AudioStreamPlaybackMPC::set_file);
+ ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamPlaybackMPC::get_file);
ADD_PROPERTYNZ( PropertyInfo(Variant::STRING,"file",PROPERTY_HINT_FILE,"mpc"), _SCS("set_file"), _SCS("get_file"));
}
-AudioStreamMPC::AudioStreamMPC() {
+AudioStreamPlaybackMPC::AudioStreamPlaybackMPC() {
- preload=true;
+ preload=false;
f=NULL;
streamlen=0;
data_ofs=0;
@@ -356,7 +350,7 @@ AudioStreamMPC::AudioStreamMPC() {
}
-AudioStreamMPC::~AudioStreamMPC() {
+AudioStreamPlaybackMPC::~AudioStreamPlaybackMPC() {
stop();
diff --git a/drivers/mpc/audio_stream_mpc.h b/drivers/mpc/audio_stream_mpc.h
index 8fb0ed13de..122d0d0bbb 100644
--- a/drivers/mpc/audio_stream_mpc.h
+++ b/drivers/mpc/audio_stream_mpc.h
@@ -1,18 +1,17 @@
#ifndef AUDIO_STREAM_MPC_H
#define AUDIO_STREAM_MPC_H
-#include "scene/resources/audio_stream_resampled.h"
+#include "scene/resources/audio_stream.h"
#include "os/file_access.h"
#include "mpc/mpcdec.h"
#include "os/thread_safe.h"
#include "io/resource_loader.h"
//#include "../libmpcdec/decoder.h"
//#include "../libmpcdec/internal.h"
-class AudioStreamMPC : public AudioStreamResampled {
- OBJ_TYPE( AudioStreamMPC, AudioStreamResampled );
+class AudioStreamPlaybackMPC : public AudioStreamPlayback {
- _THREAD_SAFE_CLASS_
+ OBJ_TYPE( AudioStreamPlaybackMPC, AudioStreamPlayback );
bool preload;
FileAccess *f;
@@ -39,7 +38,9 @@ class AudioStreamMPC : public AudioStreamResampled {
static mpc_int32_t _mpc_get_size(mpc_reader *p_reader);
static mpc_bool_t _mpc_canseek(mpc_reader *p_reader);
- virtual bool _can_mix() const ;
+ int stream_min_size;
+ int stream_rate;
+ int stream_channels;
protected:
Error _open_file();
@@ -59,12 +60,10 @@ public:
void set_file(const String& p_file);
String get_file() const;
- virtual void play();
+ virtual void play(float p_offset=0);
virtual void stop();
virtual bool is_playing() const;
- virtual void set_paused(bool p_paused);
- virtual bool is_paused(bool p_paused) const;
virtual void set_loop(bool p_enable);
virtual bool has_loop() const;
@@ -78,13 +77,35 @@ public:
virtual float get_pos() const;
virtual void seek_pos(float p_time);
- virtual UpdateMode get_update_mode() const;
- virtual void update();
+ virtual int get_channels() const { return stream_channels; }
+ virtual int get_mix_rate() const { return stream_rate; }
- AudioStreamMPC();
- ~AudioStreamMPC();
+ virtual int get_minimum_buffer_size() const { return stream_min_size; }
+ virtual int mix(int16_t* p_bufer,int p_frames);
+
+ virtual void set_loop_restart_time(float p_time) { }
+
+ AudioStreamPlaybackMPC();
+ ~AudioStreamPlaybackMPC();
};
+class AudioStreamMPC : public AudioStream {
+
+ OBJ_TYPE( AudioStreamMPC, AudioStream );
+
+ String file;
+public:
+
+ Ref<AudioStreamPlayback> instance_playback() {
+ Ref<AudioStreamPlaybackMPC> pb = memnew( AudioStreamPlaybackMPC );
+ pb->set_file(file);
+ return pb;
+ }
+
+ void set_file(const String& p_file) { file=p_file; }
+
+
+};
class ResourceFormatLoaderAudioStreamMPC : public ResourceFormatLoader {
public:
diff --git a/drivers/nedmalloc/malloc.c.h b/drivers/nedmalloc/malloc.c.h
index b9e65637d5..4fec5cc9d4 100644
--- a/drivers/nedmalloc/malloc.c.h
+++ b/drivers/nedmalloc/malloc.c.h
@@ -1,5814 +1,5814 @@
-#ifdef NEDMALLOC_ENABLED
-/*
- This is a version (aka dlmalloc) of malloc/free/realloc written by
- Doug Lea and released to the public domain, as explained at
- http://creativecommons.org/licenses/publicdomain. Send questions,
- comments, complaints, performance data, etc to dl@cs.oswego.edu
-
-* Version 2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee)
-
- Note: There may be an updated version of this malloc obtainable at
- ftp://gee.cs.oswego.edu/pub/misc/malloc.c
- Check before installing!
-
-* Quickstart
-
- This library is all in one file to simplify the most common usage:
- ftp it, compile it (-O3), and link it into another program. All of
- the compile-time options default to reasonable values for use on
- most platforms. You might later want to step through various
- compile-time and dynamic tuning options.
-
- For convenience, an include file for code using this malloc is at:
- ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h
- You don't really need this .h file unless you call functions not
- defined in your system include files. The .h file contains only the
- excerpts from this file needed for using this malloc on ANSI C/C++
- systems, so long as you haven't changed compile-time options about
- naming and tuning parameters. If you do, then you can create your
- own malloc.h that does include all settings by cutting at the point
- indicated below. Note that you may already by default be using a C
- library containing a malloc that is based on some version of this
- malloc (for example in linux). You might still want to use the one
- in this file to customize settings or to avoid overheads associated
- with library versions.
-
-* Vital statistics:
-
- Supported pointer/size_t representation: 4 or 8 bytes
- size_t MUST be an unsigned type of the same width as
- pointers. (If you are using an ancient system that declares
- size_t as a signed type, or need it to be a different width
- than pointers, you can use a previous release of this malloc
- (e.g. 2.7.2) supporting these.)
-
- Alignment: 8 bytes (default)
- This suffices for nearly all current machines and C compilers.
- However, you can define MALLOC_ALIGNMENT to be wider than this
- if necessary (up to 128bytes), at the expense of using more space.
-
- Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes)
- 8 or 16 bytes (if 8byte sizes)
- Each malloced chunk has a hidden word of overhead holding size
- and status information, and additional cross-check word
- if FOOTERS is defined.
-
- Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead)
- 8-byte ptrs: 32 bytes (including overhead)
-
- Even a request for zero bytes (i.e., malloc(0)) returns a
- pointer to something of the minimum allocatable size.
- The maximum overhead wastage (i.e., number of extra bytes
- allocated than were requested in malloc) is less than or equal
- to the minimum size, except for requests >= mmap_threshold that
- are serviced via mmap(), where the worst case wastage is about
- 32 bytes plus the remainder from a system page (the minimal
- mmap unit); typically 4096 or 8192 bytes.
-
- Security: static-safe; optionally more or less
- The "security" of malloc refers to the ability of malicious
- code to accentuate the effects of errors (for example, freeing
- space that is not currently malloc'ed or overwriting past the
- ends of chunks) in code that calls malloc. This malloc
- guarantees not to modify any memory locations below the base of
- heap, i.e., static variables, even in the presence of usage
- errors. The routines additionally detect most improper frees
- and reallocs. All this holds as long as the static bookkeeping
- for malloc itself is not corrupted by some other means. This
- is only one aspect of security -- these checks do not, and
- cannot, detect all possible programming errors.
-
- If FOOTERS is defined nonzero, then each allocated chunk
- carries an additional check word to verify that it was malloced
- from its space. These check words are the same within each
- execution of a program using malloc, but differ across
- executions, so externally crafted fake chunks cannot be
- freed. This improves security by rejecting frees/reallocs that
- could corrupt heap memory, in addition to the checks preventing
- writes to statics that are always on. This may further improve
- security at the expense of time and space overhead. (Note that
- FOOTERS may also be worth using with MSPACES.)
-
- By default detected errors cause the program to abort (calling
- "abort()"). You can override this to instead proceed past
- errors by defining PROCEED_ON_ERROR. In this case, a bad free
- has no effect, and a malloc that encounters a bad address
- caused by user overwrites will ignore the bad address by
- dropping pointers and indices to all known memory. This may
- be appropriate for programs that should continue if at all
- possible in the face of programming errors, although they may
- run out of memory because dropped memory is never reclaimed.
-
- If you don't like either of these options, you can define
- CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
- else. And if if you are sure that your program using malloc has
- no errors or vulnerabilities, you can define INSECURE to 1,
- which might (or might not) provide a small performance improvement.
-
- Thread-safety: NOT thread-safe unless USE_LOCKS defined
- When USE_LOCKS is defined, each public call to malloc, free,
- etc is surrounded with either a pthread mutex or a win32
- spinlock (depending on WIN32). This is not especially fast, and
- can be a major bottleneck. It is designed only to provide
- minimal protection in concurrent environments, and to provide a
- basis for extensions. If you are using malloc in a concurrent
- program, consider instead using nedmalloc
- (http://www.nedprod.com/programs/portable/nedmalloc/) or
- ptmalloc (See http://www.malloc.de), which are derived
- from versions of this malloc.
-
- System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
- This malloc can use unix sbrk or any emulation (invoked using
- the CALL_MORECORE macro) and/or mmap/munmap or any emulation
- (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
- memory. On most unix systems, it tends to work best if both
- MORECORE and MMAP are enabled. On Win32, it uses emulations
- based on VirtualAlloc. It also uses common C library functions
- like memset.
-
- Compliance: I believe it is compliant with the Single Unix Specification
- (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
- others as well.
-
-* Overview of algorithms
-
- This is not the fastest, most space-conserving, most portable, or
- most tunable malloc ever written. However it is among the fastest
- while also being among the most space-conserving, portable and
- tunable. Consistent balance across these factors results in a good
- general-purpose allocator for malloc-intensive programs.
-
- In most ways, this malloc is a best-fit allocator. Generally, it
- chooses the best-fitting existing chunk for a request, with ties
- broken in approximately least-recently-used order. (This strategy
- normally maintains low fragmentation.) However, for requests less
- than 256bytes, it deviates from best-fit when there is not an
- exactly fitting available chunk by preferring to use space adjacent
- to that used for the previous small request, as well as by breaking
- ties in approximately most-recently-used order. (These enhance
- locality of series of small allocations.) And for very large requests
- (>= 256Kb by default), it relies on system memory mapping
- facilities, if supported. (This helps avoid carrying around and
- possibly fragmenting memory used only for large chunks.)
-
- All operations (except malloc_stats and mallinfo) have execution
- times that are bounded by a constant factor of the number of bits in
- a size_t, not counting any clearing in calloc or copying in realloc,
- or actions surrounding MORECORE and MMAP that have times
- proportional to the number of non-contiguous regions returned by
- system allocation routines, which is often just 1. In real-time
- applications, you can optionally suppress segment traversals using
- NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
- system allocators return non-contiguous spaces, at the typical
- expense of carrying around more memory and increased fragmentation.
-
- The implementation is not very modular and seriously overuses
- macros. Perhaps someday all C compilers will do as good a job
- inlining modular code as can now be done by brute-force expansion,
- but now, enough of them seem not to.
-
- Some compilers issue a lot of warnings about code that is
- dead/unreachable only on some platforms, and also about intentional
- uses of negation on unsigned types. All known cases of each can be
- ignored.
-
- For a longer but out of date high-level description, see
- http://gee.cs.oswego.edu/dl/html/malloc.html
-
-* MSPACES
- If MSPACES is defined, then in addition to malloc, free, etc.,
- this file also defines mspace_malloc, mspace_free, etc. These
- are versions of malloc routines that take an "mspace" argument
- obtained using create_mspace, to control all internal bookkeeping.
- If ONLY_MSPACES is defined, only these versions are compiled.
- So if you would like to use this allocator for only some allocations,
- and your system malloc for others, you can compile with
- ONLY_MSPACES and then do something like...
- static mspace mymspace = create_mspace(0,0); // for example
- #define mymalloc(bytes) mspace_malloc(mymspace, bytes)
-
- (Note: If you only need one instance of an mspace, you can instead
- use "USE_DL_PREFIX" to relabel the global malloc.)
-
- You can similarly create thread-local allocators by storing
- mspaces as thread-locals. For example:
- static __thread mspace tlms = 0;
- void* tlmalloc(size_t bytes) {
- if (tlms == 0) tlms = create_mspace(0, 0);
- return mspace_malloc(tlms, bytes);
- }
- void tlfree(void* mem) { mspace_free(tlms, mem); }
-
- Unless FOOTERS is defined, each mspace is completely independent.
- You cannot allocate from one and free to another (although
- conformance is only weakly checked, so usage errors are not always
- caught). If FOOTERS is defined, then each chunk carries around a tag
- indicating its originating mspace, and frees are directed to their
- originating spaces.
-
- ------------------------- Compile-time options ---------------------------
-
-Be careful in setting #define values for numerical constants of type
-size_t. On some systems, literal values are not automatically extended
-to size_t precision unless they are explicitly cast. You can also
-use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
-
-WIN32 default: defined if _WIN32 defined
- Defining WIN32 sets up defaults for MS environment and compilers.
- Otherwise defaults are for unix. Beware that there seem to be some
- cases where this malloc might not be a pure drop-in replacement for
- Win32 malloc: Random-looking failures from Win32 GDI API's (eg;
- SetDIBits()) may be due to bugs in some video driver implementations
- when pixel buffers are malloc()ed, and the region spans more than
- one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb)
- default granularity, pixel buffers may straddle virtual allocation
- regions more often than when using the Microsoft allocator. You can
- avoid this by using VirtualAlloc() and VirtualFree() for all pixel
- buffers rather than using malloc(). If this is not possible,
- recompile this malloc with a larger DEFAULT_GRANULARITY.
-
-MALLOC_ALIGNMENT default: (size_t)8
- Controls the minimum alignment for malloc'ed chunks. It must be a
- power of two and at least 8, even on machines for which smaller
- alignments would suffice. It may be defined as larger than this
- though. Note however that code and data structures are optimized for
- the case of 8-byte alignment.
-
-MSPACES default: 0 (false)
- If true, compile in support for independent allocation spaces.
- This is only supported if HAVE_MMAP is true.
-
-ONLY_MSPACES default: 0 (false)
- If true, only compile in mspace versions, not regular versions.
-
-USE_LOCKS default: 0 (false)
- Causes each call to each public routine to be surrounded with
- pthread or WIN32 mutex lock/unlock. (If set true, this can be
- overridden on a per-mspace basis for mspace versions.) If set to a
- non-zero value other than 1, locks are used, but their
- implementation is left out, so lock functions must be supplied manually,
- as described below.
-
-USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC
- If true, uses custom spin locks for locking. This is currently
- supported only for x86 platforms using gcc or recent MS compilers.
- Otherwise, posix locks or win32 critical sections are used.
-
-FOOTERS default: 0
- If true, provide extra checking and dispatching by placing
- information in the footers of allocated chunks. This adds
- space and time overhead.
-
-INSECURE default: 0
- If true, omit checks for usage errors and heap space overwrites.
-
-USE_DL_PREFIX default: NOT defined
- Causes compiler to prefix all public routines with the string 'dl'.
- This can be useful when you only want to use this malloc in one part
- of a program, using your regular system malloc elsewhere.
-
-ABORT default: defined as abort()
- Defines how to abort on failed checks. On most systems, a failed
- check cannot die with an "assert" or even print an informative
- message, because the underlying print routines in turn call malloc,
- which will fail again. Generally, the best policy is to simply call
- abort(). It's not very useful to do more than this because many
- errors due to overwriting will show up as address faults (null, odd
- addresses etc) rather than malloc-triggered checks, so will also
- abort. Also, most compilers know that abort() does not return, so
- can better optimize code conditionally calling it.
-
-PROCEED_ON_ERROR default: defined as 0 (false)
- Controls whether detected bad addresses cause them to be bypassed
- rather than aborting. If set, detected bad arguments to free and
- realloc are ignored. And all bookkeeping information is zeroed out
- upon a detected overwrite of freed heap space, thus losing the
- ability to ever return it from malloc again, but enabling the
- application to proceed. If PROCEED_ON_ERROR is defined, the
- static variable malloc_corruption_error_count is compiled in
- and can be examined to see if errors have occurred. This option
- generates slower code than the default abort policy.
-
-DEBUG default: NOT defined
- The DEBUG setting is mainly intended for people trying to modify
- this code or diagnose problems when porting to new platforms.
- However, it may also be able to better isolate user errors than just
- using runtime checks. The assertions in the check routines spell
- out in more detail the assumptions and invariants underlying the
- algorithms. The checking is fairly extensive, and will slow down
- execution noticeably. Calling malloc_stats or mallinfo with DEBUG
- set will attempt to check every non-mmapped allocated and free chunk
- in the course of computing the summaries.
-
-ABORT_ON_ASSERT_FAILURE default: defined as 1 (true)
- Debugging assertion failures can be nearly impossible if your
- version of the assert macro causes malloc to be called, which will
- lead to a cascade of further failures, blowing the runtime stack.
- ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
- which will usually make debugging easier.
-
-MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32
- The action to take before "return 0" when malloc fails to be able to
- return memory because there is none available.
-
-HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES
- True if this system supports sbrk or an emulation of it.
-
-MORECORE default: sbrk
- The name of the sbrk-style system routine to call to obtain more
- memory. See below for guidance on writing custom MORECORE
- functions. The type of the argument to sbrk/MORECORE varies across
- systems. It cannot be size_t, because it supports negative
- arguments, so it is normally the signed type of the same width as
- size_t (sometimes declared as "intptr_t"). It doesn't much matter
- though. Internally, we only call it with arguments less than half
- the max value of a size_t, which should work across all reasonable
- possibilities, although sometimes generating compiler warnings.
-
-MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE
- If true, take advantage of fact that consecutive calls to MORECORE
- with positive arguments always return contiguous increasing
- addresses. This is true of unix sbrk. It does not hurt too much to
- set it true anyway, since malloc copes with non-contiguities.
- Setting it false when definitely non-contiguous saves time
- and possibly wasted space it would take to discover this though.
-
-MORECORE_CANNOT_TRIM default: NOT defined
- True if MORECORE cannot release space back to the system when given
- negative arguments. This is generally necessary only if you are
- using a hand-crafted MORECORE function that cannot handle negative
- arguments.
-
-NO_SEGMENT_TRAVERSAL default: 0
- If non-zero, suppresses traversals of memory segments
- returned by either MORECORE or CALL_MMAP. This disables
- merging of segments that are contiguous, and selectively
- releasing them to the OS if unused, but bounds execution times.
-
-HAVE_MMAP default: 1 (true)
- True if this system supports mmap or an emulation of it. If so, and
- HAVE_MORECORE is not true, MMAP is used for all system
- allocation. If set and HAVE_MORECORE is true as well, MMAP is
- primarily used to directly allocate very large blocks. It is also
- used as a backup strategy in cases where MORECORE fails to provide
- space from system. Note: A single call to MUNMAP is assumed to be
- able to unmap memory that may have been allocated using multiple calls
- to MMAP, so long as they are adjacent.
-
-HAVE_MREMAP default: 1 on linux, else 0
- If true realloc() uses mremap() to re-allocate large blocks and
- extend or shrink allocation spaces.
-
-MMAP_CLEARS default: 1 except on WINCE.
- True if mmap clears memory so calloc doesn't need to. This is true
- for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
-
-USE_BUILTIN_FFS default: 0 (i.e., not used)
- Causes malloc to use the builtin ffs() function to compute indices.
- Some compilers may recognize and intrinsify ffs to be faster than the
- supplied C version. Also, the case of x86 using gcc is special-cased
- to an asm instruction, so is already as fast as it can be, and so
- this setting has no effect. Similarly for Win32 under recent MS compilers.
- (On most x86s, the asm version is only slightly faster than the C version.)
-
-malloc_getpagesize default: derive from system includes, or 4096.
- The system page size. To the extent possible, this malloc manages
- memory from the system in page-size units. This may be (and
- usually is) a function rather than a constant. This is ignored
- if WIN32, where page size is determined using GetSystemInfo during
- initialization. This may be several megabytes if ENABLE_LARGE_PAGES
- is enabled.
-
-ENABLE_LARGE_PAGES default: NOT defined
- Causes the system page size to be the value of GetLargePageMinimum()
- if that function is available (Windows Server 2003/Vista or later).
- This allows the use of large page entries in the MMU which can
- significantly improve performance in large working set applications
- as TLB cache load is reduced by a factor of three. Note that enabling
- this option is equal to locking the process' memory in current
- implementations of Windows and requires the SE_LOCK_MEMORY_PRIVILEGE
- to be held by the process in order to succeed.
-
-USE_DEV_RANDOM default: 0 (i.e., not used)
- Causes malloc to use /dev/random to initialize secure magic seed for
- stamping footers. Otherwise, the current time is used.
-
-NO_MALLINFO default: 0
- If defined, don't compile "mallinfo". This can be a simple way
- of dealing with mismatches between system declarations and
- those in this file.
-
-MALLINFO_FIELD_TYPE default: size_t
- The type of the fields in the mallinfo struct. This was originally
- defined as "int" in SVID etc, but is more usefully defined as
- size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set
-
-REALLOC_ZERO_BYTES_FREES default: not defined
- This should be set if a call to realloc with zero bytes should
- be the same as a call to free. Some people think it should. Otherwise,
- since this malloc returns a unique pointer for malloc(0), so does
- realloc(p, 0).
-
-LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
-LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H
-LACKS_STDLIB_H default: NOT defined unless on WIN32
- Define these if your system does not have these header files.
- You might need to manually insert some of the declarations they provide.
-
-DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS,
- system_info.dwAllocationGranularity in WIN32,
- GetLargePageMinimum() if ENABLE_LARGE_PAGES,
- otherwise 64K.
- Also settable using mallopt(M_GRANULARITY, x)
- The unit for allocating and deallocating memory from the system. On
- most systems with contiguous MORECORE, there is no reason to
- make this more than a page. However, systems with MMAP tend to
- either require or encourage larger granularities. You can increase
- this value to prevent system allocation functions from being called so
- often, especially if they are slow. The value must be at least one
- page and must be a power of two. Setting to 0 causes initialization
- to either page size or win32 region size. (Note: In previous
- versions of malloc, the equivalent of this option was called
- "TOP_PAD")
-
-DEFAULT_GRANULARITY_ALIGNED default: undefined (which means page size)
- Whether to enforce alignment when allocating and deallocating memory
- from the system i.e. the base address of all allocations will be
- aligned to DEFAULT_GRANULARITY if it is set. Note that enabling this carries
- some overhead as multiple calls must now be made when probing for a valid
- aligned value, however it does greatly ease the checking for whether
- a given memory pointer was allocated by this allocator rather than
- some other.
-
-DEFAULT_TRIM_THRESHOLD default: 2MB
- Also settable using mallopt(M_TRIM_THRESHOLD, x)
- The maximum amount of unused top-most memory to keep before
- releasing via malloc_trim in free(). Automatic trimming is mainly
- useful in long-lived programs using contiguous MORECORE. Because
- trimming via sbrk can be slow on some systems, and can sometimes be
- wasteful (in cases where programs immediately afterward allocate
- more large chunks) the value should be high enough so that your
- overall system performance would improve by releasing this much
- memory. As a rough guide, you might set to a value close to the
- average size of a process (program) running on your system.
- Releasing this much memory would allow such a process to run in
- memory. Generally, it is worth tuning trim thresholds when a
- program undergoes phases where several large chunks are allocated
- and released in ways that can reuse each other's storage, perhaps
- mixed with phases where there are no such chunks at all. The trim
- value must be greater than page size to have any useful effect. To
- disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
- some people use of mallocing a huge space and then freeing it at
- program startup, in an attempt to reserve system memory, doesn't
- have the intended effect under automatic trimming, since that memory
- will immediately be returned to the system.
-
-DEFAULT_MMAP_THRESHOLD default: 256K
- Also settable using mallopt(M_MMAP_THRESHOLD, x)
- The request size threshold for using MMAP to directly service a
- request. Requests of at least this size that cannot be allocated
- using already-existing space will be serviced via mmap. (If enough
- normal freed space already exists it is used instead.) Using mmap
- segregates relatively large chunks of memory so that they can be
- individually obtained and released from the host system. A request
- serviced through mmap is never reused by any other request (at least
- not directly; the system may just so happen to remap successive
- requests to the same locations). Segregating space in this way has
- the benefits that: Mmapped space can always be individually released
- back to the system, which helps keep the system level memory demands
- of a long-lived program low. Also, mapped memory doesn't become
- `locked' between other chunks, as can happen with normally allocated
- chunks, which means that even trimming via malloc_trim would not
- release them. However, it has the disadvantage that the space
- cannot be reclaimed, consolidated, and then used to service later
- requests, as happens with normal chunks. The advantages of mmap
- nearly always outweigh disadvantages for "large" chunks, but the
- value of "large" may vary across systems. The default is an
- empirically derived value that works well in most systems. You can
- disable mmap by setting to MAX_SIZE_T.
-
-MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP
- The number of consolidated frees between checks to release
- unused segments when freeing. When using non-contiguous segments,
- especially with multiple mspaces, checking only for topmost space
- doesn't always suffice to trigger trimming. To compensate for this,
- free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
- current number of segments, if greater) try to release unused
- segments to the OS when freeing chunks that result in
- consolidation. The best value for this parameter is a compromise
- between slowing down frees with relatively costly checks that
- rarely trigger versus holding on to unused memory. To effectively
- disable, set to MAX_SIZE_T. This may lead to a very slight speed
- improvement at the expense of carrying around more memory.
-*/
-
-/* Version identifier to allow people to support multiple versions */
-#ifndef DLMALLOC_VERSION
-#define DLMALLOC_VERSION 20804
-#endif /* DLMALLOC_VERSION */
-
-#ifndef WIN32
-#ifdef _WIN32
-#define WIN32 1
-#endif /* _WIN32 */
-#ifdef _WIN32_WCE
-#define LACKS_FCNTL_H
-#define WIN32 1
-#endif /* _WIN32_WCE */
-#endif /* WIN32 */
-#ifdef WIN32
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <tchar.h>
-#define HAVE_MMAP 1
-#define HAVE_MORECORE 0
-#define LACKS_UNISTD_H
-#define LACKS_SYS_PARAM_H
-#define LACKS_SYS_MMAN_H
-#define LACKS_STRING_H
-#define LACKS_STRINGS_H
-#define LACKS_SYS_TYPES_H
-#define LACKS_ERRNO_H
-#ifndef MALLOC_FAILURE_ACTION
-#define MALLOC_FAILURE_ACTION
-#endif /* MALLOC_FAILURE_ACTION */
-#ifdef _WIN32_WCE /* WINCE reportedly does not clear */
-#define MMAP_CLEARS 0
-#else
-#define MMAP_CLEARS 1
-#endif /* _WIN32_WCE */
-#endif /* WIN32 */
-
-#if defined(DARWIN) || defined(_DARWIN)
-/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
-#ifndef HAVE_MORECORE
-#define HAVE_MORECORE 0
-#define HAVE_MMAP 1
-/* OSX allocators provide 16 byte alignment */
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT ((size_t)16U)
-#endif
-#endif /* HAVE_MORECORE */
-#endif /* DARWIN */
-
-#ifndef LACKS_SYS_TYPES_H
-#include <sys/types.h> /* For size_t */
-#endif /* LACKS_SYS_TYPES_H */
-
-#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310)
-#define SPIN_LOCKS_AVAILABLE 1
-#else
-#define SPIN_LOCKS_AVAILABLE 0
-#endif
-
-/* The maximum possible size_t value has all bits set */
-#define MAX_SIZE_T (~(size_t)0)
-
-#ifndef ONLY_MSPACES
-#define ONLY_MSPACES 0 /* define to a value */
-#else
-#define ONLY_MSPACES 1
-#endif /* ONLY_MSPACES */
-#ifndef MSPACES
-#if ONLY_MSPACES
-#define MSPACES 1
-#else /* ONLY_MSPACES */
-#define MSPACES 0
-#endif /* ONLY_MSPACES */
-#endif /* MSPACES */
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT ((size_t)8U)
-#endif /* MALLOC_ALIGNMENT */
-#ifndef FOOTERS
-#define FOOTERS 0
-#endif /* FOOTERS */
-#ifndef ABORT
-#define ABORT abort()
-#endif /* ABORT */
-#ifndef ABORT_ON_ASSERT_FAILURE
-#define ABORT_ON_ASSERT_FAILURE 1
-#endif /* ABORT_ON_ASSERT_FAILURE */
-#ifndef PROCEED_ON_ERROR
-#define PROCEED_ON_ERROR 0
-#endif /* PROCEED_ON_ERROR */
-#ifndef USE_LOCKS
-#define USE_LOCKS 0
-#endif /* USE_LOCKS */
-#ifndef USE_SPIN_LOCKS
-#if USE_LOCKS && SPIN_LOCKS_AVAILABLE
-#define USE_SPIN_LOCKS 1
-#else
-#define USE_SPIN_LOCKS 0
-#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. */
-#endif /* USE_SPIN_LOCKS */
-#ifndef INSECURE
-#define INSECURE 0
-#endif /* INSECURE */
-#ifndef HAVE_MMAP
-#define HAVE_MMAP 1
-#endif /* HAVE_MMAP */
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 1
-#endif /* MMAP_CLEARS */
-#ifndef HAVE_MREMAP
-#ifdef linux
-#define HAVE_MREMAP 1
-#else /* linux */
-#define HAVE_MREMAP 0
-#endif /* linux */
-#endif /* HAVE_MREMAP */
-#ifndef MALLOC_FAILURE_ACTION
-#define MALLOC_FAILURE_ACTION errno = ENOMEM;
-#endif /* MALLOC_FAILURE_ACTION */
-#ifndef HAVE_MORECORE
-#if ONLY_MSPACES
-#define HAVE_MORECORE 0
-#else /* ONLY_MSPACES */
-#define HAVE_MORECORE 1
-#endif /* ONLY_MSPACES */
-#endif /* HAVE_MORECORE */
-#if !HAVE_MORECORE
-#define MORECORE_CONTIGUOUS 0
-#else /* !HAVE_MORECORE */
-#define MORECORE_DEFAULT sbrk
-#ifndef MORECORE_CONTIGUOUS
-#define MORECORE_CONTIGUOUS 1
-#endif /* MORECORE_CONTIGUOUS */
-#endif /* HAVE_MORECORE */
-#ifndef DEFAULT_GRANULARITY
-#if (MORECORE_CONTIGUOUS || defined(WIN32))
-#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
-#else /* MORECORE_CONTIGUOUS */
-#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
-#endif /* MORECORE_CONTIGUOUS */
-#endif /* DEFAULT_GRANULARITY */
-#ifndef DEFAULT_TRIM_THRESHOLD
-#ifndef MORECORE_CANNOT_TRIM
-#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
-#else /* MORECORE_CANNOT_TRIM */
-#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
-#endif /* MORECORE_CANNOT_TRIM */
-#endif /* DEFAULT_TRIM_THRESHOLD */
-#ifndef DEFAULT_MMAP_THRESHOLD
-#if HAVE_MMAP
-#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
-#else /* HAVE_MMAP */
-#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
-#endif /* HAVE_MMAP */
-#endif /* DEFAULT_MMAP_THRESHOLD */
-#ifndef MAX_RELEASE_CHECK_RATE
-#if HAVE_MMAP
-#define MAX_RELEASE_CHECK_RATE 4095
-#else
-#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T
-#endif /* HAVE_MMAP */
-#endif /* MAX_RELEASE_CHECK_RATE */
-#ifndef USE_BUILTIN_FFS
-#define USE_BUILTIN_FFS 0
-#endif /* USE_BUILTIN_FFS */
-#ifndef USE_DEV_RANDOM
-#define USE_DEV_RANDOM 0
-#endif /* USE_DEV_RANDOM */
-#ifndef NO_MALLINFO
-#define NO_MALLINFO 0
-#endif /* NO_MALLINFO */
-#ifndef MALLINFO_FIELD_TYPE
-#define MALLINFO_FIELD_TYPE size_t
-#endif /* MALLINFO_FIELD_TYPE */
-#ifndef NO_SEGMENT_TRAVERSAL
-#define NO_SEGMENT_TRAVERSAL 0
-#endif /* NO_SEGMENT_TRAVERSAL */
-
-/*
- mallopt tuning options. SVID/XPG defines four standard parameter
- numbers for mallopt, normally defined in malloc.h. None of these
- are used in this malloc, so setting them has no effect. But this
- malloc does support the following options.
-*/
-
-#define M_TRIM_THRESHOLD (-1)
-#define M_GRANULARITY (-2)
-#define M_MMAP_THRESHOLD (-3)
-
-/* ------------------------ Mallinfo declarations ------------------------ */
-
-#if !NO_MALLINFO
-/*
- This version of malloc supports the standard SVID/XPG mallinfo
- routine that returns a struct containing usage properties and
- statistics. It should work on any system that has a
- /usr/include/malloc.h defining struct mallinfo. The main
- declaration needed is the mallinfo struct that is returned (by-copy)
- by mallinfo(). The mallinfo struct contains a bunch of fields that
- are not even meaningful in this version of malloc. These fields are
- instead filled by mallinfo() with other numbers that might be of
- interest.
-
- HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
- /usr/include/malloc.h file that includes a declaration of struct
- mallinfo. If so, it is included; else a compliant version is
- declared below. These must be precisely the same for mallinfo() to
- work. The original SVID version of this struct, defined on most
- systems with mallinfo, declares all fields as ints. But some others
- define as unsigned long. If your system defines the fields using a
- type of different width than listed here, you MUST #include your
- system version and #define HAVE_USR_INCLUDE_MALLOC_H.
-*/
-
-/* #define HAVE_USR_INCLUDE_MALLOC_H */
-
-#ifdef HAVE_USR_INCLUDE_MALLOC_H
-#include "/usr/include/malloc.h"
-#else /* HAVE_USR_INCLUDE_MALLOC_H */
-#ifndef STRUCT_MALLINFO_DECLARED
-#define STRUCT_MALLINFO_DECLARED 1
-struct mallinfo { /* usage statistics record returned by copy from mallinfo(); layout must match the system declaration */
- MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */
- MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */
- MALLINFO_FIELD_TYPE smblks; /* always 0 (field kept only for layout compatibility) */
- MALLINFO_FIELD_TYPE hblks; /* always 0 -- NOTE(review): the mallinfo() description in this file says "current number of mmapped regions"; confirm which is right */
- MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */
- MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space; can exceed the current total if trimming has occurred */
- MALLINFO_FIELD_TYPE fsmblks; /* always 0 (field kept only for layout compatibility) */
- MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
- MALLINFO_FIELD_TYPE fordblks; /* total free space */
- MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space, ignoring page restrictions */
-};
-#endif /* STRUCT_MALLINFO_DECLARED */
-#endif /* HAVE_USR_INCLUDE_MALLOC_H */
-#endif /* NO_MALLINFO */
-
-/*
- Try to persuade compilers to inline. The most critical functions for
- inlining are defined as macros, so these aren't used for them.
-*/
-
-#ifndef FORCEINLINE
- #if defined(__GNUC__)
-#define FORCEINLINE __inline __attribute__ ((always_inline))
- #elif defined(_MSC_VER)
- #define FORCEINLINE __forceinline
- #endif
-#endif
-#ifndef NOINLINE
- #if defined(__GNUC__)
- #define NOINLINE __attribute__ ((noinline))
- #elif defined(_MSC_VER)
- #define NOINLINE __declspec(noinline)
- #else
- #define NOINLINE
- #endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#ifndef FORCEINLINE
- #define FORCEINLINE inline
-#endif
-#endif /* __cplusplus */
-#ifndef FORCEINLINE
- #define FORCEINLINE
-#endif
-
-#if !ONLY_MSPACES
-
-/* ------------------- Declarations of public routines ------------------- */
-
-#ifndef USE_DL_PREFIX
-#define dlcalloc calloc
-#define dlfree free
-#define dlmalloc malloc
-#define dlmemalign memalign
-#define dlrealloc realloc
-#define dlvalloc valloc
-#define dlpvalloc pvalloc
-#define dlmallinfo mallinfo
-#define dlmallopt mallopt
-#define dlmalloc_trim malloc_trim
-#define dlmalloc_stats malloc_stats
-#define dlmalloc_usable_size malloc_usable_size
-#define dlmalloc_footprint malloc_footprint
-#define dlmalloc_max_footprint malloc_max_footprint
-#define dlindependent_calloc independent_calloc
-#define dlindependent_comalloc independent_comalloc
-#endif /* USE_DL_PREFIX */
-
-
-/*
- malloc(size_t n)
- Returns a pointer to a newly allocated chunk of at least n bytes, or
- null if no space is available, in which case errno is set to ENOMEM
- on ANSI C systems.
-
- If n is zero, malloc returns a minimum-sized chunk. (The minimum
- size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
- systems.) Note that size_t is an unsigned type, so calls with
- arguments that would be negative if signed are interpreted as
- requests for huge amounts of space, which will often fail. The
- maximum supported value of n differs across systems, but is in all
- cases less than the maximum representable value of a size_t.
-*/
-void* dlmalloc(size_t);
-
-/*
- free(void* p)
- Releases the chunk of memory pointed to by p, that had been previously
- allocated using malloc or a related routine such as realloc.
- It has no effect if p is null. If p was not malloced or already
- freed, free(p) will by default cause the current program to abort.
-*/
-void dlfree(void*);
-
-/*
- calloc(size_t n_elements, size_t element_size);
- Returns a pointer to n_elements * element_size bytes, with all locations
- set to zero.
-*/
-void* dlcalloc(size_t, size_t);
-
-/*
- realloc(void* p, size_t n)
- Returns a pointer to a chunk of size n that contains the same data
- as does chunk p up to the minimum of (n, p's size) bytes, or null
- if no space is available.
-
- The returned pointer may or may not be the same as p. The algorithm
- prefers extending p in most cases when possible, otherwise it
- employs the equivalent of a malloc-copy-free sequence.
-
- If p is null, realloc is equivalent to malloc.
-
- If space is not available, realloc returns null, errno is set (if on
- ANSI) and p is NOT freed.
-
- if n is for fewer bytes than already held by p, the newly unused
- space is lopped off and freed if possible. realloc with a size
- argument of zero (re)allocates a minimum-sized chunk.
-
- The old unix realloc convention of allowing the last-free'd chunk
- to be used as an argument to realloc is not supported.
-*/
-
-void* dlrealloc(void*, size_t);
-
-/*
- memalign(size_t alignment, size_t n);
- Returns a pointer to a newly allocated chunk of n bytes, aligned
- in accord with the alignment argument.
-
- The alignment argument should be a power of two. If the argument is
- not a power of two, the nearest greater power is used.
- 8-byte alignment is guaranteed by normal malloc calls, so don't
- bother calling memalign with an argument of 8 or less.
-
- Overreliance on memalign is a sure way to fragment space.
-*/
-void* dlmemalign(size_t, size_t);
-
-/*
- valloc(size_t n);
- Equivalent to memalign(pagesize, n), where pagesize is the page
- size of the system. If the pagesize is unknown, 4096 is used.
-*/
-void* dlvalloc(size_t);
-
-/*
- mallopt(int parameter_number, int parameter_value)
- Sets tunable parameters. The format is to provide a
- (parameter-number, parameter-value) pair. mallopt then sets the
- corresponding parameter to the argument value if it can (i.e., so
- long as the value is meaningful), and returns 1 if successful else
- 0. To workaround the fact that mallopt is specified to use int,
- not size_t parameters, the value -1 is specially treated as the
- maximum unsigned size_t value.
-
- SVID/XPG/ANSI defines four standard param numbers for mallopt,
- normally defined in malloc.h. None of these are used in this malloc,
- so setting them has no effect. But this malloc also supports other
- options in mallopt. See below for details. Briefly, supported
- parameters are as follows (listed defaults are for "typical"
- configurations).
-
- Symbol param # default allowed param values
- M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables)
- M_GRANULARITY -2 page size any power of 2 >= page size
- M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support)
-*/
-int dlmallopt(int, int);
-
-/*
- malloc_footprint();
- Returns the number of bytes obtained from the system. The total
- number of bytes allocated by malloc, realloc etc., is less than this
- value. Unlike mallinfo, this function returns only a precomputed
- result, so can be called frequently to monitor memory consumption.
- Even if locks are otherwise defined, this function does not use them,
- so results might not be up to date.
-*/
-size_t dlmalloc_footprint(void);
-
-/*
- malloc_max_footprint();
- Returns the maximum number of bytes obtained from the system. This
- value will be greater than current footprint if deallocated space
- has been reclaimed by the system. The peak number of bytes allocated
- by malloc, realloc etc., is less than this value. Unlike mallinfo,
- this function returns only a precomputed result, so can be called
- frequently to monitor memory consumption. Even if locks are
- otherwise defined, this function does not use them, so results might
- not be up to date.
-*/
-size_t dlmalloc_max_footprint(void);
-
-#if !NO_MALLINFO
-/*
- mallinfo()
- Returns (by copy) a struct containing various summary statistics:
-
- arena: current total non-mmapped bytes allocated from system
- ordblks: the number of free chunks
- smblks: always zero.
- hblks: current number of mmapped regions
- hblkhd: total bytes held in mmapped regions
- usmblks: the maximum total allocated space. This will be greater
- than current total if trimming has occurred.
- fsmblks: always zero
- uordblks: current total allocated space (normal or mmapped)
- fordblks: total free space
- keepcost: the maximum number of bytes that could ideally be released
- back to system via malloc_trim. ("ideally" means that
- it ignores page restrictions etc.)
-
- Because these fields are ints, but internal bookkeeping may
- be kept as longs, the reported values may wrap around zero and
- thus be inaccurate.
-*/
-struct mallinfo dlmallinfo(void);
-#endif /* NO_MALLINFO */
-
-/*
- independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
-
- independent_calloc is similar to calloc, but instead of returning a
- single cleared space, it returns an array of pointers to n_elements
- independent elements that can hold contents of size elem_size, each
- of which starts out cleared, and can be independently freed,
- realloc'ed etc. The elements are guaranteed to be adjacently
- allocated (this is not guaranteed to occur with multiple callocs or
- mallocs), which may also improve cache locality in some
- applications.
-
- The "chunks" argument is optional (i.e., may be null, which is
- probably the most typical usage). If it is null, the returned array
- is itself dynamically allocated and should also be freed when it is
- no longer needed. Otherwise, the chunks array must be of at least
- n_elements in length. It is filled in with the pointers to the
- chunks.
-
- In either case, independent_calloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and "chunks"
- is null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use regular calloc and assign pointers into this
- space to represent elements. (In this case though, you cannot
- independently free elements.)
-
- independent_calloc simplifies and speeds up implementations of many
- kinds of pools. It may also be useful when constructing large data
- structures that initially have a fixed number of fixed-sized nodes,
- but the number is not known at compile time, and some of the nodes
- may later need to be freed. For example:
-
- struct Node { int item; struct Node* next; };
-
- struct Node* build_list() {
- struct Node** pool;
- int n = read_number_of_nodes_needed();
- if (n <= 0) return 0;
- pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0));
- if (pool == 0) die();
- // organize into a linked list...
- struct Node* first = pool[0];
- for (i = 0; i < n-1; ++i)
- pool[i]->next = pool[i+1];
- free(pool); // Can now free the array (or not, if it is needed later)
- return first;
- }
-*/
-void** dlindependent_calloc(size_t, size_t, void**);
-
-/*
- independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
-
- independent_comalloc allocates, all at once, a set of n_elements
- chunks with sizes indicated in the "sizes" array. It returns
- an array of pointers to these elements, each of which can be
- independently freed, realloc'ed etc. The elements are guaranteed to
- be adjacently allocated (this is not guaranteed to occur with
- multiple callocs or mallocs), which may also improve cache locality
- in some applications.
-
- The "chunks" argument is optional (i.e., may be null). If it is null
- the returned array is itself dynamically allocated and should also
- be freed when it is no longer needed. Otherwise, the chunks array
- must be of at least n_elements in length. It is filled in with the
- pointers to the chunks.
-
- In either case, independent_comalloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and chunks is
- null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use a single regular malloc, and assign pointers at
- particular offsets in the aggregate space. (In this case though, you
- cannot independently free elements.)
-
- independent_comalloc differs from independent_calloc in that each
- element may have a different size, and also that it does not
- automatically clear elements.
-
- independent_comalloc can be used to speed up allocation in cases
- where several structs or objects must always be allocated at the
- same time. For example:
-
- struct Head { ... }
- struct Foot { ... }
-
- void send_message(char* msg) {
- int msglen = strlen(msg);
- size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
- void* chunks[3];
- if (independent_comalloc(3, sizes, chunks) == 0)
- die();
- struct Head* head = (struct Head*)(chunks[0]);
- char* body = (char*)(chunks[1]);
- struct Foot* foot = (struct Foot*)(chunks[2]);
- // ...
- }
-
- In general though, independent_comalloc is worth using only for
- larger values of n_elements. For small values, you probably won't
- detect enough difference from series of malloc calls to bother.
-
- Overuse of independent_comalloc can increase overall memory usage,
- since it cannot reuse existing noncontiguous small chunks that
- might be available for some of the elements.
-*/
-void** dlindependent_comalloc(size_t, size_t*, void**);
-
-
-/*
- pvalloc(size_t n);
- Equivalent to valloc(minimum-page-that-holds(n)), that is,
- round up n to nearest pagesize.
- */
-void* dlpvalloc(size_t);
-
-/*
- malloc_trim(size_t pad);
-
- If possible, gives memory back to the system (via negative arguments
- to sbrk) if there is unused memory at the `high' end of the malloc
- pool or in unused MMAP segments. You can call this after freeing
- large blocks of memory to potentially reduce the system-level memory
- requirements of a program. However, it cannot guarantee to reduce
- memory. Under some allocation patterns, some large free blocks of
- memory will be locked between two used chunks, so they cannot be
- given back to the system.
-
- The `pad' argument to malloc_trim represents the amount of free
- trailing space to leave untrimmed. If this argument is zero, only
- the minimum amount of memory to maintain internal data structures
- will be left. Non-zero arguments can be supplied to maintain enough
- trailing space to service future expected allocations without having
- to re-obtain memory from the system.
-
- Malloc_trim returns 1 if it actually released any memory, else 0.
-*/
-int dlmalloc_trim(size_t);
-
-/*
- malloc_stats();
- Prints on stderr the amount of space obtained from the system (both
- via sbrk and mmap), the maximum amount (which may be more than
- current if malloc_trim and/or munmap got called), and the current
- number of bytes allocated via malloc (or realloc, etc) but not yet
- freed. Note that this is the number of bytes allocated, not the
- number requested. It will be larger than the number requested
- because of alignment and bookkeeping overhead. Because it includes
- alignment wastage as being in use, this figure may be greater than
- zero even when no user-level chunks are allocated.
-
- The reported current and maximum system memory can be inaccurate if
- a program makes other calls to system memory allocation functions
- (normally sbrk) outside of malloc.
-
- malloc_stats prints only the most commonly interesting statistics.
- More information can be obtained by calling mallinfo.
-*/
-void dlmalloc_stats(void);
-
-#endif /* ONLY_MSPACES */
-
-/*
- malloc_usable_size(void* p);
-
- Returns the number of bytes you can actually use in
- an allocated chunk, which may be more than you requested (although
- often not) due to alignment and minimum size constraints.
- You can use this many bytes without worrying about
- overwriting other allocated objects. This is not a particularly great
- programming practice. malloc_usable_size can be more useful in
- debugging and assertions, for example:
-
- p = malloc(n);
- assert(malloc_usable_size(p) >= 256);
-*/
-size_t dlmalloc_usable_size(void*);
-
-
-#if MSPACES
-
-/*
- mspace is an opaque type representing an independent
- region of space that supports mspace_malloc, etc.
-*/
-typedef void* mspace;
-
-/*
- create_mspace creates and returns a new independent space with the
- given initial capacity, or, if 0, the default granularity size. It
- returns null if there is no system memory available to create the
- space. If argument locked is non-zero, the space uses a separate
- lock to control access. The capacity of the space will grow
- dynamically as needed to service mspace_malloc requests. You can
- control the sizes of incremental increases of this space by
- compiling with a different DEFAULT_GRANULARITY or dynamically
- setting with mallopt(M_GRANULARITY, value).
-*/
-mspace create_mspace(size_t capacity, int locked);
-
-/*
- destroy_mspace destroys the given space, and attempts to return all
- of its memory back to the system, returning the total number of
- bytes freed. After destruction, the results of access to all memory
- used by the space become undefined.
-*/
-size_t destroy_mspace(mspace msp);
-
-/*
- create_mspace_with_base uses the memory supplied as the initial base
- of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
- space is used for bookkeeping, so the capacity must be at least this
- large. (Otherwise 0 is returned.) When this initial space is
- exhausted, additional memory will be obtained from the system.
- Destroying this space will deallocate all additionally allocated
- space (if possible) but not the initial base.
-*/
-mspace create_mspace_with_base(void* base, size_t capacity, int locked);
-
-/*
- mspace_track_large_chunks controls whether requests for large chunks
- are allocated in their own untracked mmapped regions, separate from
- others in this mspace. By default large chunks are not tracked,
- which reduces fragmentation. However, such chunks are not
- necessarily released to the system upon destroy_mspace. Enabling
- tracking by setting to true may increase fragmentation, but avoids
- leakage when relying on destroy_mspace to release all memory
- allocated using this space. The function returns the previous
- setting.
-*/
-int mspace_track_large_chunks(mspace msp, int enable);
-
-
-/*
- mspace_malloc behaves as malloc, but operates within
- the given space.
-*/
-void* mspace_malloc(mspace msp, size_t bytes);
-
-/*
- mspace_free behaves as free, but operates within
- the given space.
-
- If compiled with FOOTERS==1, mspace_free is not actually needed.
- free may be called instead of mspace_free because freed chunks from
- any space are handled by their originating spaces.
-*/
-void mspace_free(mspace msp, void* mem);
-
-/*
- mspace_realloc behaves as realloc, but operates within
- the given space.
-
- If compiled with FOOTERS==1, mspace_realloc is not actually
- needed. realloc may be called instead of mspace_realloc because
- realloced chunks from any space are handled by their originating
- spaces.
-*/
-void* mspace_realloc(mspace msp, void* mem, size_t newsize);
-
-/*
- mspace_calloc behaves as calloc, but operates within
- the given space.
-*/
-void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
-
-/*
- mspace_memalign behaves as memalign, but operates within
- the given space.
-*/
-void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
-
-/*
- mspace_independent_calloc behaves as independent_calloc, but
- operates within the given space.
-*/
-void** mspace_independent_calloc(mspace msp, size_t n_elements,
- size_t elem_size, void* chunks[]);
-
-/*
- mspace_independent_comalloc behaves as independent_comalloc, but
- operates within the given space.
-*/
-void** mspace_independent_comalloc(mspace msp, size_t n_elements,
- size_t sizes[], void* chunks[]);
-
-/*
- mspace_footprint() returns the number of bytes obtained from the
- system for this space.
-*/
-size_t mspace_footprint(mspace msp);
-
-/*
- mspace_max_footprint() returns the peak number of bytes obtained from the
- system for this space.
-*/
-size_t mspace_max_footprint(mspace msp);
-
-
-#if !NO_MALLINFO
-/*
- mspace_mallinfo behaves as mallinfo, but reports properties of
- the given space.
-*/
-struct mallinfo mspace_mallinfo(mspace msp);
-#endif /* NO_MALLINFO */
-
-/*
- mspace_usable_size(void* mem) behaves the same as malloc_usable_size.
-*/
- size_t mspace_usable_size(void* mem);
-
-/*
- mspace_malloc_stats behaves as malloc_stats, but reports
- properties of the given space.
-*/
-void mspace_malloc_stats(mspace msp);
-
-/*
- mspace_trim behaves as malloc_trim, but
- operates within the given space.
-*/
-int mspace_trim(mspace msp, size_t pad);
-
-/*
- An alias for mallopt.
-*/
-int mspace_mallopt(int, int);
-
-#endif /* MSPACES */
-
-#ifdef __cplusplus
-} /* end of extern "C" */
-#endif /* __cplusplus */
-
-/*
- ========================================================================
- To make a fully customizable malloc.h header file, cut everything
- above this line, put into file malloc.h, edit to suit, and #include it
- on the next line, as well as in programs that use this malloc.
- ========================================================================
-*/
-
-/* #include "malloc.h" */
-
-/*------------------------------ internal #includes ---------------------- */
-
-#ifdef WIN32
-#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
-#endif /* WIN32 */
-
-#include <stdio.h> /* for printing in malloc_stats */
-
-#ifndef LACKS_ERRNO_H
-#include <errno.h> /* for MALLOC_FAILURE_ACTION */
-#endif /* LACKS_ERRNO_H */
-#if FOOTERS || DEBUG
-#include <time.h> /* for magic initialization */
-#endif /* FOOTERS */
-#ifndef LACKS_STDLIB_H
-#include <stdlib.h> /* for abort() */
-#endif /* LACKS_STDLIB_H */
-#ifdef DEBUG
-#if ABORT_ON_ASSERT_FAILURE
-#undef assert
-#define assert(x) if(!(x)) ABORT
-#else /* ABORT_ON_ASSERT_FAILURE */
-#include <assert.h>
-#endif /* ABORT_ON_ASSERT_FAILURE */
-#else /* DEBUG */
-#ifndef assert
-#define assert(x)
-#endif
-#define DEBUG 0
-#endif /* DEBUG */
-#ifndef LACKS_STRING_H
-#include <string.h> /* for memset etc */
-#endif /* LACKS_STRING_H */
-#if USE_BUILTIN_FFS
-#ifndef LACKS_STRINGS_H
-#include <strings.h> /* for ffs */
-#endif /* LACKS_STRINGS_H */
-#endif /* USE_BUILTIN_FFS */
-#if HAVE_MMAP
-#ifndef LACKS_SYS_MMAN_H
-/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
-#if (defined(linux) && !defined(__USE_GNU))
-#define __USE_GNU 1
-#include <sys/mman.h> /* for mmap */
-#undef __USE_GNU
-#else
-#include <sys/mman.h> /* for mmap */
-#endif /* linux */
-#endif /* LACKS_SYS_MMAN_H */
-#ifndef LACKS_FCNTL_H
-#include <fcntl.h>
-#endif /* LACKS_FCNTL_H */
-#endif /* HAVE_MMAP */
-#ifndef LACKS_UNISTD_H
-#include <unistd.h> /* for sbrk, sysconf */
-#else /* LACKS_UNISTD_H */
-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
-extern void* sbrk(ptrdiff_t);
-#endif /* FreeBSD etc */
-#endif /* LACKS_UNISTD_H */
-
-/* Declarations for locking */
-#if USE_LOCKS
-#ifndef WIN32
-#include <pthread.h>
-#if defined (__SVR4) && defined (__sun) /* solaris */
-#include <thread.h>
-#endif /* solaris */
-#else
-#ifndef _M_AMD64
-/* These are already defined on AMD64 builds */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp);
-LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value);
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-#endif /* _M_AMD64 */
-#pragma intrinsic (_InterlockedCompareExchange)
-#pragma intrinsic (_InterlockedExchange)
-#define interlockedcompareexchange _InterlockedCompareExchange
-#define interlockedexchange _InterlockedExchange
-#endif /* Win32 */
-#endif /* USE_LOCKS */
-
-/* Declarations for bit scanning on win32 */
-#if defined(_MSC_VER) && _MSC_VER>=1300
-#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
-unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#define BitScanForward _BitScanForward
-#define BitScanReverse _BitScanReverse
-#pragma intrinsic(_BitScanForward)
-#pragma intrinsic(_BitScanReverse)
-#endif /* BitScanForward */
-#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */
-
-#ifndef WIN32
-#ifndef malloc_getpagesize
-# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
-# ifndef _SC_PAGE_SIZE
-# define _SC_PAGE_SIZE _SC_PAGESIZE
-# endif
-# endif
-# ifdef _SC_PAGE_SIZE
-# define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
-# else
-# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
- extern size_t getpagesize();
-# define malloc_getpagesize getpagesize()
-# else
-# ifdef WIN32 /* use supplied emulation of getpagesize */
-# define malloc_getpagesize getpagesize()
-# else
-# ifndef LACKS_SYS_PARAM_H
-# include <sys/param.h>
-# endif
-# ifdef EXEC_PAGESIZE
-# define malloc_getpagesize EXEC_PAGESIZE
-# else
-# ifdef NBPG
-# ifndef CLSIZE
-# define malloc_getpagesize NBPG
-# else
-# define malloc_getpagesize (NBPG * CLSIZE)
-# endif
-# else
-# ifdef NBPC
-# define malloc_getpagesize NBPC
-# else
-# ifdef PAGESIZE
-# define malloc_getpagesize PAGESIZE
-# else /* just guess */
-# define malloc_getpagesize ((size_t)4096U)
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-#endif
-#endif
-
-
-
-/* ------------------- size_t and alignment properties -------------------- */
-
-/* The byte and bit size of a size_t */
-#define SIZE_T_SIZE (sizeof(size_t))
-#define SIZE_T_BITSIZE (sizeof(size_t) << 3)
-
-/* Some constants coerced to size_t */
-/* Annoying but necessary to avoid errors on some platforms */
-#define SIZE_T_ZERO ((size_t)0)
-#define SIZE_T_ONE ((size_t)1)
-#define SIZE_T_TWO ((size_t)2)
-#define SIZE_T_FOUR ((size_t)4)
-#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1)
-#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2)
-#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
-#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U)
-
-/* The bit mask value corresponding to MALLOC_ALIGNMENT */
-#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
-
-/* True if address a has acceptable alignment */
-#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
-
-/* the number of bytes to offset an address to align it */
-#define align_offset(A)\
- ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
- ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
-
-/*
- malloc_params holds global properties, including those that can be
- dynamically set using mallopt. There is a single instance, mparams,
- initialized in init_mparams. Note that the non-zeroness of "magic"
- also serves as an initialization flag.
-*/
-typedef unsigned int flag_t; /* type used for the default_mflags field below */
-struct malloc_params {
- volatile size_t magic; /* nonzero once initialized; ensure_initialization() tests it against 0 */
- size_t page_size; /* NOTE(review): presumably the system page size, filled in by init_mparams -- confirm */
- size_t granularity; /* alignment unit: the aligned mmap/VirtualAlloc helpers in this file align addresses to it */
- size_t mmap_threshold; /* NOTE(review): looks like the request size from which direct mmap is preferred -- confirm */
- size_t trim_threshold; /* NOTE(review): looks like the amount of free top space that triggers trimming -- confirm */
- flag_t default_mflags; /* NOTE(review): presumably the initial flag bits given to a malloc state -- confirm */
-};
-
-static struct malloc_params mparams;
-
-/* Ensure mparams initialized */
-#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
-
-/* -------------------------- MMAP preliminaries ------------------------- */
-
-/*
- If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
- checks to fail so compiler optimizer can delete code rather than
- using so many "#if"s.
-*/
-
-
-/* MORECORE and MMAP must return MFAIL on failure */
-#define MFAIL ((void*)(MAX_SIZE_T))
-#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */
-
-#if HAVE_MMAP
-
-#ifndef WIN32
-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-#define MAP_ANONYMOUS MAP_ANON
-#endif /* MAP_ANON */
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-#define MMAP_IMPL mmap_aligned
-static void* lastAlignedmmap; /* Used as a hint */
-/*
-  mmap_aligned: mmap() wrapper that hands back regions whose start address
-  is a multiple of mparams.granularity.
-
-  start   if non-null, the request is passed straight to mmap() with this
-          address and no alignment work is done.
-  length, prot, flags, fd, offset
-          forwarded to mmap() unchanged.
-
-  Returns the mapped address, or MAP_FAILED when mmap() fails.
-
-  mmap() reports failure with MAP_FAILED rather than a null pointer, so
-  every result is compared against MAP_FAILED.  Addresses are only ever
-  passed as hints, never with MAP_FIXED, because MAP_FIXED silently
-  replaces whatever is already mapped at the target address.  If the
-  hinted mapping comes back misaligned it is dropped and replaced by an
-  over-sized mapping that is trimmed down to an aligned window, so the
-  function needs at most two mmap() calls and always terminates.
-
-  Assumes mparams has been initialized and that granularity is a power of
-  two no smaller than page_size.
-*/
-static void* mmap_aligned(void *start, size_t length, int prot, int flags, int fd, off_t offset) {
-  const size_t align_mask = mparams.granularity - SIZE_T_ONE;
-  void* ptr;
-  if (start != 0) {
-    /* Caller chose the address: plain pass-through */
-    ptr = mmap(start, length, prot, flags, fd, offset);
-  }
-  else {
-    /* First attempt: ask for the spot right after the previous region */
-    ptr = mmap(lastAlignedmmap, length, prot, flags, fd, offset);
-    if (ptr != MAP_FAILED && ((size_t)ptr & align_mask) != 0) {
-      /* The kernel maps whole pages, so trim on page boundaries */
-      const size_t page_mask = mparams.page_size - SIZE_T_ONE;
-      const size_t mapped = (length + page_mask) & ~page_mask;
-      const size_t span = mapped + mparams.granularity;
-      munmap(ptr, length);
-      ptr = MAP_FAILED;
-      if (mapped >= length && span > mapped) { /* no size_t wrap-around */
-        char* raw = (char*)mmap(0, span, prot, flags, fd, offset);
-        if (raw != (char*)MAP_FAILED) {
-          char* aligned = (char*)(((size_t)raw + align_mask) & ~align_mask);
-          const size_t lead = (size_t)(aligned - raw);
-          if (lead != 0) munmap(raw, lead);
-          /* lead < granularity, so the surplus tail is never empty */
-          munmap(aligned + mapped, span - lead - mapped);
-          ptr = aligned;
-        }
-      }
-    }
-  }
-  /* Remember where the next aligned region could start -- on success only */
-  if (ptr != MAP_FAILED)
-    lastAlignedmmap = (void*)(((size_t)ptr + length + align_mask) & ~align_mask);
-  return ptr;
-}
-#else
-#define MMAP_IMPL mmap
-#endif /* DEFAULT_GRANULARITY_ALIGNED */
-#define MUNMAP_DEFAULT(a, s) munmap((a), (s))
-#define MMAP_PROT (PROT_READ|PROT_WRITE)
-#ifdef MAP_ANONYMOUS
-#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
-#define MMAP_DEFAULT(s) MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
-#else /* MAP_ANONYMOUS */
-/*
- Nearly all versions of mmap support MAP_ANONYMOUS, so the following
- is unlikely to be needed, but is supplied just in case.
-*/
-#define MMAP_FLAGS (MAP_PRIVATE)
-static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
-#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \
- (dev_zero_fd = open("/dev/zero", O_RDWR), \
- MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
- MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
-#endif /* MAP_ANONYMOUS */
-
-#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
-
-#else /* WIN32 */
-
-/* Win32 MMAP via VirtualAlloc */
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-static void* lastWin32mmap; /* Used as a hint */
-#endif /* DEFAULT_GRANULARITY_ALIGNED */
-#ifdef ENABLE_LARGE_PAGES
-static int largepagesavailable = 1;
-#endif /* ENABLE_LARGE_PAGES */
-/*
-  win32mmap: obtain `size` bytes of committed, read/write memory through
-  VirtualAlloc.  Returns the region's address, or MFAIL on failure.
-
-  With ENABLE_LARGE_PAGES, requests of at least 1MB first try a large-page
-  allocation; if that fails for lack of privilege, large pages are not
-  attempted again.
-
-  With DEFAULT_GRANULARITY_ALIGNED, address space is first reserved at
-  successive multiples of mparams.granularity (starting at the hint left
-  by the previous call) and the reservation that succeeds is then
-  committed.  The reserved address is carried over to the commit step so
-  that a reservation placed by the system (hint == 0) is committed rather
-  than abandoned, and the reservation is released again if the commit
-  fails.
-
-  NOTE(review): the reservation loop has no upper bound; if no aligned
-  address can ever be reserved it does not terminate.
-*/
-static FORCEINLINE void* win32mmap(size_t size) {
-  void* baseaddress = 0;
-  void* ptr = 0;
-#ifdef ENABLE_LARGE_PAGES
-  /* Note that large pages are *always* allocated on a large page boundary.
-     If however granularity is small then don't waste a kernel call if size
-     isn't around the size of a large page */
-  if(largepagesavailable && size >= 1*1024*1024) {
-    ptr = VirtualAlloc(baseaddress, size, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE);
-    if(!ptr && ERROR_PRIVILEGE_NOT_HELD==GetLastError()) largepagesavailable=0;
-  }
-#endif
-  if(!ptr) {
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-    /* We try to avoid overhead by speculatively reserving at aligned
-       addresses until we succeed */
-    baseaddress = lastWin32mmap;
-    for(;;) {
-      void* reserveaddr = VirtualAlloc(baseaddress, size, MEM_RESERVE, PAGE_READWRITE);
-      if(!reserveaddr)
-        baseaddress = (void*)((size_t)baseaddress + mparams.granularity);
-      else if((size_t)reserveaddr & (mparams.granularity - SIZE_T_ONE)) {
-        VirtualFree(reserveaddr, 0, MEM_RELEASE);
-        baseaddress = (void*)(((size_t)reserveaddr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE));
-      }
-      else {
-        /* Commit inside the region just reserved, even when the system
-           picked its address for us */
-        baseaddress = reserveaddr;
-        break;
-      }
-    }
-#endif
-    ptr = VirtualAlloc(baseaddress, size, baseaddress ? MEM_COMMIT : MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
-#if DEBUG && defined(DEFAULT_GRANULARITY_ALIGNED)
-    /* lastWin32mmap only exists in granularity-aligned builds */
-    if(lastWin32mmap && ptr!=lastWin32mmap) printf("Non-contiguous VirtualAlloc between %p and %p\n", ptr, lastWin32mmap);
-#endif
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-    if(ptr) lastWin32mmap = (void*)((size_t) ptr + mparams.granularity);
-    else if(baseaddress) VirtualFree(baseaddress, 0, MEM_RELEASE); /* commit failed: give the reservation back */
-#endif
-  }
-#if DEBUG
-  /* size_t is wider than unsigned int on 64-bit targets, so convert explicitly */
-#ifdef ENABLE_LARGE_PAGES
-  printf("VirtualAlloc returns %p size %lu. LargePagesAvailable=%d\n", ptr, (unsigned long)size, largepagesavailable);
-#else
-  printf("VirtualAlloc returns %p size %lu\n", ptr, (unsigned long)size);
-#endif
-#endif
-  return (ptr != 0)? ptr: MFAIL;
-}
-
-/*
-  Direct-mmap variant: reserves and commits `size` bytes in one call,
-  asking for MEM_TOP_DOWN placement to minimize interference.
-  Returns the region, or MFAIL when VirtualAlloc yields no memory.
-*/
-static FORCEINLINE void* win32direct_mmap(size_t size) {
-  void* region = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
-                              PAGE_READWRITE);
-  if (region == 0)
-    return MFAIL;
-  return region;
-}
-
-/*
-  Releases [ptr, ptr+size), which may span several coalesced segments.
-  The range is walked one region at a time; each region must start exactly
-  at the cursor, be its own allocation base, be committed, and fit inside
-  what is left of the range.  Returns 0 when everything was released and
-  -1 as soon as a query, a check or a release fails.
-*/
-static FORCEINLINE int win32munmap(void* ptr, size_t size) {
-  MEMORY_BASIC_INFORMATION region;
-  char* cursor = (char*)ptr;
-  size_t remaining = size;
-  while (remaining != 0) {
-    int releasable;
-    if (!VirtualQuery(cursor, &region, sizeof(region)))
-      return -1;
-    releasable = region.BaseAddress == cursor &&
-                 region.AllocationBase == cursor &&
-                 region.State == MEM_COMMIT &&
-                 region.RegionSize <= remaining;
-    if (!releasable)
-      return -1;
-    if (!VirtualFree(cursor, 0, MEM_RELEASE))
-      return -1;
-    cursor += region.RegionSize;
-    remaining -= region.RegionSize;
-  }
-  return 0;
-}
-
-#define MMAP_DEFAULT(s) win32mmap(s)
-#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s))
-#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s)
-#endif /* WIN32 */
-#endif /* HAVE_MMAP */
-
-#if HAVE_MREMAP
-#ifndef WIN32
-#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
-#endif /* WIN32 */
-#endif /* HAVE_MREMAP */
-
-
-/**
- * Define CALL_MORECORE
- */
-#if HAVE_MORECORE
- #ifdef MORECORE
- #define CALL_MORECORE(S) MORECORE(S)
- #else /* MORECORE */
- #define CALL_MORECORE(S) MORECORE_DEFAULT(S)
- #endif /* MORECORE */
-#else /* HAVE_MORECORE */
- #define CALL_MORECORE(S) MFAIL
-#endif /* HAVE_MORECORE */
-
-/**
- * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
- */
-#if HAVE_MMAP
- #define USE_MMAP_BIT (SIZE_T_ONE)
-
- #ifdef MMAP
- #define CALL_MMAP(s) MMAP(s)
- #else /* MMAP */
- #define CALL_MMAP(s) MMAP_DEFAULT(s)
- #endif /* MMAP */
- #ifdef MUNMAP
- #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
- #else /* MUNMAP */
- #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s))
- #endif /* MUNMAP */
- #ifdef DIRECT_MMAP
- #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
- #else /* DIRECT_MMAP */
- #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
- #endif /* DIRECT_MMAP */
-#else /* HAVE_MMAP */
- #define USE_MMAP_BIT (SIZE_T_ZERO)
-
- #define MMAP(s) MFAIL
- #define MUNMAP(a, s) (-1)
- #define DIRECT_MMAP(s) MFAIL
- #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
- #define CALL_MMAP(s) MMAP(s)
- #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
-#endif /* HAVE_MMAP */
-
-/**
- * Define CALL_MREMAP
- */
-#if HAVE_MMAP && HAVE_MREMAP
- #ifdef MREMAP
- #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
- #else /* MREMAP */
- #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
- #endif /* MREMAP */
-#else /* HAVE_MMAP && HAVE_MREMAP */
- #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL
-#endif /* HAVE_MMAP && HAVE_MREMAP */
-
-/* mstate bit set if contiguous morecore disabled or failed */
-#define USE_NONCONTIGUOUS_BIT (4U)
-
-/* segment bit set in create_mspace_with_base */
-#define EXTERN_BIT (8U)
-
-
-/* --------------------------- Lock preliminaries ------------------------ */
-
-/*
- When locks are defined, there is one global lock, plus
- one per-mspace lock.
-
- The global lock ensures that mparams.magic and other unique
- mparams values are initialized only once. It also protects
- sequences of calls to MORECORE. In many cases sys_alloc requires
- two calls, that should not be interleaved with calls by other
- threads. This does not protect against direct calls to MORECORE
- by other threads not using this lock, so there is still code to
- cope the best we can on interference.
-
- Per-mspace locks surround calls to malloc, free, etc. To enable use
- in layered extensions, per-mspace locks are reentrant.
-
- Because lock-protected regions generally have bounded times, it is
- OK to use the supplied simple spinlocks in the custom versions for
- x86. Spinlocks are likely to improve performance for lightly
- contended applications, but worsen performance under heavy
- contention.
-
- If USE_LOCKS is > 1, the definitions of lock routines here are
- bypassed, in which case you will need to define the type MLOCK_T,
- and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly
- TRY_LOCK (which is not used in this malloc, but commonly needed in
- extensions.) You must also declare a
- static MLOCK_T malloc_global_mutex = { initialization values };.
-
-*/
-
-#if USE_LOCKS == 1
-
-#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE
-#ifndef WIN32
-
-/* Custom pthread-style spin locks on x86 and x64 for gcc */
-struct pthread_mlock_t {
- volatile unsigned int l; /* lock word: 0 when free; the acquire/try routines cmpxchg it from 0 to 1 */
- char cachelinepadding[64]; /* NOTE(review): by its name, padding between l and the fields below; never read or written by the lock routines */
- unsigned int c; /* recursion depth: 1 on first acquire, ++ on re-entry by the owner, -- on each release */
- pthread_t threadid; /* thread currently holding the lock, 0 while unowned */
-};
-#define MLOCK_T struct pthread_mlock_t
-#define CURRENT_THREAD pthread_self()
-#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
-#define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl)
-#define RELEASE_LOCK(sl) pthread_release_lock(sl)
-#define TRY_LOCK(sl) pthread_try_lock(sl)
-#define SPINS_PER_YIELD 63
-
-static MLOCK_T malloc_global_mutex = { 0, "", 0, 0};
-
-static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) { /*
- Acquires the recursive spin lock sl, spinning until it is obtained.
- If the calling thread already owns the lock, only the recursion count
- is bumped.  The only return value is 0.
- */
- int spins = 0; /* loop iterations so far; drives the periodic yield below */
- volatile unsigned int* lp = &sl->l;
- for (;;) {
- if (*lp != 0) {
- /* Lock word is set: succeed only if we are the thread that set it */
- if (sl->threadid == CURRENT_THREAD) {
- ++sl->c;
- return 0;
- }
- }
- else {
- /* place args to cmpxchgl in locals to evade oddities in some gccs */
- int cmp = 0;
- int val = 1;
- int ret;
- /* Atomically: if *lp == cmp (0) store val (1); ret receives the old *lp */
- __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
- : "=a" (ret)
- : "r" (val), "m" (*(lp)), "0"(cmp)
- : "memory", "cc");
- if (!ret) { /* old value was 0, so this thread took the lock */
- assert(!sl->threadid);
- sl->threadid = CURRENT_THREAD;
- sl->c = 1;
- return 0;
- }
- }
- /* SPINS_PER_YIELD is used as a bit mask: with 63 this fires on every 64th pass */
- if ((++spins & SPINS_PER_YIELD) == 0) {
-#if defined (__SVR4) && defined (__sun) /* solaris */
- thr_yield();
-#else
-#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
- sched_yield();
-#else /* no-op yield on unknown systems */
- ;
-#endif /* __linux__ || __FreeBSD__ || __APPLE__ */
-#endif /* solaris */
- }
- }
-}
-
-static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) { /*
- Undoes one acquisition of sl by the calling thread.  The lock word is
- only cleared when the recursion count drops to 0.
- */
- volatile unsigned int* lp = &sl->l;
- assert(*lp != 0); /* must currently be locked ... */
- assert(sl->threadid == CURRENT_THREAD); /* ... and by the caller */
- if (--sl->c == 0) {
- sl->threadid = 0; /* drop ownership before the lock word is cleared */
- int prev = 0;
- int ret;
- /* Atomically swap 0 into the lock word; the old value lands in ret and is not used */
- __asm__ __volatile__ ("lock; xchgl %0, %1"
- : "=r" (ret)
- : "m" (*(lp)), "0"(prev)
- : "memory");
- }
-}
-
-static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) { /*
- Single, non-spinning attempt to take sl.  Returns 1 if the lock was
- taken (or re-entered by its current owner) and 0 otherwise -- note that
- pthread_acquire_lock signals success with 0 instead.
- */
- volatile unsigned int* lp = &sl->l;
- if (*lp != 0) {
- /* Already locked: only a re-entry by the owning thread succeeds */
- if (sl->threadid == CURRENT_THREAD) {
- ++sl->c;
- return 1;
- }
- }
- else {
- int cmp = 0;
- int val = 1;
- int ret;
- /* Atomically: if *lp == cmp (0) store val (1); ret receives the old *lp */
- __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
- : "=a" (ret)
- : "r" (val), "m" (*(lp)), "0"(cmp)
- : "memory", "cc");
- if (!ret) { /* old value was 0, so this thread took the lock */
- assert(!sl->threadid);
- sl->threadid = CURRENT_THREAD;
- sl->c = 1;
- return 1;
- }
- }
- return 0; /* held by another thread, or the exchange was lost to one */
-}
-
-
-#else /* WIN32 */
-/* Custom win32-style spin locks on x86 and x64 for MSC */
-struct win32_mlock_t {
- volatile long l; /* lock word: 0 when free; set to 1 via interlockedexchange in the acquire/try routines */
- char cachelinepadding[64]; /* NOTE(review): by its name, padding between l and the fields below; never read or written by the lock routines */
- unsigned int c; /* recursion depth: 1 on first acquire, ++ on re-entry by the owner, -- on each release */
- long threadid; /* id of the owning thread (CURRENT_THREAD), 0 while unowned */
-};
-
-#define MLOCK_T struct win32_mlock_t
-#define CURRENT_THREAD ((long)GetCurrentThreadId())
-#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
-#define ACQUIRE_LOCK(sl) win32_acquire_lock(sl)
-#define RELEASE_LOCK(sl) win32_release_lock(sl)
-#define TRY_LOCK(sl) win32_try_lock(sl)
-#define SPINS_PER_YIELD 63
-
-static MLOCK_T malloc_global_mutex = { 0, 0, 0};
-
-static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) { /*
- Acquires the recursive spin lock sl, spinning until it is obtained.
- If the calling thread already owns the lock, only the recursion count
- is bumped.  The only return value is 0.
- */
- int spins = 0; /* loop iterations so far; drives the periodic SleepEx below */
- for (;;) {
- if (sl->l != 0) {
- /* Lock word is set: succeed only if we are the thread that set it */
- if (sl->threadid == CURRENT_THREAD) {
- ++sl->c;
- return 0;
- }
- }
- else {
- /* The exchange yields the previous lock word; 0 means this thread took the lock */
- if (!interlockedexchange(&sl->l, 1)) {
- assert(!sl->threadid);
- sl->threadid = CURRENT_THREAD;
- sl->c = 1;
- return 0;
- }
- }
- /* SPINS_PER_YIELD is used as a bit mask: with 63 this fires on every 64th pass */
- if ((++spins & SPINS_PER_YIELD) == 0)
- SleepEx(0, FALSE);
- }
-}
-
-static FORCEINLINE void win32_release_lock (MLOCK_T *sl) { /*
- Undoes one acquisition of sl by the calling thread.  The lock word is
- only cleared when the recursion count drops to 0.
- */
- assert(sl->threadid == CURRENT_THREAD); /* caller must be the owner ... */
- assert(sl->l != 0); /* ... of a lock that is actually held */
- if (--sl->c == 0) {
- sl->threadid = 0; /* drop ownership before the lock word is cleared */
- interlockedexchange (&sl->l, 0);
- }
-}
-
-static FORCEINLINE int win32_try_lock (MLOCK_T *sl) { /*
- Single, non-spinning attempt to take sl.  Returns 1 if the lock was
- taken (or re-entered by its current owner) and 0 otherwise -- note that
- win32_acquire_lock signals success with 0 instead.
- */
- if (sl->l != 0) {
- /* Already locked: only a re-entry by the owning thread succeeds */
- if (sl->threadid == CURRENT_THREAD) {
- ++sl->c;
- return 1;
- }
- }
- else {
- /* The exchange yields the previous lock word; 0 means this thread took the lock */
- if (!interlockedexchange(&sl->l, 1)){
- assert(!sl->threadid);
- sl->threadid = CURRENT_THREAD;
- sl->c = 1;
- return 1;
- }
- }
- return 0; /* held by another thread, or the exchange was lost to one */
-}
-
-#endif /* WIN32 */
-#else /* USE_SPIN_LOCKS */
-
-#ifndef WIN32
-/* pthreads-based locks */
-
-#define MLOCK_T pthread_mutex_t
-#define CURRENT_THREAD pthread_self()
-#define INITIAL_LOCK(sl) pthread_init_lock(sl)
-#define ACQUIRE_LOCK(sl) pthread_mutex_lock(sl)
-#define RELEASE_LOCK(sl) pthread_mutex_unlock(sl)
-#define TRY_LOCK(sl) (!pthread_mutex_trylock(sl))
-
-static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/* Cope with old-style linux recursive lock initialization by adding */
-/* skipped internal declaration from pthread.h */
-#ifdef linux
-#ifndef PTHREAD_MUTEX_RECURSIVE
-extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr,
- int __kind));
-#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
-#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y)
-#endif
-#endif
-
-/*
-  pthread_init_lock: initializes *sl as a recursive pthread mutex.
-  Returns 0 on success and 1 if any pthread call reports an error.
-
-  The attribute object is destroyed on every path once it has been
-  initialized, so a failure in settype or mutex_init no longer leaves it
-  behind.
-*/
-static int pthread_init_lock (MLOCK_T *sl) {
-  pthread_mutexattr_t attr;
-  int failed;
-  if (pthread_mutexattr_init(&attr)) return 1;
-  /* || short-circuits: the mutex is only created if the type was accepted */
-  failed = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
-           pthread_mutex_init(sl, &attr);
-  if (pthread_mutexattr_destroy(&attr)) failed = 1;
-  return failed ? 1 : 0;
-}
-
-#else /* WIN32 */
-/*
-  Win32 critical sections.
-  Each macro takes a pointer to an MLOCK_T (a CRITICAL_SECTION):
-    INITIAL_LOCK  evaluates to 0 when initialization succeeds
-    ACQUIRE_LOCK  enters the critical section and evaluates to 0
-    RELEASE_LOCK  leaves the critical section
-    TRY_LOCK      evaluates to nonzero when the section was entered
-  The bodies use the macro parameter itself; they previously expanded to
-  an identifier `sl` that only resolved if the caller happened to have a
-  variable of that name.
-*/
-#define MLOCK_T CRITICAL_SECTION
-#define CURRENT_THREAD GetCurrentThreadId()
-#define INITIAL_LOCK(s) (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000))
-#define ACQUIRE_LOCK(s) (EnterCriticalSection(s), 0)
-#define RELEASE_LOCK(s) LeaveCriticalSection(s)
-#define TRY_LOCK(s) TryEnterCriticalSection(s)
-#define NEED_GLOBAL_LOCK_INIT
-
-static MLOCK_T malloc_global_mutex;
-static volatile long malloc_global_mutex_status;
-
-/* Use spin loop to initialize global lock.
- malloc_global_mutex_status acts as the state: 0 = not initialized,
- < 0 = some thread is initializing, > 0 = malloc_global_mutex is ready.
- Returns once the status is > 0, or right after this thread has
- performed the initialization itself. */
-static void init_malloc_global_mutex() {
- for (;;) {
- long stat = malloc_global_mutex_status;
- if (stat > 0)
- return;
- /* transition to < 0 while initializing, then to > 0 */
- if (stat == 0 &&
- interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) {
- /* This thread moved the status from 0 to -1, so it alone initializes the mutex */
- InitializeCriticalSection(&malloc_global_mutex);
- interlockedexchange(&malloc_global_mutex_status,1);
- return;
- }
- SleepEx(0, FALSE); /* another thread is initializing: give up the time slice and re-check */
- }
-}
-
-#endif /* WIN32 */
-#endif /* USE_SPIN_LOCKS */
-#endif /* USE_LOCKS == 1 */
-
-/* ----------------------- User-defined locks ------------------------ */
-
-#if USE_LOCKS > 1
-/* Define your own lock implementation here */
-/* #define INITIAL_LOCK(sl) ... */
-/* #define ACQUIRE_LOCK(sl) ... */
-/* #define RELEASE_LOCK(sl) ... */
-/* #define TRY_LOCK(sl) ... */
-/* static MLOCK_T malloc_global_mutex = ... */
-#endif /* USE_LOCKS > 1 */
-
-/* ----------------------- Lock-based state ------------------------ */
-
-/* USE_LOCK_BIT is the flag bit tested by use_lock(); it is 0 when locking */
-/* is compiled out, in which case INITIAL_LOCK also expands to nothing. */
-#if USE_LOCKS
-#define USE_LOCK_BIT (2U)
-#else /* USE_LOCKS */
-#define USE_LOCK_BIT (0U)
-#define INITIAL_LOCK(l)
-#endif /* USE_LOCKS */
-
-/* Global-lock wrappers.  Either macro may be predefined to override it. */
-/* NOTE(review): the default definitions already end in ';', so writing */
-/* the usual "MACRO();" yields an extra empty statement -- keep that in */
-/* mind before using them as the sole body of an if/else. */
-#if USE_LOCKS
-#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
-#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex);
-#endif
-#ifndef RELEASE_MALLOC_GLOBAL_LOCK
-#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex);
-#endif
-#else /* USE_LOCKS */
-#define ACQUIRE_MALLOC_GLOBAL_LOCK()
-#define RELEASE_MALLOC_GLOBAL_LOCK()
-#endif /* USE_LOCKS */
-
-
-/* ----------------------- Chunk representations ------------------------ */
-
-/*
- (The following includes lightly edited explanations by Colin Plumb.)
-
- The malloc_chunk declaration below is misleading (but accurate and
- necessary). It declares a "view" into memory allowing access to
- necessary fields at known offsets from a given base.
-
- Chunks of memory are maintained using a `boundary tag' method as
- originally described by Knuth. (See the paper by Paul Wilson
- ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
- techniques.) Sizes of free chunks are stored both in the front of
- each chunk and at the end. This makes consolidating fragmented
- chunks into bigger chunks fast. The head fields also hold bits
- representing whether chunks are free or in use.
-
- Here are some pictures to make it clearer. They are "exploded" to
- show that the state of a chunk can be thought of as extending from
- the high 31 bits of the head field of its header through the
- prev_foot and PINUSE_BIT bit of the following chunk header.
-
- A chunk that's in use looks like:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk (if P = 0) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
- | Size of this chunk 1| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | |
- +- -+
- | |
- +- -+
- | :
- +- size - sizeof(size_t) available payload bytes -+
- : |
- chunk-> +- -+
- | |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
- | Size of next chunk (may or may not be in use) | +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- And if it's free, it looks like this:
-
- chunk-> +- -+
- | User payload (must be in use, or we would have merged!) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
- | Size of this chunk 0| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Next pointer |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Prev pointer |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | :
- +- size - sizeof(struct chunk) unused bytes -+
- : |
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of this chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
- | Size of next chunk (must be in use, or we would have merged)| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | :
- +- User payload -+
- : |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |0|
- +-+
- Note that since we always merge adjacent free chunks, the chunks
- adjacent to a free chunk must be in use.
-
- Given a pointer to a chunk (which can be derived trivially from the
- payload pointer) we can, in O(1) time, find out whether the adjacent
- chunks are free, and if so, unlink them from the lists that they
- are on and merge them with the current chunk.
-
- Chunks always begin on even word boundaries, so the mem portion
- (which is returned to the user) is also on an even word boundary, and
- thus at least double-word aligned.
-
- The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
- chunk size (which is always a multiple of two words), is an in-use
- bit for the *previous* chunk. If that bit is *clear*, then the
- word before the current chunk size contains the previous chunk
- size, and can be used to find the front of the previous chunk.
- The very first chunk allocated always has this bit set, preventing
- access to non-existent (or non-owned) memory. If pinuse is set for
- any given chunk, then you CANNOT determine the size of the
- previous chunk, and might even get a memory addressing fault when
- trying to do so.
-
- The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
- the chunk size redundantly records whether the current chunk is
- inuse (unless the chunk is mmapped). This redundancy enables usage
- checks within free and realloc, and reduces indirection when freeing
- and consolidating chunks.
-
- Each freshly allocated chunk must have both cinuse and pinuse set.
- That is, each allocated chunk borders either a previously allocated
- and still in-use chunk, or the base of its memory arena. This is
- ensured by making all allocations from the `lowest' part of any
- found chunk. Further, no free chunk physically borders another one,
- so each free chunk is known to be preceded and followed by either
- inuse chunks or the ends of memory.
-
- Note that the `foot' of the current chunk is actually represented
- as the prev_foot of the NEXT chunk. This makes it easier to
- deal with alignments etc but can be very confusing when trying
- to extend or adapt this code.
-
- The exceptions to all this are
-
- 1. The special chunk `top' is the top-most available chunk (i.e.,
- the one bordering the end of available memory). It is treated
- specially. Top is never included in any bin, is used only if
- no other chunk is available, and is released back to the
- system if it is very large (see M_TRIM_THRESHOLD). In effect,
- the top chunk is treated as larger (and thus less well
- fitting) than any other available chunk. The top chunk
- doesn't update its trailing size field since there is no next
- contiguous chunk that would have to index off it. However,
- space is still allocated for it (TOP_FOOT_SIZE) to enable
- separation or merging when space is extended.
-
- 2. Chunks allocated via mmap have both cinuse and pinuse bits
- cleared in their head fields. Because they are allocated
- one-by-one, each must carry its own prev_foot field, which is
- also used to hold the offset this chunk has within its mmapped
- region, which is needed to preserve alignment. Each mmapped
- chunk is trailed by the first two fields of a fake next-chunk
- for sake of usage checks.
-
-*/
-
-/* Header view overlaid on every chunk.  prev_foot and head are the two */
-/* size_t words that precede the user payload (see chunk2mem/mem2chunk); */
-/* fd and bk occupy the start of the payload area and are meaningful */
-/* only while the chunk is free. */
-struct malloc_chunk {
- size_t prev_foot; /* Size of previous chunk (if free). */
- size_t head; /* Size and inuse bits. */
- struct malloc_chunk* fd; /* double links -- used only if free. */
- struct malloc_chunk* bk;
-};
-
-typedef struct malloc_chunk mchunk;
-typedef struct malloc_chunk* mchunkptr;
-typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */
-typedef unsigned int bindex_t; /* Described below */
-typedef unsigned int binmap_t; /* Described below */
-
-/* ------------------- Chunks sizes and alignments ----------------------- */
-
-/* Size of the chunk header view, including the fd/bk link fields */
-#define MCHUNK_SIZE (sizeof(mchunk))
-
-/* Per-chunk bookkeeping bytes: two size_t words with FOOTERS, one without */
-#if FOOTERS
-#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
-#else /* FOOTERS */
-#define CHUNK_OVERHEAD (SIZE_T_SIZE)
-#endif /* FOOTERS */
-
-/* MMapped chunks need a second word of overhead ... */
-#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
-/* ... and additional padding for fake next-chunk at foot */
-#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES)
-
-/* The smallest size we can malloc is an aligned minimal chunk */
-#define MIN_CHUNK_SIZE\
-  ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
-
-/* conversion from malloc headers to user pointers, and back */
-#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES))
-#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
-/* chunk associated with aligned address A */
-/* (fully parenthesized so that a postfix operator applied to the */
-/* expansion, e.g. ->head, acts on the chunk pointer, not before the cast) */
-#define align_as_chunk(A) ((mchunkptr)((A) + align_offset(chunk2mem(A))))
-
-/* Bounds on request (not chunk) sizes. */
-#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2)
-#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
-
-/* pad request bytes into a usable size */
-#define pad_request(req) \
-  (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
-
-/* pad request, checking for minimum (but not maximum) */
-#define request2size(req) \
-  (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
-
-
-/* ------------------ Operations on head and foot fields ----------------- */
-
-/*
- The head field of a chunk is or'ed with PINUSE_BIT when previous
- adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
- use, unless mmapped, in which case both bits are cleared.
-
- FLAG4_BIT is not used by this malloc, but might be useful in extensions.
-*/
-
-/* Flag bits kept in the low-order bits of a chunk's head word */
-#define PINUSE_BIT (SIZE_T_ONE)
-#define CINUSE_BIT (SIZE_T_TWO)
-#define FLAG4_BIT (SIZE_T_FOUR)
-#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT)
-#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT)
-
-/* Head value for fenceposts */
-#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE)
-
-/* extraction of fields from head words */
-#define cinuse(p) ((p)->head & CINUSE_BIT)
-#define pinuse(p) ((p)->head & PINUSE_BIT)
-/* in use unless the bits are exactly "previous in use, current free" */
-#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT)
-/* mmapped chunks have both inuse bits cleared */
-#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0)
-
-/* size of the chunk with all flag bits masked off */
-#define chunksize(p) ((p)->head & ~(FLAG_BITS))
-
-#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
-
-/* Treat space at ptr +/- offset as a chunk */
-#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
-#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))
-
-/* Ptr to next or previous physical malloc_chunk. */
-#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS)))
-#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))
-
-/* extract next chunk's pinuse bit */
-#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
-
-/* Get/set size at footer */
-#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot)
-#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))
-
-/* Set size, pinuse bit, and foot */
-/* (s is parenthesized so any size expression combines correctly with the flag) */
-#define set_size_and_pinuse_of_free_chunk(p, s)\
-  ((p)->head = ((s)|PINUSE_BIT), set_foot(p, s))
-
-/* Set size, pinuse bit, foot, and clear next pinuse */
-#define set_free_with_pinuse(p, s, n)\
-  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
-
-/* Get the internal overhead associated with chunk p */
-#define overhead_for(p)\
-  (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
-
-/* Return true if malloced space is not necessarily cleared */
-#if MMAP_CLEARS
-#define calloc_must_clear(p) (!is_mmapped(p))
-#else /* MMAP_CLEARS */
-#define calloc_must_clear(p) (1)
-#endif /* MMAP_CLEARS */
-
-/* ---------------------- Overlaid data structures ----------------------- */
-
-/*
- When chunks are not in use, they are treated as nodes of either
- lists or trees.
-
- "Small" chunks are stored in circular doubly-linked lists, and look
- like this:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `head:' | Size of chunk, in bytes |P|
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Forward pointer to next chunk in list |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Back pointer to previous chunk in list |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Unused space (may be 0 bytes long) .
- . .
- . |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `foot:' | Size of chunk, in bytes |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- Larger chunks are kept in a form of bitwise digital trees (aka
- tries) keyed on chunksizes. Because malloc_tree_chunks are only for
- free chunks greater than 256 bytes, their size doesn't impose any
- constraints on user chunk sizes. Each node looks like:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `head:' | Size of chunk, in bytes |P|
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Forward pointer to next chunk of same size |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Back pointer to previous chunk of same size |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Pointer to left child (child[0]) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Pointer to right child (child[1]) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Pointer to parent |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | bin index of this chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Unused space .
- . |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `foot:' | Size of chunk, in bytes |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- Each tree holding treenodes is a tree of unique chunk sizes. Chunks
- of the same size are arranged in a circularly-linked list, with only
- the oldest chunk (the next to be used, in our FIFO ordering)
- actually in the tree. (Tree members are distinguished by a non-null
- parent pointer.) If a chunk with the same size as an existing node
- is inserted, it is linked off the existing node using pointers that
- work in the same way as fd/bk pointers of small chunks.
-
- Each tree contains a power of 2 sized range of chunk sizes (the
- smallest is 0x100 <= x < 0x180), which is divided in half at each
- tree level, with the chunks in the smaller half of the range (0x100
- <= x < 0x140 for the top node) in the left subtree and the larger
- half (0x140 <= x < 0x180) in the right subtree. This is, of course,
- done by inspecting individual bits.
-
- Using these rules, each node's left subtree contains all smaller
- sizes than its right subtree. However, the node at the root of each
- subtree has no particular ordering relationship to either. (The
- dividing line between the subtree sizes is based on trie relation.)
- If we remove the last chunk of a given size from the interior of the
- tree, we need to replace it with a leaf node. The tree ordering
- rules permit a node to be replaced by any leaf below it.
-
- The smallest chunk in a tree (a common operation in a best-fit
- allocator) can be found by walking a path to the leftmost leaf in
- the tree. Unlike a usual binary tree, where we follow left child
- pointers until we reach a null, here we follow the right child
- pointer any time the left one is null, until we reach a leaf with
- both child pointers null. The smallest chunk in the tree will be
- somewhere along that path.
-
- The worst case number of steps to add, find, or remove a node is
- bounded by the number of bits differentiating chunks within
- bins. Under current bin calculations, this ranges from 6 up to 21
- (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
- is of course much better.
-*/
-
-/* Node layout for free chunks kept in tree bins.  It starts with the */
-/* same four fields as malloc_chunk, followed by the tree links. */
-struct malloc_tree_chunk {
- /* The first four fields must be compatible with malloc_chunk */
- size_t prev_foot;
- size_t head;
- struct malloc_tree_chunk* fd; /* next chunk of the same size */
- struct malloc_tree_chunk* bk; /* previous chunk of the same size */
-
- struct malloc_tree_chunk* child[2]; /* [0] = left, [1] = right */
- struct malloc_tree_chunk* parent;
- bindex_t index; /* bin index of this chunk */
-};
-
-typedef struct malloc_tree_chunk tchunk;
-typedef struct malloc_tree_chunk* tchunkptr;
-typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */
-
-/* A little helper macro for trees */
-/* Yields child[0] when it is non-null, otherwise child[1] (which may be null). */
-#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
-
-/* ----------------------------- Segments -------------------------------- */
-
-/*
- Each malloc space may include non-contiguous segments, held in a
- list headed by an embedded malloc_segment record representing the
- top-most space. Segments also include flags holding properties of
- the space. Large chunks that are directly allocated by mmap are not
- included in this list. They are instead independently created and
- destroyed without otherwise keeping track of them.
-
- Segment management mainly comes into play for spaces allocated by
- MMAP. Any call to MMAP might or might not return memory that is
- adjacent to an existing segment. MORECORE normally contiguously
- extends the current space, so this space is almost always adjacent,
- which is simpler and faster to deal with. (This is why MORECORE is
- used preferentially to MMAP when both are available -- see
- sys_alloc.) When allocating using MMAP, we don't use any of the
- hinting mechanisms (inconsistently) supported in various
- implementations of unix mmap, or distinguish reserving from
- committing memory. Instead, we just ask for space, and exploit
- contiguity when we get it. It is probably possible to do
- better than this on some systems, but no general scheme seems
- to be significantly better.
-
- Management entails a simpler variant of the consolidation scheme
- used for chunks to reduce fragmentation -- new adjacent memory is
- normally prepended or appended to an existing segment. However,
- there are limitations compared to chunk consolidation that mostly
- reflect the fact that segment processing is relatively infrequent
- (occurring only when getting memory from system) and that we
- don't expect to have huge numbers of segments:
-
- * Segments are not indexed, so traversal requires linear scans. (It
- would be possible to index these, but is not worth the extra
- overhead and complexity for most programs on most platforms.)
- * New segments are only appended to old ones when holding top-most
- memory; if they cannot be prepended to others, they are held in
- different segments.
-
- Except for the top-most segment of an mstate, each segment record
- is kept at the tail of its segment. Segments are added by pushing
- segment records onto the list headed by &mstate.seg for the
- containing mstate.
-
- Segment flags control allocation/merge/deallocation policies:
- * If EXTERN_BIT set, then we did not allocate this segment,
- and so should not try to deallocate or merge with others.
- (This currently holds only for the initial segment passed
- into create_mspace_with_base.)
- * If USE_MMAP_BIT set, the segment may be merged with
- other surrounding mmapped segments and trimmed/de-allocated
- using munmap.
- * If neither bit is set, then the segment was obtained using
- MORECORE so can be merged with surrounding MORECORE'd segments
- and deallocated/trimmed using MORECORE with negative arguments.
-*/
-
-/* Record describing one segment; records are chained through next. */
-struct malloc_segment {
- char* base; /* base address */
- size_t size; /* allocated size */
- struct malloc_segment* next; /* ptr to next segment */
- flag_t sflags; /* mmap and extern flag */
-};
-
-/* Nonzero when the corresponding bit is set in the segment's sflags */
-#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT)
-#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT)
-
-typedef struct malloc_segment msegment;
-typedef struct malloc_segment* msegmentptr;
-
-/* ---------------------------- malloc_state ----------------------------- */
-
-/*
- A malloc_state holds all of the bookkeeping for a space.
- The main fields are:
-
- Top
- The topmost chunk of the currently active segment. Its size is
- cached in topsize. The actual size of topmost space is
- topsize+TOP_FOOT_SIZE, which includes space reserved for adding
- fenceposts and segment records if necessary when getting more
- space from the system. The size at which to autotrim top is
- cached from mparams in trim_check, except that it is disabled if
- an autotrim fails.
-
- Designated victim (dv)
- This is the preferred chunk for servicing small requests that
- don't have exact fits. It is normally the chunk split off most
- recently to service another small request. Its size is cached in
- dvsize. The link fields of this chunk are not maintained since it
- is not kept in a bin.
-
- SmallBins
- An array of bin headers for free chunks. These bins hold chunks
- with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
- chunks of all the same size, spaced 8 bytes apart. To simplify
- use in double-linked lists, each bin header acts as a malloc_chunk
- pointing to the real first node, if it exists (else pointing to
- itself). This avoids special-casing for headers. But to avoid
- waste, we allocate only the fd/bk pointers of bins, and then use
- repositioning tricks to treat these as the fields of a chunk.
-
- TreeBins
- Treebins are pointers to the roots of trees holding a range of
- sizes. There are 2 equally spaced treebins for each power of two
- from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything
- larger.
-
- Bin maps
- There is one bit map for small bins ("smallmap") and one for
- treebins ("treemap"). Each bin sets its bit when non-empty, and
- clears the bit when empty. Bit operations are then used to avoid
- bin-by-bin searching -- nearly all "search" is done without ever
- looking at bins that won't be selected. The bit maps
- conservatively use 32 bits per map word, even if on 64bit system.
- For a good description of some of the bit-based techniques used
- here, see Henry S. Warren Jr's book "Hacker's Delight" (and
- supplement at http://hackersdelight.org/). Many of these are
- intended to reduce the branchiness of paths through malloc etc, as
- well as to reduce the number of memory locations read or written.
-
- Segments
- A list of segments headed by an embedded malloc_segment record
- representing the initial space.
-
- Address check support
- The least_addr field is the least address ever obtained from
- MORECORE or MMAP. Attempted frees and reallocs of any address less
- than this are trapped (unless INSECURE is defined).
-
- Magic tag
- A cross-check field that should always hold same value as mparams.magic.
-
- Flags
- Bits recording whether to use MMAP, locks, or contiguous MORECORE
-
- Statistics
- Each space keeps track of current and maximum system memory
- obtained via MORECORE or MMAP.
-
- Trim support
- Fields holding the amount of unused topmost memory that should trigger
- trimming, and a counter to force periodic scanning to release unused
- non-topmost segments.
-
- Locking
- If USE_LOCKS is defined, the "mutex" lock is acquired and released
- around every public call using this mspace.
-
- Extension support
- A void* pointer and a size_t field that can be used to help implement
- extensions to this malloc.
-*/
-
-/* Bin types, widths and sizes */
-/* Number of small bins and tree bins */
-#define NSMALLBINS (32U)
-#define NTREEBINS (32U)
-/* Small bins are SMALLBIN_WIDTH (1 << SMALLBIN_SHIFT) bytes apart */
-#define SMALLBIN_SHIFT (3U)
-#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
-/* Chunk sizes of MIN_LARGE_SIZE (1 << TREEBIN_SHIFT) and above are not small */
-#define TREEBIN_SHIFT (8U)
-#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
-#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
-#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
-
-/* All bookkeeping for one malloc space */
-struct malloc_state {
- binmap_t smallmap; /* bitmap of non-empty small bins */
- binmap_t treemap; /* bitmap of non-empty tree bins */
- size_t dvsize; /* cached size of dv */
- size_t topsize; /* cached size of top */
- char* least_addr; /* least address ever obtained from the system */
- mchunkptr dv; /* designated victim chunk */
- mchunkptr top; /* topmost chunk; 0 until initialized (see is_initialized) */
- size_t trim_check; /* top size above which should_trim() is true */
- size_t release_checks; /* counter forcing periodic scans for unused segments */
- size_t magic; /* cross-check tag; should equal mparams.magic */
- mchunkptr smallbins[(NSMALLBINS+1)*2]; /* only fd/bk pairs; see smallbin_at */
- tbinptr treebins[NTREEBINS]; /* roots of the size trees */
- size_t footprint; /* current system memory obtained */
- size_t max_footprint; /* maximum system memory obtained */
- flag_t mflags; /* USE_LOCK_BIT / USE_MMAP_BIT / USE_NONCONTIGUOUS_BIT */
- msegment seg; /* embedded head of the segment list */
-#if USE_LOCKS
- MLOCK_T mutex; /* locate lock among fields that rarely change */
-#endif /* USE_LOCKS */
- void* extp; /* Unused but available for extensions */
- size_t exts;
-};
-
-typedef struct malloc_state* mstate;
-
-/* ------------- Global malloc_state and malloc_params ------------------- */
-
-#if !ONLY_MSPACES
-
-/* The global malloc_state used for all non-"mspace" calls */
-static struct malloc_state _gm_;
-/* gm is a pointer to the global state; is_global(M) tests whether M is that state */
-#define gm (&_gm_)
-#define is_global(M) ((M) == &_gm_)
-
-#endif /* !ONLY_MSPACES */
-
-/* A state counts as initialized once its top chunk pointer is non-null */
-#define is_initialized(M) ((M)->top != 0)
-
-/* -------------------------- system alloc setup ------------------------- */
-
-/* Operations on mflags */
-
-/* Test, set and clear USE_LOCK_BIT in M->mflags */
-#define use_lock(M) ((M)->mflags & USE_LOCK_BIT)
-#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT)
-#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT)
-
-/* Test, set and clear USE_MMAP_BIT in M->mflags */
-#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT)
-#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT)
-#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT)
-
-/* Test and set USE_NONCONTIGUOUS_BIT; no macro here clears it again */
-#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT)
-#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT)
-
-/* Set USE_LOCK_BIT when L is nonzero, clear it otherwise */
-#define set_lock(M,L)\
- ((M)->mflags = (L)?\
- ((M)->mflags | USE_LOCK_BIT) :\
- ((M)->mflags & ~USE_LOCK_BIT))
-
-/* page-align a size */
-/* (rounds S up; the mask form assumes mparams.page_size is a power of two) */
-#define page_align(S)\
- (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE))
-
-/* granularity-align a size */
-/* (rounds S up; the mask form assumes mparams.granularity is a power of two) */
-#define granularity_align(S)\
- (((S) + (mparams.granularity - SIZE_T_ONE))\
- & ~(mparams.granularity - SIZE_T_ONE))
-
-
-/* For mmap, use granularity alignment on windows, else page-align */
-#ifdef WIN32
-#define mmap_align(S) granularity_align(S)
-#else
-#define mmap_align(S) page_align(S)
-#endif
-
-/* For sys_alloc, enough padding to ensure can malloc request on success */
-#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
-
-/* True when S is a multiple of the page size / granularity */
-#define is_page_aligned(S)\
- (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
-#define is_granularity_aligned(S)\
- (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
-
-/* True if segment S holds address A, i.e. base <= A < base + size */
-#define segment_holds(S, A)\
-  ((char*)(A) >= (S)->base && (char*)(A) < (S)->base + (S)->size)
-
-/*
-  Return segment holding given address.
-  Walks the list that starts at the record embedded in m and returns the
-  first segment whose range contains addr, or 0 when none does.
-*/
-static msegmentptr segment_holding(mstate m, char* addr) {
-  msegmentptr sp = &m->seg;
-  for (;;) {
-    if (segment_holds(sp, addr))
-      return sp;
-    if ((sp = sp->next) == 0)
-      return 0;
-  }
-}
-
-/*
-  Return true if segment contains a segment link.
-  That is: 1 when any segment record on m's list is itself stored inside
-  the address range of ss, otherwise 0.
-*/
-static int has_segment_link(mstate m, msegmentptr ss) {
-  msegmentptr sp = &m->seg;
-  for (;;) {
-    if (segment_holds(ss, sp))
-      return 1;
-    if ((sp = sp->next) == 0)
-      return 0;
-  }
-}
-
-/* should_trim(M,s): true when size s exceeds M's trim_check threshold; */
-/* always false when MORECORE_CANNOT_TRIM is defined. */
-#ifndef MORECORE_CANNOT_TRIM
-#define should_trim(M,s) ((s) > (M)->trim_check)
-#else /* MORECORE_CANNOT_TRIM */
-#define should_trim(M,s) (0)
-#endif /* MORECORE_CANNOT_TRIM */
-
-/*
- TOP_FOOT_SIZE is padding at the end of a segment, including space
- that may be needed to place segment records and fenceposts when new
- noncontiguous segments are added.
- It is the sum of an alignment offset, a padded malloc_segment record
- and one minimal chunk.
-*/
-#define TOP_FOOT_SIZE\
- (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
-
-
-/* ------------------------------- Hooks -------------------------------- */
-
-/*
- PREACTION should be defined to return 0 on success, and nonzero on
- failure. If you are not using locking, you can redefine these to do
- anything you like.
-*/
-
-#if USE_LOCKS
-
-/* PREACTION acquires M's mutex when use_lock(M) is set and yields 0 otherwise. */
-/* POSTACTION releases it under the same condition.  Note that POSTACTION */
-/* expands to a braced block, not an expression. */
-#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
-#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
-#else /* USE_LOCKS */
-
-/* Without locks both hooks may be predefined; the defaults do nothing. */
-#ifndef PREACTION
-#define PREACTION(M) (0)
-#endif /* PREACTION */
-
-#ifndef POSTACTION
-#define POSTACTION(M)
-#endif /* POSTACTION */
-
-#endif /* USE_LOCKS */
-
-/*
- CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
- USAGE_ERROR_ACTION is triggered on detected bad frees and
- reallocs. The argument p is an address that might have triggered the
- fault. It is ignored by the two predefined actions, but might be
- useful in custom actions that try to help diagnose errors.
-*/
-
-/* With PROCEED_ON_ERROR, corruption calls reset_on_error and usage errors */
-/* expand to nothing; otherwise both default to ABORT unless predefined. */
-#if PROCEED_ON_ERROR
-
-/* A count of the number of corruption errors causing resets */
-int malloc_corruption_error_count;
-
-/* default corruption action */
-static void reset_on_error(mstate m);
-
-#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m)
-#define USAGE_ERROR_ACTION(m, p)
-
-#else /* PROCEED_ON_ERROR */
-
-#ifndef CORRUPTION_ERROR_ACTION
-#define CORRUPTION_ERROR_ACTION(m) ABORT
-#endif /* CORRUPTION_ERROR_ACTION */
-
-#ifndef USAGE_ERROR_ACTION
-#define USAGE_ERROR_ACTION(m,p) ABORT
-#endif /* USAGE_ERROR_ACTION */
-
-#endif /* PROCEED_ON_ERROR */
-
-/* -------------------------- Debugging setup ---------------------------- */
-
-/* Without DEBUG every check_* hook expands to nothing. */
-#if ! DEBUG
-
-#define check_free_chunk(M,P)
-#define check_inuse_chunk(M,P)
-#define check_malloced_chunk(M,P,N)
-#define check_mmapped_chunk(M,P)
-#define check_malloc_state(M)
-#define check_top_chunk(M,P)
-
-#else /* DEBUG */
-/* With DEBUG each hook forwards to the do_check_* function of the same name. */
-#define check_free_chunk(M,P) do_check_free_chunk(M,P)
-#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P)
-#define check_top_chunk(M,P) do_check_top_chunk(M,P)
-#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N)
-#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P)
-#define check_malloc_state(M) do_check_malloc_state(M)
-
-/* Forward declarations of the checking routines (defined elsewhere in the file) */
-static void do_check_any_chunk(mstate m, mchunkptr p);
-static void do_check_top_chunk(mstate m, mchunkptr p);
-static void do_check_mmapped_chunk(mstate m, mchunkptr p);
-static void do_check_inuse_chunk(mstate m, mchunkptr p);
-static void do_check_free_chunk(mstate m, mchunkptr p);
-static void do_check_malloced_chunk(mstate m, void* mem, size_t s);
-static void do_check_tree(mstate m, tchunkptr t);
-static void do_check_treebin(mstate m, bindex_t i);
-static void do_check_smallbin(mstate m, bindex_t i);
-static void do_check_malloc_state(mstate m);
-static int bin_find(mstate m, mchunkptr x);
-static size_t traverse_and_check(mstate m);
-#endif /* DEBUG */
-
-/* ---------------------------- Indexing Bins ---------------------------- */
-
-/* True when size s maps to a small-bin index below NSMALLBINS */
-#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
-/* Convert between a chunk size and its small-bin index */
-#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT)
-#define small_index2size(i) ((i) << SMALLBIN_SHIFT)
-/* Index of the bin that holds chunks of MIN_CHUNK_SIZE */
-#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
-
-/* addressing by index. See above about smallbin repositioning */
-/* smallbin_at views the address of smallbins[2*i] as a chunk pointer; */
-/* treebin_at yields the address of the i-th tree root slot. */
-#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1])))
-#define treebin_at(M,i) (&((M)->treebins[i]))
-
-/* assign tree index for size S to variable I. Use x86 asm if possible */
-/*
-  compute_tree_index(S, I): statement macro that stores in I the treebin
-  index for chunk size S.  Sizes below (1 << TREEBIN_SHIFT) give 0, sizes
-  whose shifted value exceeds 0xFFFF give NTREEBINS-1; otherwise, with K
-  the position of the highest set bit of (S >> TREEBIN_SHIFT), the index
-  is 2*K plus the bit of S just below that highest bit.
-  S may be any size expression; I must be an assignable bindex_t.
-*/
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-#define compute_tree_index(S, I)\
-{\
-  unsigned int X = (S) >> TREEBIN_SHIFT;\
-  if (X == 0)\
-    I = 0;\
-  else if (X > 0xFFFF)\
-    I = NTREEBINS-1;\
-  else {\
-    unsigned int K;\
-    /* bsr needs a register or memory source, hence "rm" rather than "g" */\
-    __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "rm" (X));\
-    I = (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\
-  }\
-}
-
-#elif defined (__INTEL_COMPILER)
-#define compute_tree_index(S, I)\
-{\
-  size_t X = (S) >> TREEBIN_SHIFT;\
-  if (X == 0)\
-    I = 0;\
-  else if (X > 0xFFFF)\
-    I = NTREEBINS-1;\
-  else {\
-    unsigned int K = _bit_scan_reverse (X); \
-    I = (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\
-  }\
-}
-
-#elif defined(_MSC_VER) && _MSC_VER>=1300
-#define compute_tree_index(S, I)\
-{\
-  size_t X = (S) >> TREEBIN_SHIFT;\
-  if (X == 0)\
-    I = 0;\
-  else if (X > 0xFFFF)\
-    I = NTREEBINS-1;\
-  else {\
-    unsigned int K;\
-    _BitScanReverse((DWORD *) &K, (DWORD) X);\
-    I = (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\
-  }\
-}
-
-#else /* GNUC */
-/* Portable variant: K is derived with shifts and masks instead of a bit-scan */
-#define compute_tree_index(S, I)\
-{\
-  size_t X = (S) >> TREEBIN_SHIFT;\
-  if (X == 0)\
-    I = 0;\
-  else if (X > 0xFFFF)\
-    I = NTREEBINS-1;\
-  else {\
-    unsigned int Y = (unsigned int)X;\
-    unsigned int N = ((Y - 0x100) >> 16) & 8;\
-    unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\
-    N += K;\
-    N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\
-    K = 14 - N + ((Y <<= K) >> 15);\
-    I = (K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1));\
-  }\
-}
-#endif /* GNUC */
-
-/* Bit representing maximum resolved size in a treebin at i */
-/* (The whole conditional and the argument are parenthesized so the macro */
-/* can be used safely inside larger expressions.) */
-#define bit_for_tree_index(i) \
-  (((i) == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2))
-
-/* Shift placing maximum resolved bit in a treebin at i as sign bit */
-#define leftshift_for_tree_index(i) \
-  (((i) == NTREEBINS-1)? 0 : \
-   ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
-
-/* The size of the smallest chunk held in bin with index i */
-#define minsize_for_tree_index(i) \
-  ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
-   (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
-
-
-/* ------------------------ Operations on bin maps ----------------------- */
-
-/* bit corresponding to given index: a binmap_t with only bit i set */
-#define idx2bit(i) ((binmap_t)(1) << (i))
-
-/* Mark/Clear/Test the bit with given index in M->smallmap */
-#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i))
-#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i))
-#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i))
-
-/* Mark/Clear/Test the bit with given index in M->treemap */
-#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i))
-#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i))
-#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i))
-
-/* isolate the least set bit of a bitmap */
-#define least_bit(x) ((x) & -(x))
-
-/*
-  mask with all bits to left of least bit of x on.
-  x is parenthesized so an expression argument (e.g. a | b) is shifted
-  as a whole rather than only its last operand.
-*/
-#define left_bits(x) (((x)<<1) | -((x)<<1))
-
-/* mask with all bits to left of or equal to least bit of x on */
-#define same_or_left_bits(x) ((x) | -(x))
-
-/*
-  index corresponding to given bit. Use x86 asm if possible.
-
-  compute_bit2idx(X, I) stores into I an index derived from bitmap value X.
-  Depending on the compiler it uses the x86 bsfl instruction,
-  _bit_scan_forward, _BitScanForward, ffs(X)-1, or a portable
-  shift-and-mask computation on X-1.
-  NOTE(review): presumably X always has exactly one bit set (e.g. the
-  result of least_bit) -- confirm at the call sites.
-  All variants except the USE_BUILTIN_FFS one expand to a brace block;
-  that one expands to a bare assignment without a trailing semicolon.
-*/
-
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-#define compute_bit2idx(X, I)\
-{\
- unsigned int J;\
- __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\
- I = (bindex_t)J;\
-}
-
-#elif defined (__INTEL_COMPILER)
-#define compute_bit2idx(X, I)\
-{\
- unsigned int J;\
- J = _bit_scan_forward (X); \
- I = (bindex_t)J;\
-}
-
-#elif defined(_MSC_VER) && _MSC_VER>=1300
-#define compute_bit2idx(X, I)\
-{\
- unsigned int J;\
- _BitScanForward((DWORD *) &J, X);\
- I = (bindex_t)J;\
-}
-
-#elif USE_BUILTIN_FFS
-#define compute_bit2idx(X, I) I = ffs(X)-1
-
-#else
-/* Portable fallback working on Y = X - 1 with successive shifts and masks */
-#define compute_bit2idx(X, I)\
-{\
- unsigned int Y = X - 1;\
- unsigned int K = Y >> (16-4) & 16;\
- unsigned int N = K; Y >>= K;\
- N += K = Y >> (8-3) & 8; Y >>= K;\
- N += K = Y >> (4-2) & 4; Y >>= K;\
- N += K = Y >> (2-1) & 2; Y >>= K;\
- N += K = Y >> (1-0) & 1; Y >>= K;\
- I = (bindex_t)(N + Y);\
-}
-#endif /* GNUC */
-
-
-/* ----------------------- Runtime Check Support ------------------------- */
-
-/*
- For security, the main invariant is that malloc/free/etc never
- writes to a static address other than malloc_state, unless static
- malloc_state itself has been corrupted, which cannot occur via
- malloc (because of these checks). In essence this means that we
- believe all pointers, sizes, maps etc held in malloc_state, but
- check all of those linked or offsetted from other embedded data
- structures. These checks are interspersed with main code in a way
- that tends to minimize their run-time cost.
-
- When FOOTERS is defined, in addition to range checking, we also
- verify footer fields of inuse chunks, which can be used to guarantee
- that the mstate controlling malloc/free is intact. This is a
- streamlined version of the approach described by William Robertson
- et al in "Run-time Detection of Heap-based Overflows" LISA'03
- http://www.usenix.org/events/lisa03/tech/robertson.html The footer
- of an inuse chunk holds the xor of its mstate and a random seed,
- that is checked upon calls to free() and realloc(). This is
- (probabilistically) unguessable from outside the program, but can be
- computed by any code successfully malloc'ing any chunk, so does not
- itself provide protection against code that has already broken
- security through some other means. Unlike Robertson et al, we
- always dynamically check addresses of all offset chunks (previous,
- next, etc). This turns out to be cheaper than relying on hashes.
-*/
-
-/*
-  Predicates used by the runtime checks. When INSECURE is nonzero every
-  predicate is the constant (1), so the checks built on them always pass.
-*/
-#if !INSECURE
-/* Check if address a is at least as high as any from MORECORE or MMAP */
-/* (compares a, as a char*, against M->least_addr) */
-#define ok_address(M, a) ((char*)(a) >= (M)->least_addr)
-/* Check if address of next chunk n is higher than base chunk p */
-#define ok_next(p, n) ((char*)(p) < (char*)(n))
-/* Check if p has inuse status */
-#define ok_inuse(p) is_inuse(p)
-/* Check if p has its pinuse bit on */
-#define ok_pinuse(p) pinuse(p)
-
-#else /* !INSECURE */
-#define ok_address(M, a) (1)
-#define ok_next(b, n) (1)
-#define ok_inuse(p) (1)
-#define ok_pinuse(p) (1)
-#endif /* !INSECURE */
-
-#if (FOOTERS && !INSECURE)
-/* Check if (alleged) mstate m has expected magic field */
-/* (M->magic must equal mparams.magic) */
-#define ok_magic(M) ((M)->magic == mparams.magic)
-#else /* (FOOTERS && !INSECURE) */
-#define ok_magic(M) (1)
-#endif /* (FOOTERS && !INSECURE) */
-
-
-/* In gcc, use __builtin_expect to minimize impact of checks */
-/*
-  RTCHECK(e) wraps a check expression: with gcc >= 3 it is
-  __builtin_expect(e, 1), otherwise it is e itself; when INSECURE is
-  nonzero it is the constant (1) and e is not evaluated.
-*/
-#if !INSECURE
-#if defined(__GNUC__) && __GNUC__ >= 3
-#define RTCHECK(e) __builtin_expect(e, 1)
-#else /* GNUC */
-#define RTCHECK(e) (e)
-#endif /* GNUC */
-#else /* !INSECURE */
-#define RTCHECK(e) (1)
-#endif /* !INSECURE */
-
-/* macros to set up inuse chunks with or without footers */
-/*
-  In every macro below, M is the mstate, p the chunk and s its size in
-  bytes. The chunk that starts s bytes after p is treated as the next chunk.
-  Without FOOTERS, mark_inuse_foot expands to nothing; with FOOTERS it
-  stores ((size_t)M ^ mparams.magic) in the next chunk's prev_foot.
-*/
-
-#if !FOOTERS
-
-#define mark_inuse_foot(M,p,s)
-
-/* Macros for setting head/foot of non-mmapped chunks */
-
-/* Set cinuse bit and pinuse bit of next chunk */
-/* (p's own PINUSE_BIT is preserved; its size field becomes s) */
-#define set_inuse(M,p,s)\
- ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
- ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
-
-/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
-#define set_inuse_and_pinuse(M,p,s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
- ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
-
-/* Set size, cinuse and pinuse bit of this chunk */
-/* (does not touch the next chunk) */
-#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
-
-#else /* FOOTERS */
-
-/* Set foot of inuse chunk to be xor of mstate and seed */
-#define mark_inuse_foot(M,p,s)\
- (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic))
-
-/* Recover the mstate stored by mark_inuse_foot: xor the foot with the seed */
-#define get_mstate_for(p)\
- ((mstate)(((mchunkptr)((char*)(p) +\
- (chunksize(p))))->prev_foot ^ mparams.magic))
-
-/* Same as the non-FOOTERS set_inuse, then writes the footer */
-#define set_inuse(M,p,s)\
- ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
- (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \
- mark_inuse_foot(M,p,s))
-
-/* Same as the non-FOOTERS set_inuse_and_pinuse, then writes the footer */
-#define set_inuse_and_pinuse(M,p,s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
- (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\
- mark_inuse_foot(M,p,s))
-
-/* Sets this chunk's head, then writes the footer */
-#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
- mark_inuse_foot(M, p, s))
-
-#endif /* !FOOTERS */
-
-/* ---------------------------- setting mparams -------------------------- */
-
-#ifdef ENABLE_LARGE_PAGES
-/* Signature of kernel32's GetLargePageMinimum, looked up at run time */
-typedef size_t (WINAPI *GetLargePageMinimum_t)(void);
-#endif
-
-/*
-  Initialize mparams.
-
-  Runs under the malloc global lock. The body executes only while
-  mparams.magic is still 0: it determines the page size and granularity,
-  sanity-checks the configuration (ABORT on failure), fills in the
-  thresholds and default flags, initializes the main arena's flags and
-  lock (unless ONLY_MSPACES), and finally stores a nonzero magic value,
-  which marks initialization as done.
-
-  Always returns 1.
-*/
-static int init_mparams(void) {
-#ifdef NEED_GLOBAL_LOCK_INIT
-  if (malloc_global_mutex_status <= 0)
-    init_malloc_global_mutex();
-#endif
-
-  ACQUIRE_MALLOC_GLOBAL_LOCK();
-  if (mparams.magic == 0) {
-    size_t magic;
-    size_t psize;
-    size_t gsize;
-
-#ifndef WIN32
-    psize = malloc_getpagesize;
-    gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize);
-#else /* WIN32 */
-    {
-      SYSTEM_INFO system_info;
-      GetSystemInfo(&system_info);
-      psize = system_info.dwPageSize;
-      gsize = ((DEFAULT_GRANULARITY != 0)?
-               DEFAULT_GRANULARITY : system_info.dwAllocationGranularity);
-#ifdef ENABLE_LARGE_PAGES
-      {
-        /* When the OS reports a large-page minimum, use it as the page size
-           and make sure the granularity is not smaller than it */
-        GetLargePageMinimum_t GetLargePageMinimum_ = (GetLargePageMinimum_t) GetProcAddress(GetModuleHandle(__T("kernel32.dll")), "GetLargePageMinimum");
-        if(GetLargePageMinimum_) {
-          size_t largepagesize = GetLargePageMinimum_();
-          if(largepagesize) {
-            psize = largepagesize;
-            gsize = ((DEFAULT_GRANULARITY != 0)?
-                     DEFAULT_GRANULARITY : largepagesize);
-            if(gsize < largepagesize) gsize = largepagesize;
-          }
-        }
-      }
-#endif
-    }
-#endif /* WIN32 */
-
-    /* Sanity-check configuration:
-       size_t must be unsigned and as wide as pointer type.
-       ints must be at least 4 bytes.
-       alignment must be at least 8.
-       Alignment, min chunk size, and page size must all be powers of 2.
-    */
-    if ((sizeof(size_t) != sizeof(char*)) ||
-        (MAX_SIZE_T < MIN_CHUNK_SIZE)  ||
-        (sizeof(int) < 4)  ||
-        (MALLOC_ALIGNMENT < (size_t)8U) ||
-        ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) ||
-        ((MCHUNK_SIZE      & (MCHUNK_SIZE-SIZE_T_ONE))      != 0) ||
-        ((gsize            & (gsize-SIZE_T_ONE))            != 0) ||
-        ((psize            & (psize-SIZE_T_ONE))            != 0))
-      ABORT;
-
-    mparams.granularity = gsize;
-    mparams.page_size = psize;
-    mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
-    mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
-#if MORECORE_CONTIGUOUS
-    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT;
-#else  /* MORECORE_CONTIGUOUS */
-    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT;
-#endif /* MORECORE_CONTIGUOUS */
-
-#if !ONLY_MSPACES
-    /* Set up lock for main malloc area */
-    gm->mflags = mparams.default_mflags;
-    INITIAL_LOCK(&gm->mutex);
-#endif
-
-    {
-#if USE_DEV_RANDOM
-      int fd;
-      int have_random = 0;
-      unsigned char buf[sizeof(size_t)];
-      /* Try to use /dev/urandom, else fall back on using time */
-      if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) {
-        if (read(fd, buf, sizeof(buf)) == sizeof(buf)) {
-          magic = *((size_t *) buf);
-          have_random = 1;
-        }
-        /* Close on every path: a short or failed read must not leak fd */
-        close(fd);
-      }
-      if (!have_random)
-#endif /* USE_DEV_RANDOM */
-#ifdef WIN32
-        magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U);
-#else
-        magic = (size_t)(time(0) ^ (size_t)0x55555555U);
-#endif
-      magic |= (size_t)8U;    /* ensure nonzero */
-      magic &= ~(size_t)7U;   /* improve chances of fault for bad values */
-      mparams.magic = magic;
-    }
-  }
-
-  RELEASE_MALLOC_GLOBAL_LOCK();
-  return 1;
-}
-
-/*
-  support for mallopt: update one tunable in mparams.
-
-  A value of -1 is translated to MAX_SIZE_T; any other value is converted
-  to size_t as is. Returns 1 when the parameter was stored, 0 when
-  param_number is unknown or the requested granularity is rejected
-  (it must be a power of two no smaller than the page size).
-*/
-static int change_mparam(int param_number, int value) {
-  size_t requested;
-  ensure_initialization();
-  if (value == -1)
-    requested = MAX_SIZE_T;
-  else
-    requested = (size_t)value;
-
-  if (param_number == M_TRIM_THRESHOLD) {
-    mparams.trim_threshold = requested;
-    return 1;
-  }
-  if (param_number == M_MMAP_THRESHOLD) {
-    mparams.mmap_threshold = requested;
-    return 1;
-  }
-  if (param_number == M_GRANULARITY) {
-    /* reject sizes below one page or that are not a power of two */
-    if (requested < mparams.page_size || (requested & (requested-1)) != 0)
-      return 0;
-    mparams.granularity = requested;
-    return 1;
-  }
-  return 0;
-}
-
-#if DEBUG
-/* ------------------------- Debugging Support --------------------------- */
-
-/* Check properties of any chunk, whether free, inuse, mmapped etc */
-/*
-  Asserts that p's user memory is aligned (or that p is a fencepost, i.e.
-  its head equals FENCEPOST_HEAD) and that p passes ok_address for m.
-*/
-static void do_check_any_chunk(mstate m, mchunkptr p) {
- assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
- assert(ok_address(m, p));
-}
-
-/* Check properties of top chunk */
-/*
-  Asserts that p lies in a known segment, that its size (head with the
-  INUSE_BITS masked off) equals m->topsize, is positive, and reaches
-  exactly TOP_FOOT_SIZE bytes short of the segment end, that p's pinuse
-  bit is set, and that the chunk following it has pinuse clear.
-*/
-static void do_check_top_chunk(mstate m, mchunkptr p) {
- msegmentptr sp = segment_holding(m, (char*)p);
- size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! */
- assert(sp != 0);
- assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
- assert(ok_address(m, p));
- assert(sz == m->topsize);
- assert(sz > 0);
- assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE);
- assert(pinuse(p));
- assert(!pinuse(chunk_plus_offset(p, sz)));
-}
-
-/* Check properties of (inuse) mmapped chunks */
-/*
-  len is the chunk size plus the offset stored in prev_foot plus
-  MMAP_FOOT_PAD; it must be a multiple of the page size. The chunk must be
-  flagged mmapped, not small, and followed by a FENCEPOST_HEAD word and
-  then a zero head.
-*/
-static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
- size_t sz = chunksize(p);
- size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD);
- assert(is_mmapped(p));
- assert(use_mmap(m));
- assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
- assert(ok_address(m, p));
- assert(!is_small(sz));
- assert((len & (mparams.page_size-SIZE_T_ONE)) == 0);
- assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD);
- assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0);
-}
-
-/* Check properties of inuse chunks */
-/*
-  Runs the generic chunk checks, asserts that p is in use and that the
-  next chunk records p as in use, and delegates to the mmapped-chunk
-  checks when p is mmapped.
-*/
-static void do_check_inuse_chunk(mstate m, mchunkptr p) {
- do_check_any_chunk(m, p);
- assert(is_inuse(p));
- assert(next_pinuse(p));
- /* If not pinuse and not mmapped, previous chunk has OK offset */
- assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
- if (is_mmapped(p))
- do_check_mmapped_chunk(m, p);
-}
-
-/* Check properties of free chunks */
-/*
-  Asserts that p is not in use, not mmapped, and that the next chunk does
-  not record it as in use. For chunks other than m->dv and m->top it also
-  checks size alignment, the size copy in the next chunk's prev_foot, that
-  the neighbours are not free, and that the fd/bk links point back at p.
-*/
-static void do_check_free_chunk(mstate m, mchunkptr p) {
- size_t sz = chunksize(p);
- mchunkptr next = chunk_plus_offset(p, sz);
- do_check_any_chunk(m, p);
- assert(!is_inuse(p));
- assert(!next_pinuse(p));
- assert (!is_mmapped(p));
- if (p != m->dv && p != m->top) {
- if (sz >= MIN_CHUNK_SIZE) {
- assert((sz & CHUNK_ALIGN_MASK) == 0);
- assert(is_aligned(chunk2mem(p)));
- assert(next->prev_foot == sz);
- assert(pinuse(p));
- assert (next == m->top || is_inuse(next));
- assert(p->fd->bk == p);
- assert(p->bk->fd == p);
- }
- else /* markers are always of size SIZE_T_SIZE */
- assert(sz == SIZE_T_SIZE);
- }
-}
-
-/* Check properties of malloced chunks at the point they are malloced */
-/*
-  mem is the user pointer (may be 0, in which case nothing is checked) and
-  s the padded request size. The chunk must pass the inuse checks, be
-  aligned in size, be at least MIN_CHUNK_SIZE and at least s bytes.
-*/
-static void do_check_malloced_chunk(mstate m, void* mem, size_t s) {
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
- size_t sz = p->head & ~INUSE_BITS;
- do_check_inuse_chunk(m, p);
- assert((sz & CHUNK_ALIGN_MASK) == 0);
- assert(sz >= MIN_CHUNK_SIZE);
- assert(sz >= s);
- /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */
- assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE));
- }
-}
-
-/* Check a tree and its subtrees. */
-/*
-  t is the first node of a chain of same-sized chunks linked through fd/bk.
-  The function verifies that t's stored index matches the index computed
-  from its size and that the size falls in that bin's range, then walks the
-  whole chain. Exactly one node of the chain must have a parent link; that
-  node's children are checked recursively.
-*/
-static void do_check_tree(mstate m, tchunkptr t) {
- tchunkptr head = 0;
- tchunkptr u = t;
- bindex_t tindex = t->index;
- size_t tsize = chunksize(t);
- bindex_t idx;
- compute_tree_index(tsize, idx);
- assert(tindex == idx);
- assert(tsize >= MIN_LARGE_SIZE);
- assert(tsize >= minsize_for_tree_index(idx));
- assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1))));
-
- do { /* traverse through chain of same-sized nodes */
- do_check_any_chunk(m, ((mchunkptr)u));
- assert(u->index == tindex);
- assert(chunksize(u) == tsize);
- assert(!is_inuse(u));
- assert(!next_pinuse(u));
- assert(u->fd->bk == u);
- assert(u->bk->fd == u);
- if (u->parent == 0) {
- /* chained (non-head) nodes carry no tree links */
- assert(u->child[0] == 0);
- assert(u->child[1] == 0);
- }
- else {
- assert(head == 0); /* only one node on chain has parent */
- head = u;
- assert(u->parent != u);
- /* parent is either a tree node pointing at u or the bin slot itself */
- assert (u->parent->child[0] == u ||
- u->parent->child[1] == u ||
- *((tbinptr*)(u->parent)) == u);
- if (u->child[0] != 0) {
- assert(u->child[0]->parent == u);
- assert(u->child[0] != u);
- do_check_tree(m, u->child[0]);
- }
- if (u->child[1] != 0) {
- assert(u->child[1]->parent == u);
- assert(u->child[1] != u);
- do_check_tree(m, u->child[1]);
- }
- if (u->child[0] != 0 && u->child[1] != 0) {
- assert(chunksize(u->child[0]) < chunksize(u->child[1]));
- }
- }
- u = u->fd;
- } while (u != t);
- assert(head != 0);
-}
-
-/* Check all the chunks in a treebin. */
-/*
-  empty is nonzero when bit i of m->treemap is clear. A null bin slot must
-  be marked empty; a bin marked non-empty has its tree checked.
-*/
-static void do_check_treebin(mstate m, bindex_t i) {
- tbinptr* tb = treebin_at(m, i);
- tchunkptr t = *tb;
- int empty = (m->treemap & (1U << i)) == 0;
- if (t == 0)
- assert(empty);
- if (!empty)
- do_check_tree(m, t);
-}
-
-/* Check all the chunks in a smallbin. */
-/*
-  empty is nonzero when bit i of m->smallmap is clear. A bin whose bk link
-  points at itself must be marked empty. Otherwise the list is walked
-  through the bk links until it returns to the bin header b.
-*/
-static void do_check_smallbin(mstate m, bindex_t i) {
- sbinptr b = smallbin_at(m, i);
- mchunkptr p = b->bk;
- unsigned int empty = (m->smallmap & (1U << i)) == 0;
- if (p == b)
- assert(empty);
- if (!empty) {
- for (; p != b; p = p->bk) {
- size_t size = chunksize(p);
- mchunkptr q;
- /* each chunk claims to be free */
- do_check_free_chunk(m, p);
- /* chunk belongs in bin */
- assert(small_index(size) == i);
- assert(p->bk == b || chunksize(p->bk) == chunksize(p));
- /* chunk is followed by an inuse chunk */
- q = next_chunk(p);
- if (q->head != FENCEPOST_HEAD)
- do_check_inuse_chunk(m, q);
- }
- }
-}
-
-/*
-  Find x in a bin. Used in other check functions.
-
-  Returns 1 when chunk x is linked into the smallbin or treebin that
-  matches its size, 0 otherwise (including when that bin is not marked
-  in the corresponding map).
-*/
-static int bin_find(mstate m, mchunkptr x) {
-  size_t nbytes = chunksize(x);
-
-  if (!is_small(nbytes)) {
-    bindex_t tree_idx;
-    tchunkptr node;
-    tchunkptr cursor;
-    size_t pathbits;
-
-    compute_tree_index(nbytes, tree_idx);
-    if (!treemap_is_marked(m, tree_idx))
-      return 0;
-
-    /* descend, steering by successive high bits of the size, until a
-       node of exactly this size is reached or the path ends */
-    node = *treebin_at(m, tree_idx);
-    pathbits = nbytes << leftshift_for_tree_index(tree_idx);
-    for (; node != 0 && chunksize(node) != nbytes; pathbits <<= 1)
-      node = node->child[(pathbits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
-    if (node == 0)
-      return 0;
-
-    /* scan the ring of same-sized chunks hanging off that node */
-    cursor = node;
-    do {
-      if (cursor == (tchunkptr)x)
-        return 1;
-      cursor = cursor->fd;
-    } while (cursor != node);
-    return 0;
-  }
-  else {
-    bindex_t small_idx = small_index(nbytes);
-    sbinptr bin = smallbin_at(m, small_idx);
-    mchunkptr cursor;
-
-    if (!smallmap_is_marked(m, small_idx))
-      return 0;
-
-    /* walk the circular list once, starting at the bin header */
-    cursor = bin;
-    do {
-      if (cursor == x)
-        return 1;
-      cursor = cursor->fd;
-    } while (cursor != bin);
-    return 0;
-  }
-}
-
-/* Traverse each chunk and check it; return total */
-/*
-  The total starts with the top chunk (m->topsize + TOP_FOOT_SIZE) and adds
-  the size of every chunk found while walking each segment from its aligned
-  base until the walk leaves the segment, reaches m->top, or hits a
-  fencepost. Returns 0 when m is not initialized.
-*/
-static size_t traverse_and_check(mstate m) {
- size_t sum = 0;
- if (is_initialized(m)) {
- msegmentptr s = &m->seg;
- sum += m->topsize + TOP_FOOT_SIZE;
- while (s != 0) {
- mchunkptr q = align_as_chunk(s->base);
- mchunkptr lastq = 0;
- assert(pinuse(q));
- while (segment_holds(s, q) &&
- q != m->top && q->head != FENCEPOST_HEAD) {
- sum += chunksize(q);
- if (is_inuse(q)) {
- /* an inuse chunk must not be reachable from any bin */
- assert(!bin_find(m, q));
- do_check_inuse_chunk(m, q);
- }
- else {
- /* a free chunk is either the dv chunk or sits in a bin */
- assert(q == m->dv || bin_find(m, q));
- assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */
- do_check_free_chunk(m, q);
- }
- lastq = q;
- q = next_chunk(q);
- }
- s = s->next;
- }
- }
- return sum;
-}
-
-/* Check all properties of malloc_state. */
-/*
-  Checks every smallbin and treebin, then the dv chunk (when dvsize is
-  nonzero) and the top chunk (when top is non-null), and finally compares
-  the byte total from traverse_and_check against the footprint counters.
-*/
-static void do_check_malloc_state(mstate m) {
- bindex_t i;
- size_t total;
- /* check bins */
- for (i = 0; i < NSMALLBINS; ++i)
- do_check_smallbin(m, i);
- for (i = 0; i < NTREEBINS; ++i)
- do_check_treebin(m, i);
-
- if (m->dvsize != 0) { /* check dv chunk */
- do_check_any_chunk(m, m->dv);
- assert(m->dvsize == chunksize(m->dv));
- assert(m->dvsize >= MIN_CHUNK_SIZE);
- assert(bin_find(m, m->dv) == 0);
- }
-
- if (m->top != 0) { /* check top chunk */
- do_check_top_chunk(m, m->top);
- /*assert(m->topsize == chunksize(m->top)); redundant */
- assert(m->topsize > 0);
- assert(bin_find(m, m->top) == 0);
- }
-
- total = traverse_and_check(m);
- assert(total <= m->footprint);
- assert(m->footprint <= m->max_footprint);
-}
-#endif /* DEBUG */
-
-/* ----------------------------- statistics ------------------------------ */
-
-#if !NO_MALLINFO
-/*
-  Build a struct mallinfo snapshot for mstate m.
-
-  The result is all zeros when PREACTION(m) fails or m is not yet
-  initialized. Otherwise every segment is walked chunk by chunk (stopping
-  at the top chunk or a fencepost) to total all bytes and the free bytes;
-  the top chunk is counted as free up front.
-*/
-static struct mallinfo internal_mallinfo(mstate m) {
-  struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-  ensure_initialization();
-  if (PREACTION(m))
-    return nm;
-
-  check_malloc_state(m);
-  if (is_initialized(m)) {
-    size_t free_chunks = SIZE_T_ONE; /* top always free */
-    size_t free_bytes = m->topsize + TOP_FOOT_SIZE;
-    size_t total_bytes = free_bytes;
-    msegmentptr seg;
-
-    for (seg = &m->seg; seg != 0; seg = seg->next) {
-      mchunkptr q;
-      for (q = align_as_chunk(seg->base);
-           segment_holds(seg, q) &&
-             q != m->top && q->head != FENCEPOST_HEAD;
-           q = next_chunk(q)) {
-        size_t csize = chunksize(q);
-        total_bytes += csize;
-        if (!is_inuse(q)) {
-          free_bytes += csize;
-          ++free_chunks;
-        }
-      }
-    }
-
-    nm.arena    = total_bytes;
-    nm.ordblks  = free_chunks;
-    nm.hblkhd   = m->footprint - total_bytes;
-    nm.usmblks  = m->max_footprint;
-    nm.uordblks = m->footprint - free_bytes;
-    nm.fordblks = free_bytes;
-    nm.keepcost = m->topsize;
-  }
-
-  POSTACTION(m);
-  return nm;
-}
-#endif /* !NO_MALLINFO */
-
-/*
-  Print three usage figures for mstate m to stderr: the maximum footprint,
-  the current footprint, and the bytes in use (footprint minus the top
-  chunk, its foot, and every free chunk found while walking the segments).
-  All three are reported as 0 when m is not initialized; nothing is
-  printed when PREACTION(m) fails.
-*/
-static void internal_malloc_stats(mstate m) {
-  ensure_initialization();
-  if (!PREACTION(m)) {
-    size_t peak_bytes = 0;
-    size_t system_bytes = 0;
-    size_t inuse_bytes = 0;
-    check_malloc_state(m);
-    if (is_initialized(m)) {
-      msegmentptr seg;
-      peak_bytes = m->max_footprint;
-      system_bytes = m->footprint;
-      inuse_bytes = system_bytes - (m->topsize + TOP_FOOT_SIZE);
-
-      for (seg = &m->seg; seg != 0; seg = seg->next) {
-        mchunkptr q;
-        for (q = align_as_chunk(seg->base);
-             segment_holds(seg, q) &&
-               q != m->top && q->head != FENCEPOST_HEAD;
-             q = next_chunk(q)) {
-          if (!is_inuse(q))
-            inuse_bytes -= chunksize(q);
-        }
-      }
-    }
-
-    fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(peak_bytes));
-    fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(system_bytes));
-    fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(inuse_bytes));
-
-    POSTACTION(m);
-  }
-}
-
-/* ----------------------- Operations on smallbins ----------------------- */
-
-/*
- Various forms of linking and unlinking are defined as macros. Even
- the ones for trees, which are very long but have very short typical
- paths. This is ugly but reduces reliance on inlining support of
- compilers.
-*/
-
-/* Link a free chunk into a smallbin */
-/*
-  M: mstate, P: chunk to insert, S: its size. P is linked between the bin
-  header B and F. F is the header itself when the bin was unmarked (the
-  bin's map bit is then set), otherwise the header's current fd after it
-  passes ok_address. A failed check invokes CORRUPTION_ERROR_ACTION(M).
-  Expands to a brace block that declares locals I, B and F.
-*/
-#define insert_small_chunk(M, P, S) {\
- bindex_t I = small_index(S);\
- mchunkptr B = smallbin_at(M, I);\
- mchunkptr F = B;\
- assert(S >= MIN_CHUNK_SIZE);\
- if (!smallmap_is_marked(M, I))\
- mark_smallmap(M, I);\
- else if (RTCHECK(ok_address(M, B->fd)))\
- F = B->fd;\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- B->fd = P;\
- F->bk = P;\
- P->fd = F;\
- P->bk = B;\
-}
-
-/* Unlink a chunk from a smallbin */
-/*
-  M: mstate, P: chunk to remove, S: its size. When P's fd and bk are the
-  same node, P was the only chunk and the bin's map bit is cleared.
-  Otherwise the neighbours are joined to each other, provided each one is
-  either the bin header or passes ok_address; if not,
-  CORRUPTION_ERROR_ACTION(M) is invoked.
-  Expands to a brace block that declares locals F, B and I.
-*/
-#define unlink_small_chunk(M, P, S) {\
- mchunkptr F = P->fd;\
- mchunkptr B = P->bk;\
- bindex_t I = small_index(S);\
- assert(P != B);\
- assert(P != F);\
- assert(chunksize(P) == small_index2size(I));\
- if (F == B)\
- clear_smallmap(M, I);\
- else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\
- (B == smallbin_at(M,I) || ok_address(M, B)))) {\
- F->bk = B;\
- B->fd = F;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
-}
-
-/* Unlink the first chunk from a smallbin */
-/*
-  M: mstate, B: bin header, P: chunk to remove, I: bin index. When P's fd
-  is the header, the bin's map bit is cleared; otherwise the header and
-  P's fd are linked to each other after fd passes ok_address.
-  Expands to a brace block that declares local F.
-*/
-#define unlink_first_small_chunk(M, B, P, I) {\
- mchunkptr F = P->fd;\
- assert(P != B);\
- assert(P != F);\
- assert(chunksize(P) == small_index2size(I));\
- if (B == F)\
- clear_smallmap(M, I);\
- else if (RTCHECK(ok_address(M, F))) {\
- B->fd = F;\
- F->bk = B;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
-}
-
-
-
-/* Replace dv node, binning the old one */
-/* Used only when dvsize known to be small */
-/*
-  M: mstate, P: new dv chunk, S: its size. A nonzero old dvsize means the
-  old dv chunk is first inserted into its smallbin; then M->dvsize and
-  M->dv are overwritten with S and P.
-*/
-#define replace_dv(M, P, S) {\
- size_t DVS = M->dvsize;\
- if (DVS != 0) {\
- mchunkptr DV = M->dv;\
- assert(is_small(DVS));\
- insert_small_chunk(M, DV, DVS);\
- }\
- M->dvsize = S;\
- M->dv = P;\
-}
-
-/* ------------------------- Operations on trees ------------------------- */
-
-/* Insert chunk into tree */
-/*
-  M: mstate, X: tree chunk to insert, S: its size.
-  X's index is set from compute_tree_index and its children are cleared.
-  - If the bin is unmarked, X becomes the bin's root; its parent field is
-    set to the bin slot address and it forms a one-element fd/bk ring.
-  - Otherwise the tree is descended, choosing child[0] or child[1] from
-    successive high bits of S (K is S pre-shifted so that the deciding bit
-    is the top bit). X is attached at the first empty child slot, or, when
-    a node of the same size is met, linked into that node's fd/bk ring
-    with a null parent.
-  Pointers are validated with ok_address before being written through;
-  a failed check invokes CORRUPTION_ERROR_ACTION(M) and stops the loop.
-*/
-#define insert_large_chunk(M, X, S) {\
- tbinptr* H;\
- bindex_t I;\
- compute_tree_index(S, I);\
- H = treebin_at(M, I);\
- X->index = I;\
- X->child[0] = X->child[1] = 0;\
- if (!treemap_is_marked(M, I)) {\
- mark_treemap(M, I);\
- *H = X;\
- X->parent = (tchunkptr)H;\
- X->fd = X->bk = X;\
- }\
- else {\
- tchunkptr T = *H;\
- size_t K = S << leftshift_for_tree_index(I);\
- for (;;) {\
- if (chunksize(T) != S) {\
- tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
- K <<= 1;\
- if (*C != 0)\
- T = *C;\
- else if (RTCHECK(ok_address(M, C))) {\
- *C = X;\
- X->parent = T;\
- X->fd = X->bk = X;\
- break;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- break;\
- }\
- }\
- else {\
- tchunkptr F = T->fd;\
- if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
- T->fd = F->bk = X;\
- X->fd = F;\
- X->bk = T;\
- X->parent = 0;\
- break;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- break;\
- }\
- }\
- }\
- }\
-}
-
-/*
- Unlink steps:
-
- 1. If x is a chained node, unlink it from its same-sized fd/bk links
- and choose its bk node as its replacement.
- 2. If x was the last node of its size, but not a leaf node, it must
- be replaced with a leaf node (not merely one with an open left or
- right), to make sure that lefts and rights of descendants
- correspond properly to bit masks. We use the rightmost descendant
- of x. We could use any other leaf, but this is easy to locate and
- tends to counteract removal of leftmosts elsewhere, and so keeps
- paths shorter than minimally guaranteed. This doesn't loop much
- because on average a node in a tree is near the bottom.
- 3. If x is the base of a chain (i.e., has parent links) relink
- x's parent and children to x's replacement (or null if none).
-
- In the macro below, M is the mstate and X the tree chunk to remove; R is
- the replacement node chosen in step 1 or 2 and XP is X's parent. Every
- pointer is validated with ok_address before it is written through; a
- failed check invokes CORRUPTION_ERROR_ACTION(M).
- NOTE(review): when X has no same-sized sibling and no children, R is
- not assigned in the first if/else before step 3 reads it -- confirm
- whether an initializer is wanted.
-*/
-
-#define unlink_large_chunk(M, X) {\
- tchunkptr XP = X->parent;\
- tchunkptr R;\
- if (X->bk != X) {\
- tchunkptr F = X->fd;\
- R = X->bk;\
- if (RTCHECK(ok_address(M, F))) {\
- F->bk = R;\
- R->fd = F;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
- else {\
- tchunkptr* RP;\
- if (((R = *(RP = &(X->child[1]))) != 0) ||\
- ((R = *(RP = &(X->child[0]))) != 0)) {\
- tchunkptr* CP;\
- while ((*(CP = &(R->child[1])) != 0) ||\
- (*(CP = &(R->child[0])) != 0)) {\
- R = *(RP = CP);\
- }\
- if (RTCHECK(ok_address(M, RP)))\
- *RP = 0;\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
- }\
- if (XP != 0) {\
- tbinptr* H = treebin_at(M, X->index);\
- if (X == *H) {\
- if ((*H = R) == 0) \
- clear_treemap(M, X->index);\
- }\
- else if (RTCHECK(ok_address(M, XP))) {\
- if (XP->child[0] == X) \
- XP->child[0] = R;\
- else \
- XP->child[1] = R;\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- if (R != 0) {\
- if (RTCHECK(ok_address(M, R))) {\
- tchunkptr C0, C1;\
- R->parent = XP;\
- if ((C0 = X->child[0]) != 0) {\
- if (RTCHECK(ok_address(M, C0))) {\
- R->child[0] = C0;\
- C0->parent = R;\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- if ((C1 = X->child[1]) != 0) {\
- if (RTCHECK(ok_address(M, C1))) {\
- R->child[1] = C1;\
- C1->parent = R;\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
-}
-
-/* Relays to large vs small bin operations */
-/*
-  Both macros pick the smallbin or the tree variant from is_small(S).
-  They expand to a bare if/else statement (not wrapped in braces or
-  do-while), and the small-chunk macro is invoked without a semicolon so
-  that the else still attaches; take care when using them as the body of
-  an unbraced if.
-*/
-
-#define insert_chunk(M, P, S)\
- if (is_small(S)) insert_small_chunk(M, P, S)\
- else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
-
-#define unlink_chunk(M, P, S)\
- if (is_small(S)) unlink_small_chunk(M, P, S)\
- else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
-
-
-/* Relays to internal calls to malloc/free from realloc, memalign etc */
-/*
-  internal_malloc(m, b) allocates b bytes from m and internal_free(m, mem)
-  releases mem to m, routing to the dl* entry points when m is the global
-  state gm and to the mspace_* entry points otherwise.
-
-  internal_malloc is a fully parenthesized expression in every
-  configuration, so a cast or operator applied at the call site binds to
-  the result rather than to the (m == gm) test.
-
-  internal_free is a statement that, in two of the three configurations,
-  already ends in a semicolon; it is left in that form because call sites
-  may rely on it. Do not use it as the unbraced body of an if/else.
-*/
-
-#if ONLY_MSPACES
-#define internal_malloc(m, b) mspace_malloc(m, b)
-#define internal_free(m, mem) mspace_free(m,mem);
-#else /* ONLY_MSPACES */
-#if MSPACES
-#define internal_malloc(m, b)\
-  (((m) == gm)? dlmalloc(b) : mspace_malloc(m, b))
-#define internal_free(m, mem)\
-  if (m == gm) dlfree(mem); else mspace_free(m,mem);
-#else /* MSPACES */
-#define internal_malloc(m, b) dlmalloc(b)
-#define internal_free(m, mem) dlfree(mem)
-#endif /* MSPACES */
-#endif /* ONLY_MSPACES */
-
-/* ----------------------- Direct-mmapping chunks ----------------------- */
-
-/*
- Directly mmapped chunks are set up with an offset to the start of
- the mmapped region stored in the prev_foot field of the chunk. This
- allows reconstruction of the required argument to MUNMAP when freed,
- and also allows adjustment of the returned chunk to meet alignment
- requirements (especially in memalign).
-*/
-
-/*
-  Malloc using mmap.
-
-  Maps a private region large enough for nb bytes plus bookkeeping and
-  alignment slack, and lays out a single chunk in it: the alignment offset
-  goes in prev_foot, the size in head, and two trailing words (a fencepost
-  and a zero head) follow the chunk. Updates m's least address and
-  footprint counters. Returns the user pointer, or 0 when the size
-  computation wraps around or the mapping fails.
-*/
-static void* mmap_alloc(mstate m, size_t nb) {
-  size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
-  char* region;
-
-  if (mmsize <= nb)     /* Check for wrap around 0 */
-    return 0;
-
-  region = (char*)(CALL_DIRECT_MMAP(mmsize));
-  if (region == CMFAIL)
-    return 0;
-
-  {
-    size_t lead = align_offset(chunk2mem(region));
-    size_t usable = mmsize - lead - MMAP_FOOT_PAD;
-    mchunkptr p = (mchunkptr)(region + lead);
-    p->prev_foot = lead;
-    p->head = usable;
-    mark_inuse_foot(m, p, usable);
-    chunk_plus_offset(p, usable)->head = FENCEPOST_HEAD;
-    chunk_plus_offset(p, usable+SIZE_T_SIZE)->head = 0;
-
-    if (m->least_addr == 0 || region < m->least_addr)
-      m->least_addr = region;
-    m->footprint += mmsize;
-    if (m->footprint > m->max_footprint)
-      m->max_footprint = m->footprint;
-    assert(is_aligned(chunk2mem(p)));
-    check_mmapped_chunk(m, p);
-    return chunk2mem(p);
-  }
-}
-
-/*
-  Realloc using mmap.
-
-  Returns oldp unchanged when it is already big enough for nb without
-  wasting more than two granularity units, a relocated/resized chunk when
-  CALL_MREMAP succeeds, and 0 when nb is a small size or the remap fails.
-*/
-static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) {
-  size_t cursize = chunksize(oldp);
-  size_t lead;
-  size_t cur_mapped;
-  size_t new_mapped;
-  char* region;
-
-  if (is_small(nb)) /* Can't shrink mmap regions below small size */
-    return 0;
-
-  /* Keep old chunk if big enough but not too big */
-  if (cursize >= nb + SIZE_T_SIZE &&
-      (cursize - nb) <= (mparams.granularity << 1))
-    return oldp;
-
-  lead = oldp->prev_foot;
-  cur_mapped = cursize + lead + MMAP_FOOT_PAD;
-  new_mapped = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
-  region = (char*)CALL_MREMAP((char*)oldp - lead,
-                              cur_mapped, new_mapped, 1);
-  if (region == CMFAIL)
-    return 0;
-
-  {
-    mchunkptr newp = (mchunkptr)(region + lead);
-    size_t usable = new_mapped - lead - MMAP_FOOT_PAD;
-    newp->head = usable;
-    mark_inuse_foot(m, newp, usable);
-    /* rewrite the two trailing words after the resized chunk */
-    chunk_plus_offset(newp, usable)->head = FENCEPOST_HEAD;
-    chunk_plus_offset(newp, usable+SIZE_T_SIZE)->head = 0;
-
-    if (region < m->least_addr)
-      m->least_addr = region;
-    m->footprint += new_mapped - cur_mapped;
-    if (m->footprint > m->max_footprint)
-      m->max_footprint = m->footprint;
-    check_mmapped_chunk(m, newp);
-    return newp;
-  }
-}
-
-/* -------------------------- mspace management -------------------------- */
-
-/*
-  Initialize top chunk and its size.
-
-  p/psize describe the raw space; the chunk actually installed as m->top
-  starts at the first position whose user memory is aligned, and its size
-  shrinks by the same amount.
-*/
-static void init_top(mstate m, mchunkptr p, size_t psize) {
-  /* Ensure alignment */
-  size_t pad = align_offset(chunk2mem(p));
-  mchunkptr top = (mchunkptr)((char*)p + pad);
-  size_t topsize = psize - pad;
-
-  m->top = top;
-  m->topsize = topsize;
-  top->head = topsize | PINUSE_BIT;
-  /* set size of fake trailing chunk holding overhead space only once */
-  chunk_plus_offset(top, topsize)->head = TOP_FOOT_SIZE;
-  m->trim_check = mparams.trim_threshold; /* reset on each update */
-}
-
-/*
-  Initialize bins for a new mstate that is otherwise zeroed out.
-  Each of the NSMALLBINS bin headers is made a one-element circular list
-  by pointing both of its links at itself.
-*/
-static void init_bins(mstate m) {
-  bindex_t idx = 0;
-  while (idx < NSMALLBINS) {
-    sbinptr header = smallbin_at(m,idx);
-    header->bk = header;
-    header->fd = header;
-    ++idx;
-  }
-}
-
-#if PROCEED_ON_ERROR
-
-/*
-  default corruption action.
-
-  Counts the error and makes m forget all memory it was managing: both
-  bin bitmaps are cleared, the dv/top chunks and the segment record are
-  zeroed, every treebin slot is nulled and the smallbin headers are
-  relinked to themselves. The memory itself is not released.
-*/
-static void reset_on_error(mstate m) {
-  int i;
-  ++malloc_corruption_error_count;
-  /* Reinitialize fields to forget about all memory */
-  /* Clear the bitmaps, not the bin storage: smallbins/treebins are the
-     containers indexed by smallbin_at/treebin_at and are rebuilt below */
-  m->smallmap = m->treemap = 0;
-  m->dvsize = m->topsize = 0;
-  m->seg.base = 0;
-  m->seg.size = 0;
-  m->seg.next = 0;
-  m->top = m->dv = 0;
-  for (i = 0; i < NTREEBINS; ++i)
-    *treebin_at(m, i) = 0;
-  init_bins(m);
-}
-#endif /* PROCEED_ON_ERROR */
-
-/* Allocate chunk and prepend remainder with chunk in successor base. */
-/*
-  newbase is the start of the newly obtained space and oldbase the start
-  of the existing space that follows it. A chunk of nb bytes is carved
-  from the front of the new space and returned (as user memory); the rest
-  of the new space (q, qsize bytes) is merged with the first chunk of the
-  old space.
-*/
-static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
- size_t nb) {
- mchunkptr p = align_as_chunk(newbase);
- mchunkptr oldfirst = align_as_chunk(oldbase);
- size_t psize = (char*)oldfirst - (char*)p;
- mchunkptr q = chunk_plus_offset(p, nb);
- size_t qsize = psize - nb;
- set_size_and_pinuse_of_inuse_chunk(m, p, nb);
-
- assert((char*)oldfirst > (char*)q);
- assert(pinuse(oldfirst));
- assert(qsize >= MIN_CHUNK_SIZE);
-
- /* consolidate remainder with first chunk of old base */
- if (oldfirst == m->top) {
- /* old first chunk is top: top now starts at q and grows by qsize */
- size_t tsize = m->topsize += qsize;
- m->top = q;
- q->head = tsize | PINUSE_BIT;
- check_top_chunk(m, q);
- }
- else if (oldfirst == m->dv) {
- /* old first chunk is dv: dv now starts at q and grows by qsize */
- size_t dsize = m->dvsize += qsize;
- m->dv = q;
- set_size_and_pinuse_of_free_chunk(q, dsize);
- }
- else {
- if (!is_inuse(oldfirst)) {
- /* absorb a free first chunk: unlink it and extend q over it */
- size_t nsize = chunksize(oldfirst);
- unlink_chunk(m, oldfirst, nsize);
- oldfirst = chunk_plus_offset(oldfirst, nsize);
- qsize += nsize;
- }
- set_free_with_pinuse(q, qsize, oldfirst);
- insert_chunk(m, q, qsize);
- check_free_chunk(m, q);
- }
-
- check_malloced_chunk(m, chunk2mem(p), nb);
- return chunk2mem(p);
-}
-
-/* Add a segment to hold a new noncontiguous region */
-/*
-  tbase/tsize describe the new region and mmapped is stored as its sflags.
-  The new region becomes the current segment (m->seg) and the new top.
-  The previous segment record is copied into a chunk (sp) placed near the
-  end of the old top's segment, followed by fenceposts up to the old
-  segment end; whatever is left of the old top before sp is binned as an
-  ordinary free chunk.
-*/
-static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
- /* Determine locations and sizes of segment, fenceposts, old top */
- char* old_top = (char*)m->top;
- msegmentptr oldsp = segment_holding(m, old_top);
- char* old_end = oldsp->base + oldsp->size;
- size_t ssize = pad_request(sizeof(struct malloc_segment));
- char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
- size_t offset = align_offset(chunk2mem(rawsp));
- char* asp = rawsp + offset;
- /* if fewer than MIN_CHUNK_SIZE bytes would remain in front of the
- record, place it at old_top itself */
- char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
- mchunkptr sp = (mchunkptr)csp;
- msegmentptr ss = (msegmentptr)(chunk2mem(sp));
- mchunkptr tnext = chunk_plus_offset(sp, ssize);
- mchunkptr p = tnext;
- int nfences = 0;
-
- /* reset top to new space */
- init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
-
- /* Set up segment record */
- assert(is_aligned(ss));
- set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
- *ss = m->seg; /* Push current record */
- m->seg.base = tbase;
- m->seg.size = tsize;
- m->seg.sflags = mmapped;
- m->seg.next = ss;
-
- /* Insert trailing fenceposts */
- /* one FENCEPOST_HEAD word per SIZE_T_SIZE step until old_end is reached */
- for (;;) {
- mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
- p->head = FENCEPOST_HEAD;
- ++nfences;
- if ((char*)(&(nextp->head)) < old_end)
- p = nextp;
- else
- break;
- }
- assert(nfences >= 2);
-
- /* Insert the rest of old top into a bin as an ordinary free chunk */
- if (csp != old_top) {
- mchunkptr q = (mchunkptr)old_top;
- size_t psize = csp - old_top;
- mchunkptr tn = chunk_plus_offset(q, psize);
- set_free_with_pinuse(q, psize, tn);
- insert_chunk(m, q, psize);
- }
-
- check_top_chunk(m, m->top);
-}
-
-/* -------------------------- System allocation -------------------------- */
-
-/*
-  Get memory from system using MORECORE or MMAP.
-
-  m  - malloc state that needs more memory
-  nb - chunk size the caller needs (dlmalloc passes its padded request,
-       or MAX_SIZE_T to force failure)
-
-  Returns a pointer to usable memory (from mmap_alloc, prepend_alloc, or
-  chunk2mem of a chunk split off the new/extended top), or 0 after
-  running MALLOC_FAILURE_ACTION when nothing could be obtained or the
-  resulting top is still not larger than nb.
-*/
-static void* sys_alloc(mstate m, size_t nb) {
- char* tbase = CMFAIL; /* base of newly obtained space; CMFAIL means none yet */
- size_t tsize = 0; /* size of newly obtained space */
- flag_t mmap_flag = 0; /* set to USE_MMAP_BIT when the space came from CALL_MMAP */
-
- ensure_initialization();
-
- /* Directly map large chunks, but only if already initialized */
- if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) {
- void* mem = mmap_alloc(m, nb);
- if (mem != 0)
- return mem;
- }
-
- /*
- Try getting memory in any of three ways (in most-preferred to
- least-preferred order):
- 1. A call to MORECORE that can normally contiguously extend memory.
- (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE
- or main space is mmapped or a previous contiguous call failed)
- 2. A call to MMAP new space (disabled if not HAVE_MMAP).
- Note that under the default settings, if MORECORE is unable to
- fulfill a request, and HAVE_MMAP is true, then mmap is
- used as a noncontiguous system allocator. This is a useful backup
- strategy for systems with holes in address spaces -- in this case
- sbrk cannot contiguously expand the heap, but mmap may be able to
- find space.
- 3. A call to MORECORE that cannot usually contiguously extend memory.
- (disabled if not HAVE_MORECORE)
-
- In all cases, we need to request enough bytes from system to ensure
- we can malloc nb bytes upon success, so pad with enough space for
- top_foot, plus alignment-pad to make sure we don't lose bytes if
- not on boundary, and round this up to a granularity unit.
- */
-
- if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
- char* br = CMFAIL;
- msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
- size_t asize = 0;
- ACQUIRE_MALLOC_GLOBAL_LOCK();
-
- if (ss == 0) { /* First time through or recovery */
- char* base = (char*)CALL_MORECORE(0);
- if (base != CMFAIL) {
- asize = granularity_align(nb + SYS_ALLOC_PADDING);
- /* Adjust to end on a page boundary */
- if (!is_page_aligned(base))
- asize += (page_align((size_t)base) - (size_t)base);
- /* Can't call MORECORE if size is negative when treated as signed */
- if (asize < HALF_MAX_SIZE_T &&
- (br = (char*)(CALL_MORECORE(asize))) == base) {
- tbase = base;
- tsize = asize;
- }
- }
- }
- else {
- /* Subtract out existing available top space from MORECORE request. */
- asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING);
- /* Use mem here only if it did contiguously extend old space */
- if (asize < HALF_MAX_SIZE_T &&
- (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) {
- tbase = br;
- tsize = asize;
- }
- }
-
- if (tbase == CMFAIL) { /* Cope with partial failure */
- if (br != CMFAIL) { /* Try to use/extend the space we did get */
- if (asize < HALF_MAX_SIZE_T &&
- asize < nb + SYS_ALLOC_PADDING) {
- size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize);
- if (esize < HALF_MAX_SIZE_T) {
- char* end = (char*)CALL_MORECORE(esize);
- if (end != CMFAIL)
- asize += esize;
- else { /* Can't use; try to release */
- (void) CALL_MORECORE(-asize); /* unsigned negation of asize, passed as the request */
- br = CMFAIL;
- }
- }
- }
- }
- if (br != CMFAIL) { /* Use the space we did get */
- tbase = br;
- tsize = asize;
- }
- else
- disable_contiguous(m); /* Don't try contiguous path in the future */
- }
-
- RELEASE_MALLOC_GLOBAL_LOCK();
- }
-
- if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */
- size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING);
- if (rsize > nb) { /* Fail if wraps around zero */
- char* mp = (char*)(CALL_MMAP(rsize));
- if (mp != CMFAIL) {
- tbase = mp;
- tsize = rsize;
- mmap_flag = USE_MMAP_BIT;
- }
- }
- }
-
- if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
- size_t asize = granularity_align(nb + SYS_ALLOC_PADDING);
- if (asize < HALF_MAX_SIZE_T) {
- char* br = CMFAIL;
- char* end = CMFAIL;
- ACQUIRE_MALLOC_GLOBAL_LOCK();
- br = (char*)(CALL_MORECORE(asize));
- end = (char*)(CALL_MORECORE(0));
- RELEASE_MALLOC_GLOBAL_LOCK();
- if (br != CMFAIL && end != CMFAIL && br < end) {
- size_t ssize = end - br; /* size actually obtained, measured from the two break values */
- if (ssize > nb + TOP_FOOT_SIZE) {
- tbase = br;
- tsize = ssize;
- }
- }
- }
- }
-
- if (tbase != CMFAIL) {
-
- if ((m->footprint += tsize) > m->max_footprint)
- m->max_footprint = m->footprint;
-
- if (!is_initialized(m)) { /* first-time initialization */
- if (m->least_addr == 0 || tbase < m->least_addr)
- m->least_addr = tbase;
- m->seg.base = tbase;
- m->seg.size = tsize;
- m->seg.sflags = mmap_flag;
- m->magic = mparams.magic;
- m->release_checks = MAX_RELEASE_CHECK_RATE;
- init_bins(m);
-#if !ONLY_MSPACES
- if (is_global(m))
- init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
- else
-#endif
- {
- /* Offset top by embedded malloc_state */
- mchunkptr mn = next_chunk(mem2chunk(m));
- init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
- }
- }
-
- else {
- /* Try to merge with an existing segment */
- msegmentptr sp = &m->seg;
- /* Only consider most recent segment if traversal suppressed */
- while (sp != 0 && tbase != sp->base + sp->size) /* look for a segment that ends exactly at tbase */
- sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
- if (sp != 0 &&
- !is_extern_segment(sp) &&
- (sp->sflags & USE_MMAP_BIT) == mmap_flag &&
- segment_holds(sp, m->top)) { /* append */
- sp->size += tsize;
- init_top(m, m->top, m->topsize + tsize);
- }
- else {
- if (tbase < m->least_addr)
- m->least_addr = tbase;
- sp = &m->seg;
- while (sp != 0 && sp->base != tbase + tsize) /* look for a segment that starts exactly where the new space ends */
- sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
- if (sp != 0 &&
- !is_extern_segment(sp) &&
- (sp->sflags & USE_MMAP_BIT) == mmap_flag) {
- char* oldbase = sp->base;
- sp->base = tbase;
- sp->size += tsize;
- return prepend_alloc(m, tbase, oldbase, nb);
- }
- else
- add_segment(m, tbase, tsize, mmap_flag);
- }
- }
-
- if (nb < m->topsize) { /* Allocate from new or extended top space */
- size_t rsize = m->topsize -= nb;
- mchunkptr p = m->top;
- mchunkptr r = m->top = chunk_plus_offset(p, nb);
- r->head = rsize | PINUSE_BIT;
- set_size_and_pinuse_of_inuse_chunk(m, p, nb);
- check_top_chunk(m, m->top);
- check_malloced_chunk(m, chunk2mem(p), nb);
- return chunk2mem(p);
- }
- }
-
- MALLOC_FAILURE_ACTION;
- return 0;
-}
-
-/* ----------------------- system deallocation -------------------------- */
-
-/*
-  Unmap and unlink any mmapped segments that don't contain used chunks.
-
-  Walks the segment list starting after the head record m->seg. Returns
-  the total number of bytes successfully unmapped, reduces m->footprint
-  by the same amount, and resets m->release_checks before returning.
-*/
-static size_t release_unused_segments(mstate m) {
- size_t released = 0;
- int nsegs = 0;
- msegmentptr pred = &m->seg;
- msegmentptr sp = pred->next;
- while (sp != 0) {
- char* base = sp->base;
- size_t size = sp->size;
- msegmentptr next = sp->next; /* saved first: sp's record may be unmapped below */
- ++nsegs;
- if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
- mchunkptr p = align_as_chunk(base);
- size_t psize = chunksize(p);
- /* Can unmap if first chunk holds entire segment and not pinned */
- if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
- tchunkptr tp = (tchunkptr)p;
- assert(segment_holds(sp, (char*)sp));
- if (p == m->dv) {
- m->dv = 0;
- m->dvsize = 0;
- }
- else {
- unlink_large_chunk(m, tp);
- }
- if (CALL_MUNMAP(base, size) == 0) {
- released += size;
- m->footprint -= size;
- /* unlink obsoleted record */
- sp = pred;
- sp->next = next;
- }
- else { /* back out if cannot unmap */
- insert_large_chunk(m, tp, psize);
- }
- }
- }
- if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */
- break;
- pred = sp;
- sp = next;
- }
- /* Reset check counter */
- m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)?
- nsegs : MAX_RELEASE_CHECK_RATE);
- return released;
-}
-
-static int sys_trim(mstate m, size_t pad) { /*
-  Try to give memory at the end of the top chunk back to the system,
-  keeping at least `pad` bytes (plus TOP_FOOT_SIZE) in top, then unmap
-  unused mmapped segments when HAVE_MMAP is set.
-
-  Returns 1 if any bytes were released, otherwise 0. Nothing is done
-  when pad >= MAX_REQUEST or m is not initialized.
-*/
- size_t released = 0;
- ensure_initialization();
- if (pad < MAX_REQUEST && is_initialized(m)) {
- pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
-
- if (m->topsize > pad) {
- /* Shrink top space in granularity-size units, keeping at least one */
- size_t unit = mparams.granularity;
- size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
- SIZE_T_ONE) * unit;
- msegmentptr sp = segment_holding(m, (char*)m->top);
-
- if (!is_extern_segment(sp)) {
- if (is_mmapped_segment(sp)) {
- if (HAVE_MMAP &&
- sp->size >= extra &&
- !has_segment_link(m, sp)) { /* can't shrink if pinned */
- size_t newsize = sp->size - extra;
- /* Prefer mremap, fall back to munmap */
- if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
- (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
- released = extra;
- }
- }
- }
- else if (HAVE_MORECORE) {
- if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */
- extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
- ACQUIRE_MALLOC_GLOBAL_LOCK();
- {
- /* Make sure end of memory is where we last set it. */
- char* old_br = (char*)(CALL_MORECORE(0));
- if (old_br == sp->base + sp->size) {
- char* rel_br = (char*)(CALL_MORECORE(-extra));
- char* new_br = (char*)(CALL_MORECORE(0));
- if (rel_br != CMFAIL && new_br < old_br)
- released = old_br - new_br; /* count what the break actually moved by */
- }
- }
- RELEASE_MALLOC_GLOBAL_LOCK();
- }
- }
-
- if (released != 0) {
- sp->size -= released;
- m->footprint -= released;
- init_top(m, m->top, m->topsize - released);
- check_top_chunk(m, m->top);
- }
- }
-
- /* Unmap any unused mmapped segments */
- if (HAVE_MMAP)
- released += release_unused_segments(m);
-
- /* On failure, disable autotrim to avoid repeated failed future calls */
- if (released == 0 && m->topsize > m->trim_check)
- m->trim_check = MAX_SIZE_T;
- }
-
- return (released != 0)? 1 : 0;
-}
-
-
-/* ---------------------------- malloc support --------------------------- */
-
-/*
-  allocate a large request from the best fitting chunk in a treebin
-
-  m  - malloc state whose treebins are searched
-  nb - chunk size needed
-
-  Returns chunk2mem of the chosen chunk (split when the remainder is at
-  least MIN_CHUNK_SIZE), or 0 when no tree chunk was found or the dv
-  chunk is at least as good a fit. CORRUPTION_ERROR_ACTION runs when the
-  chosen chunk fails the RTCHECK address/next tests.
-*/
-static void* tmalloc_large(mstate m, size_t nb) {
- tchunkptr v = 0; /* best candidate so far */
- size_t rsize = -nb; /* Unsigned negation */
- tchunkptr t;
- bindex_t idx;
- compute_tree_index(nb, idx);
- if ((t = *treebin_at(m, idx)) != 0) {
- /* Traverse tree for this bin looking for node with size == nb */
- size_t sizebits = nb << leftshift_for_tree_index(idx);
- tchunkptr rst = 0; /* The deepest untaken right subtree */
- for (;;) {
- tchunkptr rt;
- size_t trem = chunksize(t) - nb; /* unsigned: wraps to a huge value when t is smaller than nb, so it never beats rsize */
- if (trem < rsize) {
- v = t;
- if ((rsize = trem) == 0)
- break;
- }
- rt = t->child[1];
- t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; /* branch on the current top bit of sizebits */
- if (rt != 0 && rt != t)
- rst = rt;
- if (t == 0) {
- t = rst; /* set t to least subtree holding sizes > nb */
- break;
- }
- sizebits <<= 1;
- }
- }
- if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
- binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
- if (leftbits != 0) {
- bindex_t i;
- binmap_t leastbit = least_bit(leftbits);
- compute_bit2idx(leastbit, i);
- t = *treebin_at(m, i);
- }
- }
-
- while (t != 0) { /* find smallest of tree or subtree */
- size_t trem = chunksize(t) - nb;
- if (trem < rsize) {
- rsize = trem;
- v = t;
- }
- t = leftmost_child(t);
- }
-
- /* If dv is a better fit, return 0 so malloc will use it */
- if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
- if (RTCHECK(ok_address(m, v))) { /* split */
- mchunkptr r = chunk_plus_offset(v, nb);
- assert(chunksize(v) == rsize + nb);
- if (RTCHECK(ok_next(v, r))) {
- unlink_large_chunk(m, v);
- if (rsize < MIN_CHUNK_SIZE) /* remainder too small to stand alone: hand out the whole chunk */
- set_inuse_and_pinuse(m, v, (rsize + nb));
- else {
- set_size_and_pinuse_of_inuse_chunk(m, v, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- insert_chunk(m, r, rsize);
- }
- return chunk2mem(v);
- }
- }
- CORRUPTION_ERROR_ACTION(m);
- }
- return 0;
-}
-
-/*
-  allocate a small request from the best fitting chunk in a treebin
-
-  m  - malloc state; the code reads the treebin selected by the least
-       set bit of m->treemap without checking for emptiness (the
-       visible callers test treemap != 0 first)
-  nb - chunk size needed
-
-  Returns chunk2mem of the chosen chunk; a remainder of at least
-  MIN_CHUNK_SIZE becomes the new dv chunk via replace_dv. Returns 0
-  after CORRUPTION_ERROR_ACTION when the RTCHECK tests fail.
-*/
-static void* tmalloc_small(mstate m, size_t nb) {
- tchunkptr t, v;
- size_t rsize;
- bindex_t i;
- binmap_t leastbit = least_bit(m->treemap);
- compute_bit2idx(leastbit, i);
- v = t = *treebin_at(m, i);
- rsize = chunksize(t) - nb;
-
- while ((t = leftmost_child(t)) != 0) { /* follow leftmost children, keeping the chunk with the smallest remainder */
- size_t trem = chunksize(t) - nb;
- if (trem < rsize) {
- rsize = trem;
- v = t;
- }
- }
-
- if (RTCHECK(ok_address(m, v))) {
- mchunkptr r = chunk_plus_offset(v, nb);
- assert(chunksize(v) == rsize + nb);
- if (RTCHECK(ok_next(v, r))) {
- unlink_large_chunk(m, v);
- if (rsize < MIN_CHUNK_SIZE) /* remainder too small to stand alone: hand out the whole chunk */
- set_inuse_and_pinuse(m, v, (rsize + nb));
- else {
- set_size_and_pinuse_of_inuse_chunk(m, v, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- replace_dv(m, r, rsize);
- }
- return chunk2mem(v);
- }
- }
-
- CORRUPTION_ERROR_ACTION(m);
- return 0;
-}
-
-/* --------------------------- realloc support --------------------------- */
-
-static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { /*
-  Resize the allocation at oldmem so it can hold `bytes` bytes.
-
-  m      - malloc state owning oldmem
-  oldmem - existing allocation (dereferenced via mem2chunk without a
-           null check; dlrealloc handles null before calling)
-  bytes  - requested new size
-
-  Returns the (possibly unchanged) pointer on success. Returns 0 when
-  bytes >= MAX_REQUEST, when PREACTION fails, when the chunk fails the
-  RTCHECK sanity tests, or when the fallback internal_malloc fails; in
-  that last case oldmem is not freed.
-*/
- if (bytes >= MAX_REQUEST) {
- MALLOC_FAILURE_ACTION;
- return 0;
- }
- if (!PREACTION(m)) {
- mchunkptr oldp = mem2chunk(oldmem);
- size_t oldsize = chunksize(oldp);
- mchunkptr next = chunk_plus_offset(oldp, oldsize);
- mchunkptr newp = 0; /* stays 0 when no in-place strategy applied */
- void* extra = 0; /* split-off tail to free once POSTACTION has run */
-
- /* Try to either shrink or extend into top. Else malloc-copy-free */
-
- if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) &&
- ok_next(oldp, next) && ok_pinuse(next))) {
- size_t nb = request2size(bytes);
- if (is_mmapped(oldp))
- newp = mmap_resize(m, oldp, nb);
- else if (oldsize >= nb) { /* already big enough */
- size_t rsize = oldsize - nb;
- newp = oldp;
- if (rsize >= MIN_CHUNK_SIZE) {
- mchunkptr remainder = chunk_plus_offset(newp, nb);
- set_inuse(m, newp, nb);
- set_inuse_and_pinuse(m, remainder, rsize);
- extra = chunk2mem(remainder);
- }
- }
- else if (next == m->top && oldsize + m->topsize > nb) {
- /* Expand into top */
- size_t newsize = oldsize + m->topsize;
- size_t newtopsize = newsize - nb;
- mchunkptr newtop = chunk_plus_offset(oldp, nb);
- set_inuse(m, oldp, nb);
- newtop->head = newtopsize |PINUSE_BIT;
- m->top = newtop;
- m->topsize = newtopsize;
- newp = oldp;
- }
- }
- else {
- USAGE_ERROR_ACTION(m, oldmem);
- POSTACTION(m);
- return 0;
- }
-#if DEBUG
- if (newp != 0) {
- check_inuse_chunk(m, newp); /* Check requires lock */
- }
-#endif
-
- POSTACTION(m);
-
- if (newp != 0) {
- if (extra != 0) {
- internal_free(m, extra);
- }
- return chunk2mem(newp);
- }
- else {
- void* newmem = internal_malloc(m, bytes);
- if (newmem != 0) {
- size_t oc = oldsize - overhead_for(oldp);
- memcpy(newmem, oldmem, (oc < bytes)? oc : bytes); /* copy the smaller of old payload size and new request */
- internal_free(m, oldmem);
- }
- return newmem;
- }
- }
- return 0;
-}
-
-/* --------------------------- memalign support -------------------------- */
-
-static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { /*
-  Allocate `bytes` bytes whose address is a multiple of `alignment`.
-
-  Alignments up to MALLOC_ALIGNMENT go straight to internal_malloc.
-  Otherwise the alignment is raised to at least MIN_CHUNK_SIZE and
-  rounded up to a power of two, an oversized chunk is allocated, and
-  the unused leading and trailing parts are split off and freed.
-
-  Returns the aligned pointer, or 0 when bytes >= MAX_REQUEST - alignment,
-  when internal_malloc fails, or when PREACTION fails.
-*/
- if (alignment <= MALLOC_ALIGNMENT) /* Can just use malloc */
- return internal_malloc(m, bytes);
- if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
- alignment = MIN_CHUNK_SIZE;
- if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */
- size_t a = MALLOC_ALIGNMENT << 1;
- while (a < alignment) a <<= 1;
- alignment = a;
- }
-
- if (bytes >= MAX_REQUEST - alignment) {
- if (m != 0) { /* Test isn't needed but avoids compiler warning */
- MALLOC_FAILURE_ACTION;
- }
- }
- else {
- size_t nb = request2size(bytes);
- size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
- char* mem = (char*)internal_malloc(m, req);
- if (mem != 0) {
- void* leader = 0; /* leading slack to free after POSTACTION */
- void* trailer = 0; /* trailing slack to free after POSTACTION */
- mchunkptr p = mem2chunk(mem);
-
- if (PREACTION(m)) return 0; /* NOTE(review): mem obtained above is not released on this path */
- if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */
- /*
- Find an aligned spot inside chunk. Since we need to give
- back leading space in a chunk of at least MIN_CHUNK_SIZE, if
- the first calculation places us at a spot with less than
- MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
- We've allocated enough total room so that this is always
- possible.
- */
- char* br = (char*)mem2chunk((size_t)(((size_t)(mem +
- alignment -
- SIZE_T_ONE)) &
- -alignment));
- char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)?
- br : br+alignment;
- mchunkptr newp = (mchunkptr)pos;
- size_t leadsize = pos - (char*)(p);
- size_t newsize = chunksize(p) - leadsize;
-
- if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */
- newp->prev_foot = p->prev_foot + leadsize;
- newp->head = newsize;
- }
- else { /* Otherwise, give back leader, use the rest */
- set_inuse(m, newp, newsize);
- set_inuse(m, p, leadsize);
- leader = chunk2mem(p);
- }
- p = newp;
- }
-
- /* Give back spare room at the end */
- if (!is_mmapped(p)) {
- size_t size = chunksize(p);
- if (size > nb + MIN_CHUNK_SIZE) {
- size_t remainder_size = size - nb;
- mchunkptr remainder = chunk_plus_offset(p, nb);
- set_inuse(m, p, nb);
- set_inuse(m, remainder, remainder_size);
- trailer = chunk2mem(remainder);
- }
- }
-
- assert (chunksize(p) >= nb);
- assert((((size_t)(chunk2mem(p))) % alignment) == 0);
- check_inuse_chunk(m, p);
- POSTACTION(m);
- if (leader != 0) {
- internal_free(m, leader);
- }
- if (trailer != 0) {
- internal_free(m, trailer);
- }
- return chunk2mem(p);
- }
- }
- return 0;
-}
-
-/* ------------------------ comalloc/coalloc support --------------------- */
-
-static void** ialloc(mstate m,
- size_t n_elements,
- size_t* sizes,
- int opts,
- void* chunks[]) {
- /*
- This provides common support for independent_X routines, handling
- all of the combinations that can result.
-
- The opts arg has:
- bit 0 set if all elements are same size (using sizes[0])
- bit 1 set if elements should be zeroed
-
- One aggregate chunk is allocated and then split into n_elements
- separate in-use chunks whose addresses are stored in the result
- array. When chunks is null, the pointer array itself is carved
- from the end of the same aggregate chunk.
-
- Returns chunks (or the carved array). Returns chunks unchanged when
- chunks is non-null and n_elements is 0, internal_malloc(m, 0) when
- chunks is null and n_elements is 0, and 0 when the aggregate
- allocation or PREACTION fails.
-
- NOTE(review): the size arithmetic (n_elements * sizeof(void*),
- n_elements * element_size, and the summing loop) has no visible
- overflow check.
- */
-
- size_t element_size; /* chunksize of each element, if all same */
- size_t contents_size; /* total size of elements */
- size_t array_size; /* request size of pointer array */
- void* mem; /* malloced aggregate space */
- mchunkptr p; /* corresponding chunk */
- size_t remainder_size; /* remaining bytes while splitting */
- void** marray; /* either "chunks" or malloced ptr array */
- mchunkptr array_chunk; /* chunk for malloced ptr array */
- flag_t was_enabled; /* to disable mmap */
- size_t size;
- size_t i;
-
- ensure_initialization();
- /* compute array length, if needed */
- if (chunks != 0) {
- if (n_elements == 0)
- return chunks; /* nothing to do */
- marray = chunks;
- array_size = 0;
- }
- else {
- /* if empty req, must still return chunk representing empty array */
- if (n_elements == 0)
- return (void**)internal_malloc(m, 0);
- marray = 0;
- array_size = request2size(n_elements * (sizeof(void*)));
- }
-
- /* compute total element size */
- if (opts & 0x1) { /* all-same-size */
- element_size = request2size(*sizes);
- contents_size = n_elements * element_size;
- }
- else { /* add up all the sizes */
- element_size = 0;
- contents_size = 0;
- for (i = 0; i != n_elements; ++i)
- contents_size += request2size(sizes[i]);
- }
-
- size = contents_size + array_size;
-
- /*
- Allocate the aggregate chunk. First disable direct-mmapping so
- malloc won't use it, since we would not be able to later
- free/realloc space internal to a segregated mmap region.
- */
- was_enabled = use_mmap(m);
- disable_mmap(m);
- mem = internal_malloc(m, size - CHUNK_OVERHEAD);
- if (was_enabled)
- enable_mmap(m);
- if (mem == 0)
- return 0;
-
- if (PREACTION(m)) return 0; /* NOTE(review): mem obtained above is not released on this path */
- p = mem2chunk(mem);
- remainder_size = chunksize(p);
-
- assert(!is_mmapped(p));
-
- if (opts & 0x2) { /* optionally clear the elements */
- memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size);
- }
-
- /* If not provided, allocate the pointer array as final part of chunk */
- if (marray == 0) {
- size_t array_chunk_size;
- array_chunk = chunk_plus_offset(p, contents_size);
- array_chunk_size = remainder_size - contents_size;
- marray = (void**) (chunk2mem(array_chunk));
- set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
- remainder_size = contents_size;
- }
-
- /* split out elements */
- for (i = 0; ; ++i) {
- marray[i] = chunk2mem(p);
- if (i != n_elements-1) {
- if (element_size != 0)
- size = element_size;
- else
- size = request2size(sizes[i]);
- remainder_size -= size;
- set_size_and_pinuse_of_inuse_chunk(m, p, size);
- p = chunk_plus_offset(p, size);
- }
- else { /* the final element absorbs any overallocation slop */
- set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
- break;
- }
- }
-
-#if DEBUG
- if (marray != chunks) {
- /* final element must have exactly exhausted chunk */
- if (element_size != 0) {
- assert(remainder_size == element_size);
- }
- else {
- assert(remainder_size == request2size(sizes[i]));
- }
- check_inuse_chunk(m, mem2chunk(marray));
- }
- for (i = 0; i != n_elements; ++i)
- check_inuse_chunk(m, mem2chunk(marray[i]));
-
-#endif /* DEBUG */
-
- POSTACTION(m);
- return marray;
-}
-
-
-/* -------------------------- public routines ---------------------------- */
-
-#if !ONLY_MSPACES
-
-void* dlmalloc(size_t bytes) {
- /*
- Basic algorithm:
- If a small request (< 256 bytes minus per-chunk overhead):
- 1. If one exists, use a remainderless chunk in associated smallbin.
- (Remainderless means that there are too few excess bytes to
- represent as a chunk.)
- 2. If it is big enough, use the dv chunk, which is normally the
- chunk adjacent to the one used for the most recent small request.
- 3. If one exists, split the smallest available chunk in a bin,
- saving remainder in dv.
- 4. If it is big enough, use the top chunk.
- 5. If available, get memory from system and use it
- Otherwise, for a large request:
- 1. Find the smallest available binned chunk that fits, and use it
- if it is better fitting than dv chunk, splitting if necessary.
- 2. If better fitting than any binned chunk, use the dv chunk.
- 3. If it is big enough, use the top chunk.
- 4. If request size >= mmap threshold, try to directly mmap this chunk.
- 5. If available, get memory from system and use it
-
- The ugly goto's here ensure that postaction occurs along all paths.
-
- Returns the allocated memory, or 0 when PREACTION fails or when
- sys_alloc returns 0.
- */
-
-#if USE_LOCKS
- ensure_initialization(); /* initialize in sys_alloc if not using locks */
-#endif
-
- if (!PREACTION(gm)) {
- void* mem;
- size_t nb; /* padded chunk size actually searched for */
- if (bytes <= MAX_SMALL_REQUEST) {
- bindex_t idx;
- binmap_t smallbits;
- nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
- idx = small_index(nb);
- smallbits = gm->smallmap >> idx; /* bit 0 now stands for bin idx, bit 1 for bin idx+1 */
-
- if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
- mchunkptr b, p;
- idx += ~smallbits & 1; /* Uses next bin if idx empty */
- b = smallbin_at(gm, idx);
- p = b->fd;
- assert(chunksize(p) == small_index2size(idx));
- unlink_first_small_chunk(gm, b, p, idx);
- set_inuse_and_pinuse(gm, p, small_index2size(idx));
- mem = chunk2mem(p);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- else if (nb > gm->dvsize) {
- if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
- mchunkptr b, p, r;
- size_t rsize;
- bindex_t i;
- binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
- binmap_t leastbit = least_bit(leftbits);
- compute_bit2idx(leastbit, i);
- b = smallbin_at(gm, i);
- p = b->fd;
- assert(chunksize(p) == small_index2size(i));
- unlink_first_small_chunk(gm, b, p, i);
- rsize = small_index2size(i) - nb;
- /* Fit here cannot be remainderless if 4byte sizes */
- if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(gm, p, small_index2size(i));
- else {
- set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
- r = chunk_plus_offset(p, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- replace_dv(gm, r, rsize);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
- }
- }
- else if (bytes >= MAX_REQUEST)
- nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
- else {
- nb = pad_request(bytes);
- if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
- }
-
- if (nb <= gm->dvsize) {
- size_t rsize = gm->dvsize - nb;
- mchunkptr p = gm->dv;
- if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
- mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
- gm->dvsize = rsize;
- set_size_and_pinuse_of_free_chunk(r, rsize);
- set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
- }
- else { /* exhaust dv */
- size_t dvs = gm->dvsize;
- gm->dvsize = 0;
- gm->dv = 0;
- set_inuse_and_pinuse(gm, p, dvs);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- else if (nb < gm->topsize) { /* Split top */
- size_t rsize = gm->topsize -= nb;
- mchunkptr p = gm->top;
- mchunkptr r = gm->top = chunk_plus_offset(p, nb);
- r->head = rsize | PINUSE_BIT;
- set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
- mem = chunk2mem(p);
- check_top_chunk(gm, gm->top);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- mem = sys_alloc(gm, nb);
-
- postaction:
- POSTACTION(gm);
- return mem;
- }
-
- return 0;
-}
-
-void dlfree(void* mem) {
- /*
- Consolidate freed chunks with preceding or succeeding bordering
- free chunks, if they exist, and then place in a bin. Intermixed
- with special cases for top, dv, mmapped chunks, and usage errors.
-
- A null mem is ignored. When FOOTERS is set, the owning state is
- looked up from the chunk and checked with ok_magic; otherwise the
- name fm is defined as gm for the rest of the function.
- */
-
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
-#if FOOTERS
- mstate fm = get_mstate_for(p);
- if (!ok_magic(fm)) {
- USAGE_ERROR_ACTION(fm, p);
- return;
- }
-#else /* FOOTERS */
-#define fm gm
-#endif /* FOOTERS */
- if (!PREACTION(fm)) {
- check_inuse_chunk(fm, p);
- if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
- size_t psize = chunksize(p);
- mchunkptr next = chunk_plus_offset(p, psize);
- if (!pinuse(p)) {
- size_t prevsize = p->prev_foot;
- if (is_mmapped(p)) {
- psize += prevsize + MMAP_FOOT_PAD;
- if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
- fm->footprint -= psize;
- goto postaction;
- }
- else {
- mchunkptr prev = chunk_minus_offset(p, prevsize);
- psize += prevsize;
- p = prev;
- if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
- if (p != fm->dv) {
- unlink_chunk(fm, p, prevsize);
- }
- else if ((next->head & INUSE_BITS) == INUSE_BITS) { /* previous chunk is dv and next is in use: dv simply grows */
- fm->dvsize = psize;
- set_free_with_pinuse(p, psize, next);
- goto postaction;
- }
- }
- else
- goto erroraction;
- }
- }
-
- if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
- if (!cinuse(next)) { /* consolidate forward */
- if (next == fm->top) {
- size_t tsize = fm->topsize += psize;
- fm->top = p;
- p->head = tsize | PINUSE_BIT;
- if (p == fm->dv) {
- fm->dv = 0;
- fm->dvsize = 0;
- }
- if (should_trim(fm, tsize))
- sys_trim(fm, 0);
- goto postaction;
- }
- else if (next == fm->dv) {
- size_t dsize = fm->dvsize += psize;
- fm->dv = p;
- set_size_and_pinuse_of_free_chunk(p, dsize);
- goto postaction;
- }
- else {
- size_t nsize = chunksize(next);
- psize += nsize;
- unlink_chunk(fm, next, nsize);
- set_size_and_pinuse_of_free_chunk(p, psize);
- if (p == fm->dv) {
- fm->dvsize = psize;
- goto postaction;
- }
- }
- }
- else
- set_free_with_pinuse(p, psize, next);
-
- if (is_small(psize)) {
- insert_small_chunk(fm, p, psize);
- check_free_chunk(fm, p);
- }
- else {
- tchunkptr tp = (tchunkptr)p;
- insert_large_chunk(fm, tp, psize);
- check_free_chunk(fm, p);
- if (--fm->release_checks == 0) /* when the countdown reaches zero, scan for unused segments */
- release_unused_segments(fm);
- }
- goto postaction;
- }
- }
- erroraction:
- USAGE_ERROR_ACTION(fm, p);
- postaction:
- POSTACTION(fm);
- }
- }
-#if !FOOTERS
-#undef fm
-#endif /* FOOTERS */
-}
-
-void* dlcalloc(size_t n_elements, size_t elem_size) {
- /*
-   Allocate room for n_elements items of elem_size bytes each and zero
-   it when calloc_must_clear says the chunk needs clearing. A product
-   that overflows size_t is replaced by MAX_SIZE_T so that dlmalloc
-   fails instead of returning a short block.
- */
- size_t total = 0;
- void* block;
- if (n_elements != 0) {
-   size_t product = n_elements * elem_size;
-   /* The division check is only needed when an operand exceeds 16 bits. */
-   int wide_operand = ((n_elements | elem_size) & ~(size_t)0xffff) != 0;
-   if (wide_operand && (product / n_elements != elem_size))
-     total = MAX_SIZE_T; /* force downstream failure on overflow */
-   else
-     total = product;
- }
- block = dlmalloc(total);
- if (block == 0)
-   return block;
- if (calloc_must_clear(mem2chunk(block)))
-   memset(block, 0, total);
- return block;
-}
-
-void* dlrealloc(void* oldmem, size_t bytes) { /*
-  Resize oldmem to `bytes` bytes using the state that owns it.
-
-  A null oldmem behaves like dlmalloc(bytes). When
-  REALLOC_ZERO_BYTES_FREES is defined, bytes == 0 frees oldmem and
-  returns 0. Otherwise the work is done by internal_realloc; with
-  FOOTERS set, the owning state is derived from the chunk and a failed
-  ok_magic test triggers USAGE_ERROR_ACTION and returns 0.
-*/
- if (oldmem == 0)
- return dlmalloc(bytes);
-#ifdef REALLOC_ZERO_BYTES_FREES
- if (bytes == 0) {
- dlfree(oldmem);
- return 0;
- }
-#endif /* REALLOC_ZERO_BYTES_FREES */
- else { /* pairs with whichever `if` the preprocessor left directly above */
-#if ! FOOTERS
- mstate m = gm;
-#else /* FOOTERS */
- mstate m = get_mstate_for(mem2chunk(oldmem));
- if (!ok_magic(m)) {
- USAGE_ERROR_ACTION(m, oldmem);
- return 0;
- }
-#endif /* FOOTERS */
- return internal_realloc(m, oldmem, bytes);
- }
-}
-
-void* dlmemalign(size_t alignment, size_t bytes) { /* aligned allocation on the gm state; all work is done by internal_memalign */
- return internal_memalign(gm, alignment, bytes);
-}
-
-void** dlindependent_calloc(size_t n_elements, size_t elem_size,
- void* chunks[]) { /* n_elements same-sized, zeroed elements on the gm state; result and failure behavior are ialloc's */
- size_t sz = elem_size; /* serves as 1-element array */
- return ialloc(gm, n_elements, &sz, 3, chunks); /* opts 3 = bit 0 (all same size) | bit 1 (zero the elements) */
-}
-
-void** dlindependent_comalloc(size_t n_elements, size_t sizes[],
- void* chunks[]) { /* n_elements elements with per-element sizes on the gm state; result and failure behavior are ialloc's */
- return ialloc(gm, n_elements, sizes, 0, chunks); /* opts 0 = sizes differ, no zeroing */
-}
-
-void* dlvalloc(size_t bytes) {
- /*
-   Allocate `bytes` bytes aligned to mparams.page_size.
-   ensure_initialization runs first, before page_size is read.
- */
- ensure_initialization();
- return dlmemalign(mparams.page_size, bytes);
-}
-
-void* dlpvalloc(size_t bytes) {
- /*
-   Page-aligned allocation whose size is rounded up to a whole number
-   of pages (mparams.page_size).
-
-   If rounding up would wrap around size_t, MAX_SIZE_T is requested
-   instead so the allocation fails downstream rather than succeeding
-   with a block far smaller than the caller asked for.
-
-   Returns the pointer from dlmemalign, or 0 on failure.
- */
- size_t pagesz;
- size_t mask;
- size_t rounded;
- ensure_initialization();
- pagesz = mparams.page_size;
- mask = pagesz - SIZE_T_ONE;
- if (bytes > MAX_SIZE_T - mask)
-   rounded = MAX_SIZE_T; /* bytes + mask would overflow: force downstream failure */
- else
-   rounded = (bytes + mask) & ~mask;
- return dlmemalign(pagesz, rounded);
-}
-
-int dlmalloc_trim(size_t pad) {
- /*
-   Run sys_trim on the gm state between PREACTION and POSTACTION.
-   Returns sys_trim's result, or 0 when PREACTION fails.
- */
- int trimmed;
- ensure_initialization();
- if (PREACTION(gm))
-   return 0;
- trimmed = sys_trim(gm, pad);
- POSTACTION(gm);
- return trimmed;
-}
-
-size_t dlmalloc_footprint(void) { /* current value of gm->footprint, read without PREACTION/POSTACTION */
- return gm->footprint;
-}
-
-size_t dlmalloc_max_footprint(void) { /* current value of gm->max_footprint, read without PREACTION/POSTACTION */
- return gm->max_footprint;
-}
-
-#if !NO_MALLINFO
-struct mallinfo dlmallinfo(void) { /* compiled only when NO_MALLINFO is 0; returns internal_mallinfo's result for gm */
- return internal_mallinfo(gm);
-}
-#endif /* NO_MALLINFO */
-
-void dlmalloc_stats() { /* runs internal_malloc_stats on the gm state */
- internal_malloc_stats(gm);
-}
-
-int dlmallopt(int param_number, int value) { /* forwards both arguments to change_mparam and returns its result unchanged */
- return change_mparam(param_number, value);
-}
-
-#endif /* !ONLY_MSPACES */
-
-size_t dlmalloc_usable_size(void* mem) {
- /*
-   Number of bytes usable in the allocation at mem: the chunk size less
-   overhead_for the chunk. Returns 0 for a null pointer or for a chunk
-   that is_inuse reports as not in use.
- */
- mchunkptr chunk;
- if (mem == 0)
-   return 0;
- chunk = mem2chunk(mem);
- if (!is_inuse(chunk))
-   return 0;
- return chunksize(chunk) - overhead_for(chunk);
-}
-
-/* ----------------------------- user mspaces ---------------------------- */
-
-#if MSPACES
-
-static mstate init_user_mstate(char* tbase, size_t tsize) { /*
-  Build a malloc_state inside the region [tbase, tbase + tsize) and
-  return it. The state is placed in the payload of the first aligned
-  chunk, the region is recorded as the state's single segment, and the
-  space after the state becomes the top chunk. Callers set seg.sflags
-  and the lock mode afterwards.
-*/
- size_t msize = pad_request(sizeof(struct malloc_state));
- mchunkptr mn;
- mchunkptr msp = align_as_chunk(tbase);
- mstate m = (mstate)(chunk2mem(msp));
- memset(m, 0, msize);
- INITIAL_LOCK(&m->mutex);
- msp->head = (msize|INUSE_BITS); /* mark the chunk holding the state itself as in use */
- m->seg.base = m->least_addr = tbase;
- m->seg.size = m->footprint = m->max_footprint = tsize;
- m->magic = mparams.magic;
- m->release_checks = MAX_RELEASE_CHECK_RATE;
- m->mflags = mparams.default_mflags;
- m->extp = 0;
- m->exts = 0;
- disable_contiguous(m);
- init_bins(m);
- mn = next_chunk(mem2chunk(m)); /* first chunk after the embedded state */
- init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE);
- check_top_chunk(m, m->top);
- return m;
-}
-
-mspace create_mspace(size_t capacity, int locked) { /*
-  Create an mspace in memory obtained with CALL_MMAP.
-
-  capacity - requested initial capacity; 0 selects mparams.granularity
-  locked   - passed to set_lock for the new state
-
-  Returns the new mspace, or 0 when capacity is too large for the
-  overhead to be added without wrapping or when CALL_MMAP fails.
-*/
- mstate m = 0;
- size_t msize;
- ensure_initialization();
- msize = pad_request(sizeof(struct malloc_state));
- if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { /* unsigned negation: upper bound that leaves room for the overhead */
- size_t rs = ((capacity == 0)? mparams.granularity :
- (capacity + TOP_FOOT_SIZE + msize));
- size_t tsize = granularity_align(rs);
- char* tbase = (char*)(CALL_MMAP(tsize));
- if (tbase != CMFAIL) {
- m = init_user_mstate(tbase, tsize);
- m->seg.sflags = USE_MMAP_BIT;
- set_lock(m, locked);
- }
- }
- return (mspace)m;
-}
-
-mspace create_mspace_with_base(void* base, size_t capacity, int locked) { /*
-  Create an mspace inside caller-supplied memory.
-
-  base     - start of the caller's region (NOTE(review): not checked for null)
-  capacity - size of that region in bytes
-  locked   - passed to set_lock for the new state
-
-  Returns the new mspace, whose segment is flagged EXTERN_BIT, or 0
-  when capacity is not larger than msize + TOP_FOOT_SIZE or is at or
-  above the upper bound tested below.
-*/
- mstate m = 0;
- size_t msize;
- ensure_initialization();
- msize = pad_request(sizeof(struct malloc_state));
- if (capacity > msize + TOP_FOOT_SIZE &&
- capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
- m = init_user_mstate((char*)base, capacity);
- m->seg.sflags = EXTERN_BIT;
- set_lock(m, locked);
- }
- return (mspace)m;
-}
-
-int mspace_track_large_chunks(mspace msp, int enable) { /*
-  Switch direct mmap use for this mspace: a nonzero `enable` calls
-  disable_mmap, zero calls enable_mmap.
-
-  Returns 1 if use_mmap(ms) was false before the change, otherwise 0.
-  Also returns 0, changing nothing, when PREACTION fails.
-*/
- int ret = 0;
- mstate ms = (mstate)msp;
- if (!PREACTION(ms)) {
- if (!use_mmap(ms))
- ret = 1; /* report the setting in effect before it is changed below */
- if (!enable)
- enable_mmap(ms);
- else
- disable_mmap(ms);
- POSTACTION(ms);
- }
- return ret;
-}
-
-size_t destroy_mspace(mspace msp) {
- /*
-   Unmap every segment of the mspace that is flagged USE_MMAP_BIT and
-   not EXTERN_BIT. Returns the total size of the segments successfully
-   unmapped. A state that fails ok_magic triggers USAGE_ERROR_ACTION
-   and yields 0.
- */
- mstate ms = (mstate)msp;
- size_t freed = 0;
- msegmentptr seg;
- msegmentptr following;
- if (!ok_magic(ms)) {
-   USAGE_ERROR_ACTION(ms,ms);
-   return freed;
- }
- for (seg = &ms->seg; seg != 0; seg = following) {
-   /* Copy every field first: the record may sit in memory unmapped below. */
-   char* base = seg->base;
-   size_t size = seg->size;
-   flag_t flag = seg->sflags;
-   following = seg->next;
-   if (!(flag & USE_MMAP_BIT) || (flag & EXTERN_BIT))
-     continue;
-   if (CALL_MUNMAP(base, size) == 0)
-     freed += size;
- }
- return freed;
-}
-
-/*
- mspace versions of routines are near-clones of the global
- versions. This is not so nice but better than the alternatives.
-*/
-
-
-void* mspace_malloc(mspace msp, size_t bytes) {
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- if (!PREACTION(ms)) {
- void* mem;
- size_t nb;
- if (bytes <= MAX_SMALL_REQUEST) {
- bindex_t idx;
- binmap_t smallbits;
- nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
- idx = small_index(nb);
- smallbits = ms->smallmap >> idx;
-
- if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
- mchunkptr b, p;
- idx += ~smallbits & 1; /* Uses next bin if idx empty */
- b = smallbin_at(ms, idx);
- p = b->fd;
- assert(chunksize(p) == small_index2size(idx));
- unlink_first_small_chunk(ms, b, p, idx);
- set_inuse_and_pinuse(ms, p, small_index2size(idx));
- mem = chunk2mem(p);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- else if (nb > ms->dvsize) {
- if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
- mchunkptr b, p, r;
- size_t rsize;
- bindex_t i;
- binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
- binmap_t leastbit = least_bit(leftbits);
- compute_bit2idx(leastbit, i);
- b = smallbin_at(ms, i);
- p = b->fd;
- assert(chunksize(p) == small_index2size(i));
- unlink_first_small_chunk(ms, b, p, i);
- rsize = small_index2size(i) - nb;
- /* Fit here cannot be remainderless if 4byte sizes */
- if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(ms, p, small_index2size(i));
- else {
- set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
- r = chunk_plus_offset(p, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- replace_dv(ms, r, rsize);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
- }
- }
- else if (bytes >= MAX_REQUEST)
- nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
- else {
- nb = pad_request(bytes);
- if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
- }
-
- if (nb <= ms->dvsize) {
- size_t rsize = ms->dvsize - nb;
- mchunkptr p = ms->dv;
- if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
- mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
- ms->dvsize = rsize;
- set_size_and_pinuse_of_free_chunk(r, rsize);
- set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
- }
- else { /* exhaust dv */
- size_t dvs = ms->dvsize;
- ms->dvsize = 0;
- ms->dv = 0;
- set_inuse_and_pinuse(ms, p, dvs);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- else if (nb < ms->topsize) { /* Split top */
- size_t rsize = ms->topsize -= nb;
- mchunkptr p = ms->top;
- mchunkptr r = ms->top = chunk_plus_offset(p, nb);
- r->head = rsize | PINUSE_BIT;
- set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
- mem = chunk2mem(p);
- check_top_chunk(ms, ms->top);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- mem = sys_alloc(ms, nb);
-
- postaction:
- POSTACTION(ms);
- return mem;
- }
-
- return 0;
-}
-
-void mspace_free(mspace msp, void* mem) {
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
-#if FOOTERS
- mstate fm = get_mstate_for(p);
- msp = msp; /* placate people compiling -Wunused */
-#else /* FOOTERS */
- mstate fm = (mstate)msp;
-#endif /* FOOTERS */
- if (!ok_magic(fm)) {
- USAGE_ERROR_ACTION(fm, p);
- return;
- }
- if (!PREACTION(fm)) {
- check_inuse_chunk(fm, p);
- if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
- size_t psize = chunksize(p);
- mchunkptr next = chunk_plus_offset(p, psize);
- if (!pinuse(p)) {
- size_t prevsize = p->prev_foot;
- if (is_mmapped(p)) {
- psize += prevsize + MMAP_FOOT_PAD;
- if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
- fm->footprint -= psize;
- goto postaction;
- }
- else {
- mchunkptr prev = chunk_minus_offset(p, prevsize);
- psize += prevsize;
- p = prev;
- if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
- if (p != fm->dv) {
- unlink_chunk(fm, p, prevsize);
- }
- else if ((next->head & INUSE_BITS) == INUSE_BITS) {
- fm->dvsize = psize;
- set_free_with_pinuse(p, psize, next);
- goto postaction;
- }
- }
- else
- goto erroraction;
- }
- }
-
- if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
- if (!cinuse(next)) { /* consolidate forward */
- if (next == fm->top) {
- size_t tsize = fm->topsize += psize;
- fm->top = p;
- p->head = tsize | PINUSE_BIT;
- if (p == fm->dv) {
- fm->dv = 0;
- fm->dvsize = 0;
- }
- if (should_trim(fm, tsize))
- sys_trim(fm, 0);
- goto postaction;
- }
- else if (next == fm->dv) {
- size_t dsize = fm->dvsize += psize;
- fm->dv = p;
- set_size_and_pinuse_of_free_chunk(p, dsize);
- goto postaction;
- }
- else {
- size_t nsize = chunksize(next);
- psize += nsize;
- unlink_chunk(fm, next, nsize);
- set_size_and_pinuse_of_free_chunk(p, psize);
- if (p == fm->dv) {
- fm->dvsize = psize;
- goto postaction;
- }
- }
- }
- else
- set_free_with_pinuse(p, psize, next);
-
- if (is_small(psize)) {
- insert_small_chunk(fm, p, psize);
- check_free_chunk(fm, p);
- }
- else {
- tchunkptr tp = (tchunkptr)p;
- insert_large_chunk(fm, tp, psize);
- check_free_chunk(fm, p);
- if (--fm->release_checks == 0)
- release_unused_segments(fm);
- }
- goto postaction;
- }
- }
- erroraction:
- USAGE_ERROR_ACTION(fm, p);
- postaction:
- POSTACTION(fm);
- }
- }
-}
-
-void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) {
- void* mem;
- size_t req = 0;
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- if (n_elements != 0) {
- req = n_elements * elem_size;
- if (((n_elements | elem_size) & ~(size_t)0xffff) &&
- (req / n_elements != elem_size))
- req = MAX_SIZE_T; /* force downstream failure on overflow */
- }
- mem = internal_malloc(ms, req);
- if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
- memset(mem, 0, req);
- return mem;
-}
-
-void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) {
- if (oldmem == 0)
- return mspace_malloc(msp, bytes);
-#ifdef REALLOC_ZERO_BYTES_FREES
- if (bytes == 0) {
- mspace_free(msp, oldmem);
- return 0;
- }
-#endif /* REALLOC_ZERO_BYTES_FREES */
- else {
-#if FOOTERS
- mchunkptr p = mem2chunk(oldmem);
- mstate ms = get_mstate_for(p);
-#else /* FOOTERS */
- mstate ms = (mstate)msp;
-#endif /* FOOTERS */
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return internal_realloc(ms, oldmem, bytes);
- }
-}
-
-void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return internal_memalign(ms, alignment, bytes);
-}
-
-void** mspace_independent_calloc(mspace msp, size_t n_elements,
- size_t elem_size, void* chunks[]) {
- size_t sz = elem_size; /* serves as 1-element array */
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return ialloc(ms, n_elements, &sz, 3, chunks);
-}
-
-void** mspace_independent_comalloc(mspace msp, size_t n_elements,
- size_t sizes[], void* chunks[]) {
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return ialloc(ms, n_elements, sizes, 0, chunks);
-}
-
-int mspace_trim(mspace msp, size_t pad) {
- int result = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- if (!PREACTION(ms)) {
- result = sys_trim(ms, pad);
- POSTACTION(ms);
- }
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return result;
-}
-
-void mspace_malloc_stats(mspace msp) {
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- internal_malloc_stats(ms);
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
-}
-
-size_t mspace_footprint(mspace msp) {
- size_t result = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- result = ms->footprint;
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return result;
-}
-
-
-size_t mspace_max_footprint(mspace msp) {
- size_t result = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- result = ms->max_footprint;
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return result;
-}
-
-
-#if !NO_MALLINFO
-struct mallinfo mspace_mallinfo(mspace msp) {
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return internal_mallinfo(ms);
-}
-#endif /* NO_MALLINFO */
-
-size_t mspace_usable_size(void* mem) {
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
- if (is_inuse(p))
- return chunksize(p) - overhead_for(p);
- }
- return 0;
-}
-
-int mspace_mallopt(int param_number, int value) {
- return change_mparam(param_number, value);
-}
-
-#endif /* MSPACES */
-
-
-/* -------------------- Alternative MORECORE functions ------------------- */
-
-/*
- Guidelines for creating a custom version of MORECORE:
-
- * For best performance, MORECORE should allocate in multiples of pagesize.
- * MORECORE may allocate more memory than requested. (Or even less,
- but this will usually result in a malloc failure.)
- * MORECORE must not allocate memory when given argument zero, but
- instead return one past the end address of memory from previous
- nonzero call.
- * For best performance, consecutive calls to MORECORE with positive
- arguments should return increasing addresses, indicating that
- space has been contiguously extended.
- * Even though consecutive calls to MORECORE need not return contiguous
- addresses, it must be OK for malloc'ed chunks to span multiple
- regions in those cases where they do happen to be contiguous.
- * MORECORE need not handle negative arguments -- it may instead
- just return MFAIL when given negative arguments.
- Negative arguments are always multiples of pagesize. MORECORE
- must not misinterpret negative args as large positive unsigned
- args. You can suppress all such calls from even occurring by defining
- MORECORE_CANNOT_TRIM,
-
- As an example alternative MORECORE, here is a custom allocator
- kindly contributed for pre-OSX macOS. It uses virtually but not
- necessarily physically contiguous non-paged memory (locked in,
- present and won't get swapped out). You can use it by uncommenting
- this section, adding some #includes, and setting up the appropriate
- defines above:
-
- #define MORECORE osMoreCore
-
- There is also a shutdown routine that should somehow be called for
- cleanup upon program exit.
-
- #define MAX_POOL_ENTRIES 100
- #define MINIMUM_MORECORE_SIZE (64 * 1024U)
- static int next_os_pool;
- void *our_os_pools[MAX_POOL_ENTRIES];
-
- void *osMoreCore(int size)
- {
- void *ptr = 0;
- static void *sbrk_top = 0;
-
- if (size > 0)
- {
- if (size < MINIMUM_MORECORE_SIZE)
- size = MINIMUM_MORECORE_SIZE;
- if (CurrentExecutionLevel() == kTaskLevel)
- ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
- if (ptr == 0)
- {
- return (void *) MFAIL;
- }
- // save ptrs so they can be freed during cleanup
- our_os_pools[next_os_pool] = ptr;
- next_os_pool++;
- ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
- sbrk_top = (char *) ptr + size;
- return ptr;
- }
- else if (size < 0)
- {
- // we don't currently support shrink behavior
- return (void *) MFAIL;
- }
- else
- {
- return sbrk_top;
- }
- }
-
- // cleanup any allocated memory pools
- // called as last thing before shutting down driver
-
- void osCleanupMem(void)
- {
- void **ptr;
-
- for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
- if (*ptr)
- {
- PoolDeallocate(*ptr);
- *ptr = 0;
- }
- }
-
-*/
-
-
-/* -----------------------------------------------------------------------
-History:
- V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee)
- * Use zeros instead of prev foot for is_mmapped
- * Add mspace_track_large_chunks; thanks to Jean Brouwers
- * Fix set_inuse in internal_realloc; thanks to Jean Brouwers
- * Fix insufficient sys_alloc padding when using 16byte alignment
- * Fix bad error check in mspace_footprint
- * Adaptations for ptmalloc; thanks to Wolfram Gloger.
- * Reentrant spin locks; thanks to Earl Chew and others
- * Win32 improvements; thanks to Niall Douglas and Earl Chew
- * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options
- * Extension hook in malloc_state
- * Various small adjustments to reduce warnings on some compilers
- * Various configuration extensions/changes for more platforms. Thanks
- to all who contributed these.
-
- V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee)
- * Add max_footprint functions
- * Ensure all appropriate literals are size_t
- * Fix conditional compilation problem for some #define settings
- * Avoid concatenating segments with the one provided
- in create_mspace_with_base
- * Rename some variables to avoid compiler shadowing warnings
- * Use explicit lock initialization.
- * Better handling of sbrk interference.
- * Simplify and fix segment insertion, trimming and mspace_destroy
- * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
- * Thanks especially to Dennis Flanagan for help on these.
-
- V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee)
- * Fix memalign brace error.
-
- V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee)
- * Fix improper #endif nesting in C++
- * Add explicit casts needed for C++
-
- V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee)
- * Use trees for large bins
- * Support mspaces
- * Use segments to unify sbrk-based and mmap-based system allocation,
- removing need for emulation on most platforms without sbrk.
- * Default safety checks
- * Optional footer checks. Thanks to William Robertson for the idea.
- * Internal code refactoring
- * Incorporate suggestions and platform-specific changes.
- Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
- Aaron Bachmann, Emery Berger, and others.
- * Speed up non-fastbin processing enough to remove fastbins.
- * Remove useless cfree() to avoid conflicts with other apps.
- * Remove internal memcpy, memset. Compilers handle builtins better.
- * Remove some options that no one ever used and rename others.
-
- V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
- * Fix malloc_state bitmap array misdeclaration
-
- V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee)
- * Allow tuning of FIRST_SORTED_BIN_SIZE
- * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
- * Better detection and support for non-contiguousness of MORECORE.
- Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
- * Bypass most of malloc if no frees. Thanks To Emery Berger.
- * Fix freeing of old top non-contiguous chunk im sysmalloc.
- * Raised default trim and map thresholds to 256K.
- * Fix mmap-related #defines. Thanks to Lubos Lunak.
- * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
- * Branch-free bin calculation
- * Default trim and mmap thresholds now 256K.
-
- V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
- * Introduce independent_comalloc and independent_calloc.
- Thanks to Michael Pachos for motivation and help.
- * Make optional .h file available
- * Allow > 2GB requests on 32bit systems.
- * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
- Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
- and Anonymous.
- * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
- helping test this.)
- * memalign: check alignment arg
- * realloc: don't try to shift chunks backwards, since this
- leads to more fragmentation in some programs and doesn't
- seem to help in any others.
- * Collect all cases in malloc requiring system memory into sysmalloc
- * Use mmap as backup to sbrk
- * Place all internal state in malloc_state
- * Introduce fastbins (although similar to 2.5.1)
- * Many minor tunings and cosmetic improvements
- * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
- * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
- Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
- * Include errno.h to support default failure action.
-
- V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee)
- * return null for negative arguments
- * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
- * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
- (e.g. WIN32 platforms)
- * Cleanup header file inclusion for WIN32 platforms
- * Cleanup code to avoid Microsoft Visual C++ compiler complaints
- * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
- memory allocation routines
- * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
- * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
- usage of 'assert' in non-WIN32 code
- * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
- avoid infinite loop
- * Always call 'fREe()' rather than 'free()'
-
- V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee)
- * Fixed ordering problem with boundary-stamping
-
- V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee)
- * Added pvalloc, as recommended by H.J. Liu
- * Added 64bit pointer support mainly from Wolfram Gloger
- * Added anonymously donated WIN32 sbrk emulation
- * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
- * malloc_extend_top: fix mask error that caused wastage after
- foreign sbrks
- * Add linux mremap support code from HJ Liu
-
- V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
- * Integrated most documentation with the code.
- * Add support for mmap, with help from
- Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
- * Use last_remainder in more cases.
- * Pack bins using idea from colin@nyx10.cs.du.edu
- * Use ordered bins instead of best-fit threshhold
- * Eliminate block-local decls to simplify tracing and debugging.
- * Support another case of realloc via move into top
- * Fix error occuring when initial sbrk_base not word-aligned.
- * Rely on page size for units instead of SBRK_UNIT to
- avoid surprises about sbrk alignment conventions.
- * Add mallinfo, mallopt. Thanks to Raymond Nijssen
- (raymond@es.ele.tue.nl) for the suggestion.
- * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
- * More precautions for cases where other routines call sbrk,
- courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
- * Added macros etc., allowing use in linux libc from
- H.J. Lu (hjl@gnu.ai.mit.edu)
- * Inverted this history list
-
- V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
- * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
- * Removed all preallocation code since under current scheme
- the work required to undo bad preallocations exceeds
- the work saved in good cases for most test programs.
- * No longer use return list or unconsolidated bins since
- no scheme using them consistently outperforms those that don't
- given above changes.
- * Use best fit for very large chunks to prevent some worst-cases.
- * Added some support for debugging
-
- V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
- * Removed footers when chunks are in use. Thanks to
- Paul Wilson (wilson@cs.texas.edu) for the suggestion.
-
- V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
- * Added malloc_trim, with help from Wolfram Gloger
- (wmglo@Dent.MED.Uni-Muenchen.DE).
-
- V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
-
- V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
- * realloc: try to expand in both directions
- * malloc: swap order of clean-bin strategy;
- * realloc: only conditionally expand backwards
- * Try not to scavenge used bins
- * Use bin counts as a guide to preallocation
- * Occasionally bin return list chunks in first scan
- * Add a few optimizations from colin@nyx10.cs.du.edu
-
- V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
- * faster bin computation & slightly different binning
- * merged all consolidations to one part of malloc proper
- (eliminating old malloc_find_space & malloc_clean_bin)
- * Scan 2 returns chunks (not just 1)
- * Propagate failure in realloc if malloc returns 0
- * Add stuff to allow compilation on non-ANSI compilers
- from kpv@research.att.com
-
- V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
- * removed potential for odd address access in prev_chunk
- * removed dependency on getpagesize.h
- * misc cosmetics and a bit more internal documentation
- * anticosmetics: mangled names in macros to evade debugger strangeness
- * tested on sparc, hp-700, dec-mips, rs6000
- with gcc & native cc (hp, dec only) allowing
- Detlefs & Zorn comparison study (in SIGPLAN Notices.)
-
- Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu)
- * Based loosely on libg++-1.2X malloc. (It retains some of the overall
- structure of old version, but most details differ.)
-
-*/
-
-#endif
+#ifdef NEDMALLOC_ENABLED
+/*
+ This is a version (aka dlmalloc) of malloc/free/realloc written by
+ Doug Lea and released to the public domain, as explained at
+ http://creativecommons.org/licenses/publicdomain. Send questions,
+ comments, complaints, performance data, etc to dl@cs.oswego.edu
+
+* Version 2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee)
+
+ Note: There may be an updated version of this malloc obtainable at
+ ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+ Check before installing!
+
+* Quickstart
+
+ This library is all in one file to simplify the most common usage:
+ ftp it, compile it (-O3), and link it into another program. All of
+ the compile-time options default to reasonable values for use on
+ most platforms. You might later want to step through various
+ compile-time and dynamic tuning options.
+
+ For convenience, an include file for code using this malloc is at:
+ ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h
+ You don't really need this .h file unless you call functions not
+ defined in your system include files. The .h file contains only the
+ excerpts from this file needed for using this malloc on ANSI C/C++
+ systems, so long as you haven't changed compile-time options about
+ naming and tuning parameters. If you do, then you can create your
+ own malloc.h that does include all settings by cutting at the point
+ indicated below. Note that you may already by default be using a C
+ library containing a malloc that is based on some version of this
+ malloc (for example in linux). You might still want to use the one
+ in this file to customize settings or to avoid overheads associated
+ with library versions.
+
+* Vital statistics:
+
+ Supported pointer/size_t representation: 4 or 8 bytes
+ size_t MUST be an unsigned type of the same width as
+ pointers. (If you are using an ancient system that declares
+ size_t as a signed type, or need it to be a different width
+ than pointers, you can use a previous release of this malloc
+ (e.g. 2.7.2) supporting these.)
+
+ Alignment: 8 bytes (default)
+ This suffices for nearly all current machines and C compilers.
+ However, you can define MALLOC_ALIGNMENT to be wider than this
+ if necessary (up to 128 bytes), at the expense of using more space.
+
+ Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes)
+ 8 or 16 bytes (if 8byte sizes)
+ Each malloced chunk has a hidden word of overhead holding size
+ and status information, and additional cross-check word
+ if FOOTERS is defined.
+
+ Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead)
+ 8-byte ptrs: 32 bytes (including overhead)
+
+ Even a request for zero bytes (i.e., malloc(0)) returns a
+ pointer to something of the minimum allocatable size.
+ The maximum overhead wastage (i.e., number of extra bytes
+ allocated beyond those requested in malloc) is less than or equal
+ to the minimum size, except for requests >= mmap_threshold that
+ are serviced via mmap(), where the worst case wastage is about
+ 32 bytes plus the remainder from a system page (the minimal
+ mmap unit); typically 4096 or 8192 bytes.
+
+ Security: static-safe; optionally more or less
+ The "security" of malloc refers to the ability of malicious
+ code to accentuate the effects of errors (for example, freeing
+ space that is not currently malloc'ed or overwriting past the
+ ends of chunks) in code that calls malloc. This malloc
+ guarantees not to modify any memory locations below the base of
+ heap, i.e., static variables, even in the presence of usage
+ errors. The routines additionally detect most improper frees
+ and reallocs. All this holds as long as the static bookkeeping
+ for malloc itself is not corrupted by some other means. This
+ is only one aspect of security -- these checks do not, and
+ cannot, detect all possible programming errors.
+
+ If FOOTERS is defined nonzero, then each allocated chunk
+ carries an additional check word to verify that it was malloced
+ from its space. These check words are the same within each
+ execution of a program using malloc, but differ across
+ executions, so externally crafted fake chunks cannot be
+ freed. This improves security by rejecting frees/reallocs that
+ could corrupt heap memory, in addition to the checks preventing
+ writes to statics that are always on. This may further improve
+ security at the expense of time and space overhead. (Note that
+ FOOTERS may also be worth using with MSPACES.)
+
+ By default detected errors cause the program to abort (calling
+ "abort()"). You can override this to instead proceed past
+ errors by defining PROCEED_ON_ERROR. In this case, a bad free
+ has no effect, and a malloc that encounters a bad address
+ caused by user overwrites will ignore the bad address by
+ dropping pointers and indices to all known memory. This may
+ be appropriate for programs that should continue if at all
+ possible in the face of programming errors, although they may
+ run out of memory because dropped memory is never reclaimed.
+
+ If you don't like either of these options, you can define
+ CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
+ else. And if you are sure that your program using malloc has
+ no errors or vulnerabilities, you can define INSECURE to 1,
+ which might (or might not) provide a small performance improvement.
+
+ Thread-safety: NOT thread-safe unless USE_LOCKS defined
+ When USE_LOCKS is defined, each public call to malloc, free,
+ etc is surrounded with either a pthread mutex or a win32
+ spinlock (depending on WIN32). This is not especially fast, and
+ can be a major bottleneck. It is designed only to provide
+ minimal protection in concurrent environments, and to provide a
+ basis for extensions. If you are using malloc in a concurrent
+ program, consider instead using nedmalloc
+ (http://www.nedprod.com/programs/portable/nedmalloc/) or
+ ptmalloc (See http://www.malloc.de), which are derived
+ from versions of this malloc.
+
+ System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
+ This malloc can use unix sbrk or any emulation (invoked using
+ the CALL_MORECORE macro) and/or mmap/munmap or any emulation
+ (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
+ memory. On most unix systems, it tends to work best if both
+ MORECORE and MMAP are enabled. On Win32, it uses emulations
+ based on VirtualAlloc. It also uses common C library functions
+ like memset.
+
+ Compliance: I believe it is compliant with the Single Unix Specification
+ (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
+ others as well.
+
+* Overview of algorithms
+
+ This is not the fastest, most space-conserving, most portable, or
+ most tunable malloc ever written. However it is among the fastest
+ while also being among the most space-conserving, portable and
+ tunable. Consistent balance across these factors results in a good
+ general-purpose allocator for malloc-intensive programs.
+
+ In most ways, this malloc is a best-fit allocator. Generally, it
+ chooses the best-fitting existing chunk for a request, with ties
+ broken in approximately least-recently-used order. (This strategy
+ normally maintains low fragmentation.) However, for requests less
+ than 256 bytes, it deviates from best-fit when there is not an
+ exactly fitting available chunk by preferring to use space adjacent
+ to that used for the previous small request, as well as by breaking
+ ties in approximately most-recently-used order. (These enhance
+ locality of series of small allocations.) And for very large requests
+ (>= 256KB by default), it relies on system memory mapping
+ facilities, if supported. (This helps avoid carrying around and
+ possibly fragmenting memory used only for large chunks.)
+
+ All operations (except malloc_stats and mallinfo) have execution
+ times that are bounded by a constant factor of the number of bits in
+ a size_t, not counting any clearing in calloc or copying in realloc,
+ or actions surrounding MORECORE and MMAP that have times
+ proportional to the number of non-contiguous regions returned by
+ system allocation routines, which is often just 1. In real-time
+ applications, you can optionally suppress segment traversals using
+ NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
+ system allocators return non-contiguous spaces, at the typical
+ expense of carrying around more memory and increased fragmentation.
+
+ The implementation is not very modular and seriously overuses
+ macros. Perhaps someday all C compilers will do as good a job
+ inlining modular code as can now be done by brute-force expansion,
+ but now, enough of them seem not to.
+
+ Some compilers issue a lot of warnings about code that is
+ dead/unreachable only on some platforms, and also about intentional
+ uses of negation on unsigned types. All known cases of each can be
+ ignored.
+
+ For a longer but out of date high-level description, see
+ http://gee.cs.oswego.edu/dl/html/malloc.html
+
+* MSPACES
+ If MSPACES is defined, then in addition to malloc, free, etc.,
+ this file also defines mspace_malloc, mspace_free, etc. These
+ are versions of malloc routines that take an "mspace" argument
+ obtained using create_mspace, to control all internal bookkeeping.
+ If ONLY_MSPACES is defined, only these versions are compiled.
+ So if you would like to use this allocator for only some allocations,
+ and your system malloc for others, you can compile with
+ ONLY_MSPACES and then do something like...
+ static mspace mymspace = create_mspace(0,0); // for example
+ #define mymalloc(bytes) mspace_malloc(mymspace, bytes)
+
+ (Note: If you only need one instance of an mspace, you can instead
+ use "USE_DL_PREFIX" to relabel the global malloc.)
+
+ You can similarly create thread-local allocators by storing
+ mspaces as thread-locals. For example:
+ static __thread mspace tlms = 0;
+ void* tlmalloc(size_t bytes) {
+ if (tlms == 0) tlms = create_mspace(0, 0);
+ return mspace_malloc(tlms, bytes);
+ }
+ void tlfree(void* mem) { mspace_free(tlms, mem); }
+
+ Unless FOOTERS is defined, each mspace is completely independent.
+ You cannot allocate from one and free to another (although
+ conformance is only weakly checked, so usage errors are not always
+ caught). If FOOTERS is defined, then each chunk carries around a tag
+ indicating its originating mspace, and frees are directed to their
+ originating spaces.
+
+ ------------------------- Compile-time options ---------------------------
+
+Be careful in setting #define values for numerical constants of type
+size_t. On some systems, literal values are not automatically extended
+to size_t precision unless they are explicitly cast. You can also
+use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
+
+WIN32 default: defined if _WIN32 defined
+ Defining WIN32 sets up defaults for MS environment and compilers.
+ Otherwise defaults are for unix. Beware that there seem to be some
+ cases where this malloc might not be a pure drop-in replacement for
+ Win32 malloc: Random-looking failures from Win32 GDI APIs (e.g.,
+ SetDIBits()) may be due to bugs in some video driver implementations
+ when pixel buffers are malloc()ed, and the region spans more than
+ one VirtualAlloc()ed region. Because dlmalloc uses a small (64KB)
+ default granularity, pixel buffers may straddle virtual allocation
+ regions more often than when using the Microsoft allocator. You can
+ avoid this by using VirtualAlloc() and VirtualFree() for all pixel
+ buffers rather than using malloc(). If this is not possible,
+ recompile this malloc with a larger DEFAULT_GRANULARITY.
+
+MALLOC_ALIGNMENT default: (size_t)8
+ Controls the minimum alignment for malloc'ed chunks. It must be a
+ power of two and at least 8, even on machines for which smaller
+ alignments would suffice. It may be defined as larger than this
+ though. Note however that code and data structures are optimized for
+ the case of 8-byte alignment.
+
+MSPACES default: 0 (false)
+ If true, compile in support for independent allocation spaces.
+ This is only supported if HAVE_MMAP is true.
+
+ONLY_MSPACES default: 0 (false)
+ If true, only compile in mspace versions, not regular versions.
+
+USE_LOCKS default: 0 (false)
+ Causes each call to each public routine to be surrounded with
+ pthread or WIN32 mutex lock/unlock. (If set true, this can be
+ overridden on a per-mspace basis for mspace versions.) If set to a
+ non-zero value other than 1, locks are used, but their
+ implementation is left out, so lock functions must be supplied manually,
+ as described below.
+
+USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC
+ If true, uses custom spin locks for locking. This is currently
+ supported only for x86 platforms using gcc or recent MS compilers.
+ Otherwise, posix locks or win32 critical sections are used.
+
+FOOTERS default: 0
+ If true, provide extra checking and dispatching by placing
+ information in the footers of allocated chunks. This adds
+ space and time overhead.
+
+INSECURE default: 0
+ If true, omit checks for usage errors and heap space overwrites.
+
+USE_DL_PREFIX default: NOT defined
+ Causes compiler to prefix all public routines with the string 'dl'.
+ This can be useful when you only want to use this malloc in one part
+ of a program, using your regular system malloc elsewhere.
+
+ABORT default: defined as abort()
+ Defines how to abort on failed checks. On most systems, a failed
+ check cannot die with an "assert" or even print an informative
+ message, because the underlying print routines in turn call malloc,
+ which will fail again. Generally, the best policy is to simply call
+ abort(). It's not very useful to do more than this because many
+ errors due to overwriting will show up as address faults (null, odd
+ addresses etc) rather than malloc-triggered checks, so will also
+ abort. Also, most compilers know that abort() does not return, so
+ can better optimize code conditionally calling it.
+
+PROCEED_ON_ERROR default: defined as 0 (false)
+ Controls whether detected bad addresses cause them to be bypassed
+ rather than aborting. If set, detected bad arguments to free and
+ realloc are ignored. And all bookkeeping information is zeroed out
+ upon a detected overwrite of freed heap space, thus losing the
+ ability to ever return it from malloc again, but enabling the
+ application to proceed. If PROCEED_ON_ERROR is defined, the
+ static variable malloc_corruption_error_count is compiled in
+ and can be examined to see if errors have occurred. This option
+ generates slower code than the default abort policy.
+
+DEBUG default: NOT defined
+ The DEBUG setting is mainly intended for people trying to modify
+ this code or diagnose problems when porting to new platforms.
+ However, it may also be able to better isolate user errors than just
+ using runtime checks. The assertions in the check routines spell
+ out in more detail the assumptions and invariants underlying the
+ algorithms. The checking is fairly extensive, and will slow down
+ execution noticeably. Calling malloc_stats or mallinfo with DEBUG
+ set will attempt to check every non-mmapped allocated and free chunk
+ in the course of computing the summaries.
+
+ABORT_ON_ASSERT_FAILURE default: defined as 1 (true)
+ Debugging assertion failures can be nearly impossible if your
+ version of the assert macro causes malloc to be called, which will
+ lead to a cascade of further failures, blowing the runtime stack.
+ ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
+ which will usually make debugging easier.
+
+MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32
+ The action to take before "return 0" when malloc fails to be able to
+ return memory because there is none available.
+
+HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES
+ True if this system supports sbrk or an emulation of it.
+
+MORECORE default: sbrk
+ The name of the sbrk-style system routine to call to obtain more
+ memory. See below for guidance on writing custom MORECORE
+ functions. The type of the argument to sbrk/MORECORE varies across
+ systems. It cannot be size_t, because it supports negative
+ arguments, so it is normally the signed type of the same width as
+ size_t (sometimes declared as "intptr_t"). It doesn't much matter
+ though. Internally, we only call it with arguments less than half
+ the max value of a size_t, which should work across all reasonable
+ possibilities, although sometimes generating compiler warnings.
+
+MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE
+ If true, take advantage of fact that consecutive calls to MORECORE
+ with positive arguments always return contiguous increasing
+ addresses. This is true of unix sbrk. It does not hurt too much to
+ set it true anyway, since malloc copes with non-contiguities.
+ Setting it false when definitely non-contiguous saves time
+ and possibly wasted space it would take to discover this though.
+
+MORECORE_CANNOT_TRIM default: NOT defined
+ True if MORECORE cannot release space back to the system when given
+ negative arguments. This is generally necessary only if you are
+ using a hand-crafted MORECORE function that cannot handle negative
+ arguments.
+
+NO_SEGMENT_TRAVERSAL default: 0
+ If non-zero, suppresses traversals of memory segments
+ returned by either MORECORE or CALL_MMAP. This disables
+ merging of segments that are contiguous, and selectively
+ releasing them to the OS if unused, but bounds execution times.
+
+HAVE_MMAP default: 1 (true)
+ True if this system supports mmap or an emulation of it. If so, and
+ HAVE_MORECORE is not true, MMAP is used for all system
+ allocation. If set and HAVE_MORECORE is true as well, MMAP is
+ primarily used to directly allocate very large blocks. It is also
+ used as a backup strategy in cases where MORECORE fails to provide
+ space from system. Note: A single call to MUNMAP is assumed to be
+ able to unmap memory that may have been allocated using multiple calls
+ to MMAP, so long as they are adjacent.
+
+HAVE_MREMAP default: 1 on linux, else 0
+ If true realloc() uses mremap() to re-allocate large blocks and
+ extend or shrink allocation spaces.
+
+MMAP_CLEARS default: 1 except on WINCE.
+ True if mmap clears memory so calloc doesn't need to. This is true
+ for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
+
+USE_BUILTIN_FFS default: 0 (i.e., not used)
+ Causes malloc to use the builtin ffs() function to compute indices.
+ Some compilers may recognize and intrinsify ffs to be faster than the
+ supplied C version. Also, the case of x86 using gcc is special-cased
+ to an asm instruction, so is already as fast as it can be, and so
+ this setting has no effect. Similarly for Win32 under recent MS compilers.
+ (On most x86s, the asm version is only slightly faster than the C version.)
+
+malloc_getpagesize default: derive from system includes, or 4096.
+ The system page size. To the extent possible, this malloc manages
+ memory from the system in page-size units. This may be (and
+ usually is) a function rather than a constant. This is ignored
+ if WIN32, where page size is determined using GetSystemInfo during
+ initialization. This may be several megabytes if ENABLE_LARGE_PAGES
+ is enabled.
+
+ENABLE_LARGE_PAGES default: NOT defined
+ Causes the system page size to be the value of GetLargePageMinimum()
+ if that function is available (Windows Server 2003/Vista or later).
+ This allows the use of large page entries in the MMU which can
+ significantly improve performance in large working set applications
+ as TLB cache load is reduced by a factor of three. Note that enabling
+ this option is equal to locking the process' memory in current
+ implementations of Windows and requires the SE_LOCK_MEMORY_PRIVILEGE
+ to be held by the process in order to succeed.
+
+USE_DEV_RANDOM default: 0 (i.e., not used)
+ Causes malloc to use /dev/random to initialize secure magic seed for
+ stamping footers. Otherwise, the current time is used.
+
+NO_MALLINFO default: 0
+ If non-zero, don't compile "mallinfo". This can be a simple way
+ of dealing with mismatches between system declarations and
+ those in this file.
+
+MALLINFO_FIELD_TYPE default: size_t
+ The type of the fields in the mallinfo struct. This was originally
+ defined as "int" in SVID etc, but is more usefully defined as
+ size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set.
+
+REALLOC_ZERO_BYTES_FREES default: not defined
+ This should be set if a call to realloc with zero bytes should
+ be the same as a call to free. Some people think it should. Otherwise,
+ since this malloc returns a unique pointer for malloc(0), so does
+ realloc(p, 0).
+
+LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
+LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H
+LACKS_STDLIB_H default: NOT defined unless on WIN32
+ Define these if your system does not have these header files.
+ You might need to manually insert some of the declarations they provide.
+
+DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS,
+ system_info.dwAllocationGranularity in WIN32,
+ GetLargePageMinimum() if ENABLE_LARGE_PAGES,
+ otherwise 64K.
+ Also settable using mallopt(M_GRANULARITY, x)
+ The unit for allocating and deallocating memory from the system. On
+ most systems with contiguous MORECORE, there is no reason to
+ make this more than a page. However, systems with MMAP tend to
+ either require or encourage larger granularities. You can increase
+ this value to prevent system allocation functions from being called so
+ often, especially if they are slow. The value must be at least one
+ page and must be a power of two. Setting to 0 causes initialization
+ to either page size or win32 region size. (Note: In previous
+ versions of malloc, the equivalent of this option was called
+ "TOP_PAD")
+
+DEFAULT_GRANULARITY_ALIGNED default: undefined (which means page size)
+ Whether to enforce alignment when allocating and deallocating memory
+ from the system i.e. the base address of all allocations will be
+ aligned to DEFAULT_GRANULARITY if it is set. Note that enabling this carries
+ some overhead as multiple calls must now be made when probing for a valid
+ aligned value, however it does greatly ease the checking for whether
+ a given memory pointer was allocated by this allocator rather than
+ some other.
+
+DEFAULT_TRIM_THRESHOLD default: 2MB
+ Also settable using mallopt(M_TRIM_THRESHOLD, x)
+ The maximum amount of unused top-most memory to keep before
+ releasing via malloc_trim in free(). Automatic trimming is mainly
+ useful in long-lived programs using contiguous MORECORE. Because
+ trimming via sbrk can be slow on some systems, and can sometimes be
+ wasteful (in cases where programs immediately afterward allocate
+ more large chunks) the value should be high enough so that your
+ overall system performance would improve by releasing this much
+ memory. As a rough guide, you might set to a value close to the
+ average size of a process (program) running on your system.
+ Releasing this much memory would allow such a process to run in
+ memory. Generally, it is worth tuning trim thresholds when a
+ program undergoes phases where several large chunks are allocated
+ and released in ways that can reuse each other's storage, perhaps
+ mixed with phases where there are no such chunks at all. The trim
+ value must be greater than page size to have any useful effect. To
+ disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
+ some people use of mallocing a huge space and then freeing it at
+ program startup, in an attempt to reserve system memory, doesn't
+ have the intended effect under automatic trimming, since that memory
+ will immediately be returned to the system.
+
+DEFAULT_MMAP_THRESHOLD default: 256K
+ Also settable using mallopt(M_MMAP_THRESHOLD, x)
+ The request size threshold for using MMAP to directly service a
+ request. Requests of at least this size that cannot be allocated
+ using already-existing space will be serviced via mmap. (If enough
+ normal freed space already exists it is used instead.) Using mmap
+ segregates relatively large chunks of memory so that they can be
+ individually obtained and released from the host system. A request
+ serviced through mmap is never reused by any other request (at least
+ not directly; the system may just so happen to remap successive
+ requests to the same locations). Segregating space in this way has
+ the benefits that: Mmapped space can always be individually released
+ back to the system, which helps keep the system level memory demands
+ of a long-lived program low. Also, mapped memory doesn't become
+ `locked' between other chunks, as can happen with normally allocated
+ chunks, which means that even trimming via malloc_trim would not
+ release them. However, it has the disadvantage that the space
+ cannot be reclaimed, consolidated, and then used to service later
+ requests, as happens with normal chunks. The advantages of mmap
+ nearly always outweigh disadvantages for "large" chunks, but the
+ value of "large" may vary across systems. The default is an
+ empirically derived value that works well in most systems. You can
+ disable mmap by setting to MAX_SIZE_T.
+
+MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP
+ The number of consolidated frees between checks to release
+ unused segments when freeing. When using non-contiguous segments,
+ especially with multiple mspaces, checking only for topmost space
+ doesn't always suffice to trigger trimming. To compensate for this,
+ free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
+ current number of segments, if greater) try to release unused
+ segments to the OS when freeing chunks that result in
+ consolidation. The best value for this parameter is a compromise
+ between slowing down frees with relatively costly checks that
+ rarely trigger versus holding on to unused memory. To effectively
+ disable, set to MAX_SIZE_T. This may lead to a very slight speed
+ improvement at the expense of carrying around more memory.
+*/
+
+/* Version identifier to allow people to support multiple versions */
+#ifndef DLMALLOC_VERSION /* an embedding build may pre-define its own identifier */
+#define DLMALLOC_VERSION 20804 /* NOTE(review): presumably encodes release 2.8.4 as 2*10000 + 8*100 + 4 -- confirm */
+#endif /* DLMALLOC_VERSION */
+
+#ifndef WIN32 /* fold the compiler-provided _WIN32 / _WIN32_WCE macros into the single WIN32 switch used below */
+#ifdef _WIN32
+#define WIN32 1
+#endif /* _WIN32 */
+#ifdef _WIN32_WCE /* Windows CE is additionally flagged as lacking <fcntl.h> */
+#define LACKS_FCNTL_H
+#define WIN32 1
+#endif /* _WIN32_WCE */
+#endif /* WIN32 */
+#ifdef WIN32 /* Windows defaults; most defines in this section have no #ifndef guard, so they assume no earlier definition */
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <tchar.h>
+#define HAVE_MMAP 1 /* mmap (or an emulation of it) is treated as available */
+#define HAVE_MORECORE 0 /* no sbrk-style MORECORE on this platform */
+#define LACKS_UNISTD_H
+#define LACKS_SYS_PARAM_H
+#define LACKS_SYS_MMAN_H
+#define LACKS_STRING_H
+#define LACKS_STRINGS_H
+#define LACKS_SYS_TYPES_H
+#define LACKS_ERRNO_H
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION /* expands to nothing: no errno update on allocation failure */
+#endif /* MALLOC_FAILURE_ACTION */
+#ifdef _WIN32_WCE /* WINCE reportedly does not clear mapped memory, so calloc cannot skip zeroing */
+#define MMAP_CLEARS 0
+#else
+#define MMAP_CLEARS 1
+#endif /* _WIN32_WCE */
+#endif /* WIN32 */
+
+#if defined(DARWIN) || defined(_DARWIN)
+/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
+#ifndef HAVE_MORECORE /* applied only when the build has not already chosen HAVE_MORECORE */
+#define HAVE_MORECORE 0
+#define HAVE_MMAP 1
+/* OSX allocators provide 16 byte alignment */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)16U)
+#endif
+#endif /* HAVE_MORECORE: the 16-byte MALLOC_ALIGNMENT default above sits inside this guard, so it is skipped too when HAVE_MORECORE was pre-defined */
+#endif /* DARWIN */
+
+#ifndef LACKS_SYS_TYPES_H
+#include <sys/types.h> /* For size_t */
+#endif /* LACKS_SYS_TYPES_H */
+
+#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310)
+#define SPIN_LOCKS_AVAILABLE 1 /* gcc targeting i386/x86-64, or MSVC with _MSC_VER >= 1310 */
+#else
+#define SPIN_LOCKS_AVAILABLE 0 /* every other compiler/architecture combination */
+#endif /* SPIN_LOCKS_AVAILABLE feeds the USE_SPIN_LOCKS default chosen further down */
+
+/* The maximum possible size_t value has all bits set */
+#define MAX_SIZE_T (~(size_t)0)
+
+#ifndef ONLY_MSPACES
+#define ONLY_MSPACES 0 /* define to a value */
+#else
+#define ONLY_MSPACES 1 /* any pre-existing definition becomes 1; NOTE(review): there is no #undef, so a prior replacement other than 1 is a macro redefinition -- confirm the compilers in use accept it */
+#endif /* ONLY_MSPACES */
+#ifndef MSPACES
+#if ONLY_MSPACES
+#define MSPACES 1 /* an mspace-only build implies mspace support */
+#else /* ONLY_MSPACES */
+#define MSPACES 0
+#endif /* ONLY_MSPACES */
+#endif /* MSPACES */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)8U) /* must be a power of two and at least 8 */
+#endif /* MALLOC_ALIGNMENT */
+#ifndef FOOTERS
+#define FOOTERS 0 /* no per-chunk footer tagging by default */
+#endif /* FOOTERS */
+#ifndef ABORT
+#define ABORT abort()
+#endif /* ABORT */
+#ifndef ABORT_ON_ASSERT_FAILURE
+#define ABORT_ON_ASSERT_FAILURE 1
+#endif /* ABORT_ON_ASSERT_FAILURE */
+#ifndef PROCEED_ON_ERROR
+#define PROCEED_ON_ERROR 0
+#endif /* PROCEED_ON_ERROR */
+#ifndef USE_LOCKS
+#define USE_LOCKS 0 /* public routines are not wrapped in locks by default */
+#endif /* USE_LOCKS */
+#ifndef USE_SPIN_LOCKS
+#if USE_LOCKS && SPIN_LOCKS_AVAILABLE
+#define USE_SPIN_LOCKS 1 /* locking requested and the toolchain supports the custom spin locks */
+#else
+#define USE_SPIN_LOCKS 0
+#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. */
+#endif /* USE_SPIN_LOCKS */
+#ifndef INSECURE
+#define INSECURE 0 /* usage-error and overwrite checks stay enabled */
+#endif /* INSECURE */
+#ifndef HAVE_MMAP
+#define HAVE_MMAP 1
+#endif /* HAVE_MMAP */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif /* MMAP_CLEARS */
+#ifndef HAVE_MREMAP
+#ifdef linux /* NOTE(review): only the legacy 'linux' macro is tested; strict ISO compiler modes may predefine only __linux__ -- confirm */
+#define HAVE_MREMAP 1
+#else /* linux */
+#define HAVE_MREMAP 0
+#endif /* linux */
+#endif /* HAVE_MREMAP */
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION errno = ENOMEM; /* statement macro: the trailing semicolon is part of the expansion */
+#endif /* MALLOC_FAILURE_ACTION */
+#ifndef HAVE_MORECORE
+#if ONLY_MSPACES
+#define HAVE_MORECORE 0 /* mspace-only builds default to no MORECORE */
+#else /* ONLY_MSPACES */
+#define HAVE_MORECORE 1
+#endif /* ONLY_MSPACES */
+#endif /* HAVE_MORECORE */
+#if !HAVE_MORECORE
+#define MORECORE_CONTIGUOUS 0 /* defined without an #ifndef guard in this branch */
+#else /* !HAVE_MORECORE */
+#define MORECORE_DEFAULT sbrk
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif /* MORECORE_CONTIGUOUS */
+#endif /* HAVE_MORECORE */
+#ifndef DEFAULT_GRANULARITY
+#if (MORECORE_CONTIGUOUS || defined(WIN32))
+#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
+#else /* MORECORE_CONTIGUOUS */
+#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) /* 64K */
+#endif /* MORECORE_CONTIGUOUS */
+#endif /* DEFAULT_GRANULARITY */
+#ifndef DEFAULT_TRIM_THRESHOLD
+#ifndef MORECORE_CANNOT_TRIM
+#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) /* 2MB */
+#else /* MORECORE_CANNOT_TRIM */
+#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T /* MAX_SIZE_T disables trimming */
+#endif /* MORECORE_CANNOT_TRIM */
+#endif /* DEFAULT_TRIM_THRESHOLD */
+#ifndef DEFAULT_MMAP_THRESHOLD
+#if HAVE_MMAP
+#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) /* 256K */
+#else /* HAVE_MMAP */
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T /* MAX_SIZE_T disables direct mmap servicing */
+#endif /* HAVE_MMAP */
+#endif /* DEFAULT_MMAP_THRESHOLD */
+#ifndef MAX_RELEASE_CHECK_RATE
+#if HAVE_MMAP
+#define MAX_RELEASE_CHECK_RATE 4095
+#else
+#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T /* effectively disables the periodic release check */
+#endif /* HAVE_MMAP */
+#endif /* MAX_RELEASE_CHECK_RATE */
+#ifndef USE_BUILTIN_FFS
+#define USE_BUILTIN_FFS 0 /* builtin ffs() is not used for index computation by default */
+#endif /* USE_BUILTIN_FFS */
+#ifndef USE_DEV_RANDOM
+#define USE_DEV_RANDOM 0 /* footer magic is seeded from the current time rather than /dev/random */
+#endif /* USE_DEV_RANDOM */
+#ifndef NO_MALLINFO
+#define NO_MALLINFO 0 /* tested by value (#if !NO_MALLINFO), so mallinfo is compiled in by default */
+#endif /* NO_MALLINFO */
+#ifndef MALLINFO_FIELD_TYPE
+#define MALLINFO_FIELD_TYPE size_t
+#endif /* MALLINFO_FIELD_TYPE */
+#ifndef NO_SEGMENT_TRAVERSAL
+#define NO_SEGMENT_TRAVERSAL 0 /* segment traversal stays enabled */
+#endif /* NO_SEGMENT_TRAVERSAL */
+
+/*
+ mallopt tuning options. SVID/XPG defines four standard parameter
+ numbers for mallopt, normally defined in malloc.h. None of these
+ are used in this malloc, so setting them has no effect. But this
+ malloc does support the following options.
+*/
+
+#define M_TRIM_THRESHOLD (-1) /* mallopt parameter number; runtime counterpart of DEFAULT_TRIM_THRESHOLD */
+#define M_GRANULARITY (-2) /* mallopt parameter number; runtime counterpart of DEFAULT_GRANULARITY */
+#define M_MMAP_THRESHOLD (-3) /* mallopt parameter number; runtime counterpart of DEFAULT_MMAP_THRESHOLD */
+
+/* ------------------------ Mallinfo declarations ------------------------ */
+
+#if !NO_MALLINFO
+/*
+ This version of malloc supports the standard SVID/XPG mallinfo
+ routine that returns a struct containing usage properties and
+ statistics. It should work on any system that has a
+ /usr/include/malloc.h defining struct mallinfo. The main
+ declaration needed is the mallinfo struct that is returned (by-copy)
+ by mallinfo(). The mallinfo struct contains a bunch of fields that
+ are not even meaningful in this version of malloc. These fields
+ are instead filled by mallinfo() with other numbers that might be of
+ interest.
+
+ HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+ /usr/include/malloc.h file that includes a declaration of struct
+ mallinfo. If so, it is included; else a compliant version is
+ declared below. These must be precisely the same for mallinfo() to
+ work. The original SVID version of this struct, defined on most
+ systems with mallinfo, declares all fields as ints. But some others
+ define as unsigned long. If your system defines the fields using a
+ type of different width than listed here, you MUST #include your
+ system version and #define HAVE_USR_INCLUDE_MALLOC_H.
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
+#include "/usr/include/malloc.h"
+#else /* HAVE_USR_INCLUDE_MALLOC_H */
+#ifndef STRUCT_MALLINFO_DECLARED /* skip the local declaration if it was already made under this guard */
+#define STRUCT_MALLINFO_DECLARED 1
+struct mallinfo { /* every field has type MALLINFO_FIELD_TYPE (size_t unless overridden) */
+ MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */
+ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */
+ MALLINFO_FIELD_TYPE smblks; /* always 0 */
+ MALLINFO_FIELD_TYPE hblks; /* always 0 */
+ MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */
+ MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */
+ MALLINFO_FIELD_TYPE fsmblks; /* always 0 */
+ MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
+ MALLINFO_FIELD_TYPE fordblks; /* total free space */
+ MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
+};
+#endif /* STRUCT_MALLINFO_DECLARED */
+#endif /* HAVE_USR_INCLUDE_MALLOC_H */
+#endif /* NO_MALLINFO */
+
+/*
+ Try to persuade compilers to inline. The most critical functions for
+ inlining are defined as macros, so these aren't used for them.
+*/
+
+#ifndef FORCEINLINE
+ #if defined(__GNUC__)
+#define FORCEINLINE __inline __attribute__ ((always_inline))
+ #elif defined(_MSC_VER)
+ #define FORCEINLINE __forceinline
+ #endif /* other compilers: left undefined here and resolved by the fallbacks below */
+#endif
+#ifndef NOINLINE
+ #if defined(__GNUC__)
+ #define NOINLINE __attribute__ ((noinline))
+ #elif defined(_MSC_VER)
+ #define NOINLINE __declspec(noinline)
+ #else
+ #define NOINLINE /* unrecognized compiler: expands to nothing */
+ #endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#ifndef FORCEINLINE
+ #define FORCEINLINE inline /* C++ fallback when no compiler-specific form was selected */
+#endif
+#endif /* __cplusplus */
+#ifndef FORCEINLINE
+ #define FORCEINLINE /* final fallback: expands to nothing */
+#endif
+
+#if !ONLY_MSPACES
+
+/* ------------------- Declarations of public routines ------------------- */
+
+#ifndef USE_DL_PREFIX /* no prefix requested: the dl* names used in the declarations below are macro-renamed to the standard names */
+#define dlcalloc calloc
+#define dlfree free
+#define dlmalloc malloc
+#define dlmemalign memalign
+#define dlrealloc realloc
+#define dlvalloc valloc
+#define dlpvalloc pvalloc
+#define dlmallinfo mallinfo
+#define dlmallopt mallopt
+#define dlmalloc_trim malloc_trim
+#define dlmalloc_stats malloc_stats
+#define dlmalloc_usable_size malloc_usable_size
+#define dlmalloc_footprint malloc_footprint
+#define dlmalloc_max_footprint malloc_max_footprint
+#define dlindependent_calloc independent_calloc
+#define dlindependent_comalloc independent_comalloc
+#endif /* USE_DL_PREFIX */
+
+
+/*
+ malloc(size_t n)
+ Returns a pointer to a newly allocated chunk of at least n bytes, or
+ null if no space is available, in which case errno is set to ENOMEM
+ on ANSI C systems.
+
+ If n is zero, malloc returns a minimum-sized chunk. (The minimum
+ size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
+ systems.) Note that size_t is an unsigned type, so calls with
+ arguments that would be negative if signed are interpreted as
+ requests for huge amounts of space, which will often fail. The
+ maximum supported value of n differs across systems, but is in all
+ cases less than the maximum representable value of a size_t.
+*/
+void* dlmalloc(size_t);
+
+/*
+ free(void* p)
+ Releases the chunk of memory pointed to by p, that had been previously
+ allocated using malloc or a related routine such as realloc.
+ It has no effect if p is null. If p was not malloced or already
+ freed, free(p) will by default cause the current program to abort.
+*/
+void dlfree(void*);
+
+/*
+ calloc(size_t n_elements, size_t element_size);
+ Returns a pointer to n_elements * element_size bytes, with all locations
+ set to zero.
+*/
+void* dlcalloc(size_t, size_t);
+
+/*
+ realloc(void* p, size_t n)
+ Returns a pointer to a chunk of size n that contains the same data
+ as does chunk p up to the minimum of (n, p's size) bytes, or null
+ if no space is available.
+
+ The returned pointer may or may not be the same as p. The algorithm
+ prefers extending p in most cases when possible, otherwise it
+ employs the equivalent of a malloc-copy-free sequence.
+
+ If p is null, realloc is equivalent to malloc.
+
+ If space is not available, realloc returns null, errno is set (if on
+ ANSI) and p is NOT freed.
+
+ if n is for fewer bytes than already held by p, the newly unused
+ space is lopped off and freed if possible. realloc with a size
+ argument of zero (re)allocates a minimum-sized chunk.
+
+ The old unix realloc convention of allowing the last-free'd chunk
+ to be used as an argument to realloc is not supported.
+*/
+
+void* dlrealloc(void*, size_t);
+
+/*
+ memalign(size_t alignment, size_t n);
+ Returns a pointer to a newly allocated chunk of n bytes, aligned
+ in accord with the alignment argument.
+
+ The alignment argument should be a power of two. If the argument is
+ not a power of two, the nearest greater power is used.
+ 8-byte alignment is guaranteed by normal malloc calls, so don't
+ bother calling memalign with an argument of 8 or less.
+
+ Overreliance on memalign is a sure way to fragment space.
+*/
+void* dlmemalign(size_t, size_t);
+
+/*
+ valloc(size_t n);
+ Equivalent to memalign(pagesize, n), where pagesize is the page
+ size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void* dlvalloc(size_t);
+
+/*
+ mallopt(int parameter_number, int parameter_value)
+ Sets tunable parameters. The format is to provide a
+ (parameter-number, parameter-value) pair. mallopt then sets the
+ corresponding parameter to the argument value if it can (i.e., so
+ long as the value is meaningful), and returns 1 if successful else
+ 0. To workaround the fact that mallopt is specified to use int,
+ not size_t parameters, the value -1 is specially treated as the
+ maximum unsigned size_t value.
+
+ SVID/XPG/ANSI defines four standard param numbers for mallopt,
+ normally defined in malloc.h. None of these are used in this malloc,
+ so setting them has no effect. But this malloc also supports other
+ options in mallopt. See below for details. Briefly, supported
+ parameters are as follows (listed defaults are for "typical"
+ configurations).
+
+ Symbol param # default allowed param values
+ M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables)
+ M_GRANULARITY -2 page size any power of 2 >= page size
+ M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support)
+*/
+int dlmallopt(int, int);
+
+/*
+ malloc_footprint();
+ Returns the number of bytes obtained from the system. The total
+ number of bytes allocated by malloc, realloc etc., is less than this
+ value. Unlike mallinfo, this function returns only a precomputed
+ result, so can be called frequently to monitor memory consumption.
+ Even if locks are otherwise defined, this function does not use them,
+ so results might not be up to date.
+*/
+size_t dlmalloc_footprint(void);
+
+/*
+ malloc_max_footprint();
+ Returns the maximum number of bytes obtained from the system. This
+ value will be greater than current footprint if deallocated space
+ has been reclaimed by the system. The peak number of bytes allocated
+ by malloc, realloc etc., is less than this value. Unlike mallinfo,
+ this function returns only a precomputed result, so can be called
+ frequently to monitor memory consumption. Even if locks are
+ otherwise defined, this function does not use them, so results might
+ not be up to date.
+*/
+size_t dlmalloc_max_footprint(void);
+
+#if !NO_MALLINFO
+/*
+ mallinfo()
+ Returns (by copy) a struct containing various summary statistics:
+
+ arena: current total non-mmapped bytes allocated from system
+ ordblks: the number of free chunks
+ smblks: always zero.
+ hblks: current number of mmapped regions
+ hblkhd: total bytes held in mmapped regions
+ usmblks: the maximum total allocated space. This will be greater
+ than current total if trimming has occurred.
+ fsmblks: always zero
+ uordblks: current total allocated space (normal or mmapped)
+ fordblks: total free space
+ keepcost: the maximum number of bytes that could ideally be released
+ back to system via malloc_trim. ("ideally" means that
+ it ignores page restrictions etc.)
+
+ Because these fields are ints, but internal bookkeeping may
+ be kept as longs, the reported values may wrap around zero and
+ thus be inaccurate.
+*/
+struct mallinfo dlmallinfo(void);
+#endif /* NO_MALLINFO */
+
+/*
+ independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
+
+ independent_calloc is similar to calloc, but instead of returning a
+ single cleared space, it returns an array of pointers to n_elements
+ independent elements that can hold contents of size elem_size, each
+ of which starts out cleared, and can be independently freed,
+ realloc'ed etc. The elements are guaranteed to be adjacently
+ allocated (this is not guaranteed to occur with multiple callocs or
+ mallocs), which may also improve cache locality in some
+ applications.
+
+ The "chunks" argument is optional (i.e., may be null, which is
+ probably the most typical usage). If it is null, the returned array
+ is itself dynamically allocated and should also be freed when it is
+ no longer needed. Otherwise, the chunks array must be of at least
+ n_elements in length. It is filled in with the pointers to the
+ chunks.
+
+ In either case, independent_calloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and "chunks"
+ is null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+
+ Each element must be individually freed when it is no longer
+ needed. If you'd like to instead be able to free all at once, you
+ should instead use regular calloc and assign pointers into this
+ space to represent elements. (In this case though, you cannot
+ independently free elements.)
+
+ independent_calloc simplifies and speeds up implementations of many
+ kinds of pools. It may also be useful when constructing large data
+ structures that initially have a fixed number of fixed-sized nodes,
+ but the number is not known at compile time, and some of the nodes
+ may later need to be freed. For example:
+
+ struct Node { int item; struct Node* next; };
+
+ struct Node* build_list() {
+ struct Node** pool;
+ int n = read_number_of_nodes_needed();
+ if (n <= 0) return 0;
+ pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0));
+ if (pool == 0) die();
+ // organize into a linked list...
+ struct Node* first = pool[0];
+ for (int i = 0; i < n-1; ++i)
+ pool[i]->next = pool[i+1];
+ free(pool); // Can now free the array (or not, if it is needed later)
+ return first;
+ }
+*/
+void** dlindependent_calloc(size_t, size_t, void**);
+
+/*
+ independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+
+ independent_comalloc allocates, all at once, a set of n_elements
+ chunks with sizes indicated in the "sizes" array. It returns
+ an array of pointers to these elements, each of which can be
+ independently freed, realloc'ed etc. The elements are guaranteed to
+ be adjacently allocated (this is not guaranteed to occur with
+ multiple callocs or mallocs), which may also improve cache locality
+ in some applications.
+
+ The "chunks" argument is optional (i.e., may be null). If it is null
+ the returned array is itself dynamically allocated and should also
+ be freed when it is no longer needed. Otherwise, the chunks array
+ must be of at least n_elements in length. It is filled in with the
+ pointers to the chunks.
+
+ In either case, independent_comalloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and chunks is
+ null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+
+ Each element must be individually freed when it is no longer
+ needed. If you'd like to instead be able to free all at once, you
+ should instead use a single regular malloc, and assign pointers at
+ particular offsets in the aggregate space. (In this case though, you
+ cannot independently free elements.)
+
+ independent_comalloc differs from independent_calloc in that each
+ element may have a different size, and also that it does not
+ automatically clear elements.
+
+ independent_comalloc can be used to speed up allocation in cases
+ where several structs or objects must always be allocated at the
+ same time. For example:
+
+ struct Head { ... }
+ struct Foot { ... }
+
+ void send_message(char* msg) {
+ int msglen = strlen(msg);
+ size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+ void* chunks[3];
+ if (independent_comalloc(3, sizes, chunks) == 0)
+ die();
+ struct Head* head = (struct Head*)(chunks[0]);
+ char* body = (char*)(chunks[1]);
+ struct Foot* foot = (struct Foot*)(chunks[2]);
+ // ...
+ }
+
+ In general though, independent_comalloc is worth using only for
+ larger values of n_elements. For small values, you probably won't
+ detect enough difference from series of malloc calls to bother.
+
+ Overuse of independent_comalloc can increase overall memory usage,
+ since it cannot reuse existing noncontiguous small chunks that
+ might be available for some of the elements.
+*/
+void** dlindependent_comalloc(size_t, size_t*, void**);
+
+
+/*
+ pvalloc(size_t n);
+ Equivalent to valloc(minimum-page-that-holds(n)), that is,
+ round up n to nearest pagesize.
+ */
+void* dlpvalloc(size_t);
+
+/*
+ malloc_trim(size_t pad);
+
+ If possible, gives memory back to the system (via negative arguments
+ to sbrk) if there is unused memory at the `high' end of the malloc
+ pool or in unused MMAP segments. You can call this after freeing
+ large blocks of memory to potentially reduce the system-level memory
+ requirements of a program. However, it cannot guarantee to reduce
+ memory. Under some allocation patterns, some large free blocks of
+ memory will be locked between two used chunks, so they cannot be
+ given back to the system.
+
+ The `pad' argument to malloc_trim represents the amount of free
+ trailing space to leave untrimmed. If this argument is zero, only
+ the minimum amount of memory to maintain internal data structures
+ will be left. Non-zero arguments can be supplied to maintain enough
+ trailing space to service future expected allocations without having
+ to re-obtain memory from the system.
+
+ Malloc_trim returns 1 if it actually released any memory, else 0.
+*/
+int dlmalloc_trim(size_t);
+
+/*
+ malloc_stats();
+ Prints on stderr the amount of space obtained from the system (both
+ via sbrk and mmap), the maximum amount (which may be more than
+ current if malloc_trim and/or munmap got called), and the current
+ number of bytes allocated via malloc (or realloc, etc) but not yet
+ freed. Note that this is the number of bytes allocated, not the
+ number requested. It will be larger than the number requested
+ because of alignment and bookkeeping overhead. Because it includes
+ alignment wastage as being in use, this figure may be greater than
+ zero even when no user-level chunks are allocated.
+
+ The reported current and maximum system memory can be inaccurate if
+ a program makes other calls to system memory allocation functions
+ (normally sbrk) outside of malloc.
+
+ malloc_stats prints only the most commonly interesting statistics.
+ More information can be obtained by calling mallinfo.
+*/
+void dlmalloc_stats(void);
+
+#endif /* ONLY_MSPACES */
+
+/*
+ malloc_usable_size(void* p);
+
+ Returns the number of bytes you can actually use in
+ an allocated chunk, which may be more than you requested (although
+ often not) due to alignment and minimum size constraints.
+ You can use this many bytes without worrying about
+ overwriting other allocated objects. This is not a particularly great
+ programming practice. malloc_usable_size can be more useful in
+ debugging and assertions, for example:
+
+ p = malloc(n);
+ assert(malloc_usable_size(p) >= 256);
+*/
+size_t dlmalloc_usable_size(void*);
+
+
+#if MSPACES
+
+/*
+ mspace is an opaque type representing an independent
+ region of space that supports mspace_malloc, etc.
+*/
+typedef void* mspace;
+
+/*
+ create_mspace creates and returns a new independent space with the
+ given initial capacity, or, if 0, the default granularity size. It
+ returns null if there is no system memory available to create the
+ space. If argument locked is non-zero, the space uses a separate
+ lock to control access. The capacity of the space will grow
+ dynamically as needed to service mspace_malloc requests. You can
+ control the sizes of incremental increases of this space by
+ compiling with a different DEFAULT_GRANULARITY or dynamically
+ setting with mallopt(M_GRANULARITY, value).
+*/
+mspace create_mspace(size_t capacity, int locked);
+
+/*
+ destroy_mspace destroys the given space, and attempts to return all
+ of its memory back to the system, returning the total number of
+ bytes freed. After destruction, the results of access to all memory
+ used by the space become undefined.
+*/
+size_t destroy_mspace(mspace msp);
+
+/*
+ create_mspace_with_base uses the memory supplied as the initial base
+ of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
+ space is used for bookkeeping, so the capacity must be at least this
+ large. (Otherwise 0 is returned.) When this initial space is
+ exhausted, additional memory will be obtained from the system.
+ Destroying this space will deallocate all additionally allocated
+ space (if possible) but not the initial base.
+*/
+mspace create_mspace_with_base(void* base, size_t capacity, int locked);
+
+/*
+ mspace_track_large_chunks controls whether requests for large chunks
+ are allocated in their own untracked mmapped regions, separate from
+ others in this mspace. By default large chunks are not tracked,
+ which reduces fragmentation. However, such chunks are not
+ necessarily released to the system upon destroy_mspace. Enabling
+ tracking by setting to true may increase fragmentation, but avoids
+ leakage when relying on destroy_mspace to release all memory
+ allocated using this space. The function returns the previous
+ setting.
+*/
+int mspace_track_large_chunks(mspace msp, int enable);
+
+
+/*
+ mspace_malloc behaves as malloc, but operates within
+ the given space.
+*/
+void* mspace_malloc(mspace msp, size_t bytes);
+
+/*
+ mspace_free behaves as free, but operates within
+ the given space.
+
+ If compiled with FOOTERS==1, mspace_free is not actually needed.
+ free may be called instead of mspace_free because freed chunks from
+ any space are handled by their originating spaces.
+*/
+void mspace_free(mspace msp, void* mem);
+
+/*
+ mspace_realloc behaves as realloc, but operates within
+ the given space.
+
+ If compiled with FOOTERS==1, mspace_realloc is not actually
+ needed. realloc may be called instead of mspace_realloc because
+ realloced chunks from any space are handled by their originating
+ spaces.
+*/
+void* mspace_realloc(mspace msp, void* mem, size_t newsize);
+
+/*
+ mspace_calloc behaves as calloc, but operates within
+ the given space.
+*/
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
+
+/*
+ mspace_memalign behaves as memalign, but operates within
+ the given space.
+*/
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
+
+/*
+ mspace_independent_calloc behaves as independent_calloc, but
+ operates within the given space.
+*/
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
+ size_t elem_size, void* chunks[]);
+
+/*
+ mspace_independent_comalloc behaves as independent_comalloc, but
+ operates within the given space.
+*/
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+ size_t sizes[], void* chunks[]);
+
+/*
+ mspace_footprint() returns the number of bytes obtained from the
+ system for this space.
+*/
+size_t mspace_footprint(mspace msp);
+
+/*
+ mspace_max_footprint() returns the peak number of bytes obtained from the
+ system for this space.
+*/
+size_t mspace_max_footprint(mspace msp);
+
+
+#if !NO_MALLINFO
+/*
+ mspace_mallinfo behaves as mallinfo, but reports properties of
+ the given space.
+*/
+struct mallinfo mspace_mallinfo(mspace msp);
+#endif /* NO_MALLINFO */
+
+/*
+ mspace_usable_size(void* p) behaves the same as malloc_usable_size;
+*/
+ size_t mspace_usable_size(void* mem);
+
+/*
+ mspace_malloc_stats behaves as malloc_stats, but reports
+ properties of the given space.
+*/
+void mspace_malloc_stats(mspace msp);
+
+/*
+ mspace_trim behaves as malloc_trim, but
+ operates within the given space.
+*/
+int mspace_trim(mspace msp, size_t pad);
+
+/*
+ An alias for mallopt.
+*/
+int mspace_mallopt(int, int);
+
+#endif /* MSPACES */
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif /* __cplusplus */
+
+/*
+ ========================================================================
+ To make a fully customizable malloc.h header file, cut everything
+ above this line, put into file malloc.h, edit to suit, and #include it
+ on the next line, as well as in programs that use this malloc.
+ ========================================================================
+*/
+
+/* #include "malloc.h" */
+
+/*------------------------------ internal #includes ---------------------- */
+
+#ifdef WIN32
+#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
+#endif /* WIN32 */
+
+#include <stdio.h> /* for printing in malloc_stats */
+
+#ifndef LACKS_ERRNO_H
+#include <errno.h> /* for MALLOC_FAILURE_ACTION */
+#endif /* LACKS_ERRNO_H */
+#if FOOTERS || DEBUG
+#include <time.h> /* for magic initialization */
+#endif /* FOOTERS */
+#ifndef LACKS_STDLIB_H
+#include <stdlib.h> /* for abort() */
+#endif /* LACKS_STDLIB_H */
+#ifdef DEBUG
+#if ABORT_ON_ASSERT_FAILURE
+#undef assert
+#define assert(x) if(!(x)) ABORT
+#else /* ABORT_ON_ASSERT_FAILURE */
+#include <assert.h>
+#endif /* ABORT_ON_ASSERT_FAILURE */
+#else /* DEBUG */
+#ifndef assert
+#define assert(x)
+#endif
+#define DEBUG 0
+#endif /* DEBUG */
+#ifndef LACKS_STRING_H
+#include <string.h> /* for memset etc */
+#endif /* LACKS_STRING_H */
+#if USE_BUILTIN_FFS
+#ifndef LACKS_STRINGS_H
+#include <strings.h> /* for ffs */
+#endif /* LACKS_STRINGS_H */
+#endif /* USE_BUILTIN_FFS */
+#if HAVE_MMAP
+#ifndef LACKS_SYS_MMAN_H
+/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
+#if (defined(linux) && !defined(__USE_GNU))
+#define __USE_GNU 1
+#include <sys/mman.h> /* for mmap */
+#undef __USE_GNU
+#else
+#include <sys/mman.h> /* for mmap */
+#endif /* linux */
+#endif /* LACKS_SYS_MMAN_H */
+#ifndef LACKS_FCNTL_H
+#include <fcntl.h>
+#endif /* LACKS_FCNTL_H */
+#endif /* HAVE_MMAP */
+#ifndef LACKS_UNISTD_H
+#include <unistd.h> /* for sbrk, sysconf */
+#else /* LACKS_UNISTD_H */
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+extern void* sbrk(ptrdiff_t);
+#endif /* FreeBSD etc */
+#endif /* LACKS_UNISTD_H */
+
+/* Declarations for locking */
+#if USE_LOCKS
+#ifndef WIN32
+#include <pthread.h>
+#if defined (__SVR4) && defined (__sun) /* solaris */
+#include <thread.h>
+#endif /* solaris */
+#else
+#ifndef _M_AMD64
+/* These are already defined on AMD64 builds */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp);
+LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _M_AMD64 */
+#pragma intrinsic (_InterlockedCompareExchange)
+#pragma intrinsic (_InterlockedExchange)
+#define interlockedcompareexchange _InterlockedCompareExchange
+#define interlockedexchange _InterlockedExchange
+#endif /* Win32 */
+#endif /* USE_LOCKS */
+
+/* Declarations for bit scanning on win32 */
+#if defined(_MSC_VER) && _MSC_VER>=1300
+#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
+unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#define BitScanForward _BitScanForward
+#define BitScanReverse _BitScanReverse
+#pragma intrinsic(_BitScanForward)
+#pragma intrinsic(_BitScanReverse)
+#endif /* BitScanForward */
+#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */
+
+#ifndef WIN32
+#ifndef malloc_getpagesize
+# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
+# ifndef _SC_PAGE_SIZE
+# define _SC_PAGE_SIZE _SC_PAGESIZE
+# endif
+# endif
+# ifdef _SC_PAGE_SIZE
+# define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+# else
+# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+ extern size_t getpagesize();
+# define malloc_getpagesize getpagesize()
+# else
+# ifdef WIN32 /* use supplied emulation of getpagesize */
+# define malloc_getpagesize getpagesize()
+# else
+# ifndef LACKS_SYS_PARAM_H
+# include <sys/param.h>
+# endif
+# ifdef EXEC_PAGESIZE
+# define malloc_getpagesize EXEC_PAGESIZE
+# else
+# ifdef NBPG
+# ifndef CLSIZE
+# define malloc_getpagesize NBPG
+# else
+# define malloc_getpagesize (NBPG * CLSIZE)
+# endif
+# else
+# ifdef NBPC
+# define malloc_getpagesize NBPC
+# else
+# ifdef PAGESIZE
+# define malloc_getpagesize PAGESIZE
+# else /* just guess */
+# define malloc_getpagesize ((size_t)4096U)
+# endif
+# endif
+# endif
+# endif
+# endif
+# endif
+# endif
+#endif
+#endif
+
+
+
+/* ------------------- size_t and alignment properties -------------------- */
+
+/* The byte and bit size of a size_t */
+#define SIZE_T_SIZE (sizeof(size_t))
+#define SIZE_T_BITSIZE (sizeof(size_t) << 3)
+
+/* Some constants coerced to size_t */
+/* Annoying but necessary to avoid errors on some platforms */
+#define SIZE_T_ZERO ((size_t)0)
+#define SIZE_T_ONE ((size_t)1)
+#define SIZE_T_TWO ((size_t)2)
+#define SIZE_T_FOUR ((size_t)4)
+#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1)
+#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2)
+#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
+#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U)
+
+/* The bit mask value corresponding to MALLOC_ALIGNMENT */
+#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
+
+/* True if address a has acceptable alignment */
+#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
+
+/* the number of bytes to offset an address to align it */
+#define align_offset(A)\
+ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
+ ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
+
+/*
+ malloc_params holds global properties, including those that can be
+ dynamically set using mallopt. There is a single instance, mparams,
+ initialized in init_mparams. Note that the non-zeroness of "magic"
+ also serves as an initialization flag.
+*/
+/* Integer type of the default_mflags field below. */
+typedef unsigned int flag_t;
+struct malloc_params {
+ /* Zero until initialization has run; ensure_initialization() tests it
+ for non-zero before deciding whether to call init_mparams(). */
+ volatile size_t magic;
+ /* presumably the system page size in bytes -- TODO confirm in init_mparams */
+ size_t page_size;
+ /* Allocation granularity; the aligned mmap helpers use
+ (granularity - 1) as an address mask, so it is treated as a power of two. */
+ size_t granularity;
+ /* NOTE(review): threshold fields are not used in the visible part of the
+ file; by name they are the mmap and trim thresholds -- confirm. */
+ size_t mmap_threshold;
+ size_t trim_threshold;
+ /* NOTE(review): by name, the default flag word for new malloc states -- confirm. */
+ flag_t default_mflags;
+};
+
+static struct malloc_params mparams;
+
+/* Ensure mparams initialized */
+#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
+
+/* -------------------------- MMAP preliminaries ------------------------- */
+
+/*
+ If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
+ checks to fail so compiler optimizer can delete code rather than
+ using so many "#if"s.
+*/
+
+
+/* MORECORE and MMAP must return MFAIL on failure */
+#define MFAIL ((void*)(MAX_SIZE_T))
+#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */
+
+#if HAVE_MMAP
+
+#ifndef WIN32
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif /* MAP_ANON */
+#ifdef DEFAULT_GRANULARITY_ALIGNED
+#define MMAP_IMPL mmap_aligned
+static void* lastAlignedmmap; /* Used as a hint */
+/*
+  mmap_aligned: mmap() replacement that returns a mapping whose start
+  address is a multiple of mparams.granularity (treated as a power of two).
+
+  If "start" is non-null the request is handed to mmap() unchanged.
+  Otherwise the address recorded after the previous successful mapping
+  (lastAlignedmmap) is offered to the kernel purely as a hint. If the
+  address the kernel picks is not aligned, that mapping is dropped and a
+  region one granule larger is mapped instead; the unaligned head and
+  the unused tail of that region are then unmapped again.
+
+  MAP_FIXED is deliberately never added to "flags": it replaces whatever
+  is already mapped at the requested address, which would silently
+  destroy live mappings (including earlier results of this function
+  whenever a previous request was longer than one granule).
+
+  Returns the mapped address, or MAP_FAILED on failure, exactly like
+  mmap(). Failure is detected by comparing against MAP_FAILED; mmap()
+  does not return a null pointer on error.
+*/
+static void* mmap_aligned(void *start, size_t length, int prot, int flags, int fd, off_t offset) {
+  const size_t mask = mparams.granularity - SIZE_T_ONE;
+  const size_t pagemask = mparams.page_size - SIZE_T_ONE;
+  size_t span;    /* length rounded up to whole pages */
+  size_t padded;  /* span plus one granule of slack */
+  size_t lead;    /* bytes in front of the first aligned address */
+  char* raw;
+  char* aligned;
+  void* ptr;
+
+  if(start)
+    ptr = mmap(start, length, prot, flags, fd, offset);
+  else {
+    /* Cheap attempt first: the hint is usually free and already aligned */
+    ptr = mmap(lastAlignedmmap, length, prot, flags, fd, offset);
+    if(ptr != MAP_FAILED && ((size_t)ptr & mask) != 0) {
+      munmap(ptr, length);
+      span = (length + pagemask) & ~pagemask;
+      padded = span + mparams.granularity;
+      if(span < length || padded < span)
+        return MAP_FAILED;            /* size_t overflow */
+      raw = (char*)mmap(0, padded, prot, flags, fd, offset);
+      if(raw == (char*)MAP_FAILED)
+        return MAP_FAILED;
+      aligned = (char*)(((size_t)raw + mask) & ~mask);
+      lead = (size_t)(aligned - raw);
+      /* Give back the slack on both sides of the aligned window */
+      if(lead != 0)
+        munmap(raw, lead);
+      munmap(aligned + span, padded - lead - span);
+      ptr = aligned;
+    }
+  }
+  /* Remember the next aligned address past this mapping as the next hint */
+  if(ptr != MAP_FAILED)
+    lastAlignedmmap = (void*)(((size_t)ptr + length + mask) & ~mask);
+  return ptr;
+}
+#else
+#define MMAP_IMPL mmap
+#endif /* DEFAULT_GRANULARITY_ALIGNED */
+#define MUNMAP_DEFAULT(a, s) munmap((a), (s))
+#define MMAP_PROT (PROT_READ|PROT_WRITE)
+#ifdef MAP_ANONYMOUS
+#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
+#define MMAP_DEFAULT(s) MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
+#else /* MAP_ANONYMOUS */
+/*
+ Nearly all versions of mmap support MAP_ANONYMOUS, so the following
+ is unlikely to be needed, but is supplied just in case.
+*/
+#define MMAP_FLAGS (MAP_PRIVATE)
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
+#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \
+ (dev_zero_fd = open("/dev/zero", O_RDWR), \
+ MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
+ MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
+#endif /* MAP_ANONYMOUS */
+
+#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
+
+#else /* WIN32 */
+
+/* Win32 MMAP via VirtualAlloc */
+#ifdef DEFAULT_GRANULARITY_ALIGNED
+static void* lastWin32mmap; /* Used as a hint */
+#endif /* DEFAULT_GRANULARITY_ALIGNED */
+#ifdef ENABLE_LARGE_PAGES
+static int largepagesavailable = 1;
+#endif /* ENABLE_LARGE_PAGES */
+/*
+  win32mmap: obtain "size" bytes of committed, read/write memory from
+  VirtualAlloc. Returns the base address, or MFAIL if nothing could be
+  allocated.
+
+  With ENABLE_LARGE_PAGES, requests of at least 1Mb are first tried with
+  MEM_LARGE_PAGES; if that fails for lack of privilege, large pages are
+  not attempted again.
+
+  With DEFAULT_GRANULARITY_ALIGNED, address space is first reserved at
+  successive multiples of mparams.granularity (starting from the hint
+  lastWin32mmap) until an aligned reservation succeeds, and that exact
+  reservation is then committed. The reservation is released again if
+  the commit fails.
+*/
+static FORCEINLINE void* win32mmap(size_t size) {
+  void* baseaddress = 0;
+  void* ptr = 0;
+#ifdef ENABLE_LARGE_PAGES
+  /* Note that large pages are *always* allocated on a large page boundary.
+     If however granularity is small then don't waste a kernel call if size
+     isn't around the size of a large page */
+  if(largepagesavailable && size >= 1*1024*1024) {
+    ptr = VirtualAlloc(baseaddress, size, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE);
+    if(!ptr && ERROR_PRIVILEGE_NOT_HELD==GetLastError()) largepagesavailable=0;
+  }
+#endif
+  if(!ptr) {
+#ifdef DEFAULT_GRANULARITY_ALIGNED
+    /* We try to avoid overhead by speculatively reserving at aligned
+       addresses until we succeed */
+    baseaddress = lastWin32mmap;
+    for(;;) {
+      void* reserveaddr = VirtualAlloc(baseaddress, size, MEM_RESERVE, PAGE_READWRITE);
+      if(!reserveaddr) {
+        /* If even a system-chosen address cannot be reserved there is
+           nothing left to probe; fall through and report failure */
+        if(!baseaddress) break;
+        baseaddress = (void*)((size_t)baseaddress + mparams.granularity);
+      }
+      else if((size_t)reserveaddr & (mparams.granularity - SIZE_T_ONE)) {
+        VirtualFree(reserveaddr, 0, MEM_RELEASE);
+        baseaddress = (void*)(((size_t)reserveaddr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE));
+      }
+      else {
+        /* Commit the region we actually reserved; without this a null
+           hint would leave the reservation orphaned */
+        baseaddress = reserveaddr;
+        break;
+      }
+    }
+#endif
+    ptr = VirtualAlloc(baseaddress, size, baseaddress ? MEM_COMMIT : MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+#ifdef DEFAULT_GRANULARITY_ALIGNED
+    /* Do not leak the reservation when the commit fails */
+    if(!ptr && baseaddress) VirtualFree(baseaddress, 0, MEM_RELEASE);
+#if DEBUG
+    if(lastWin32mmap && ptr!=lastWin32mmap) printf("Non-contiguous VirtualAlloc between %p and %p\n", ptr, lastWin32mmap);
+#endif
+    if(ptr) lastWin32mmap = (void*)((size_t) ptr + mparams.granularity);
+#endif
+  }
+#if DEBUG
+  /* size is cast because %u expects unsigned int, not size_t */
+#ifdef ENABLE_LARGE_PAGES
+  printf("VirtualAlloc returns %p size %u. LargePagesAvailable=%d\n", ptr, (unsigned int)size, largepagesavailable);
+#else
+  printf("VirtualAlloc returns %p size %u\n", ptr, (unsigned int)size);
+#endif
+#endif
+  return (ptr != 0)? ptr: MFAIL;
+}
+
+/*
+  Direct MMAP on Win32: reserve and commit in one call, asking for
+  MEM_TOP_DOWN placement to minimize interference with other
+  allocations. Returns the region, or MFAIL when VirtualAlloc fails.
+*/
+static FORCEINLINE void* win32direct_mmap(size_t size) {
+  void* region = VirtualAlloc(0, size,
+                              MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+                              PAGE_READWRITE);
+  if (region == 0)
+    return MFAIL;
+  return region;
+}
+
+/*
+  Release "size" bytes starting at "ptr". The range may consist of
+  several coalesced segments, so it is walked one VirtualQuery region
+  at a time. Each region must start exactly at the cursor, be its own
+  allocation base, be committed, and fit in what is left of the range.
+  Returns 0 on success and -1 as soon as any check or release fails.
+*/
+static FORCEINLINE int win32munmap(void* ptr, size_t size) {
+  MEMORY_BASIC_INFORMATION region;
+  char* cursor = (char*)ptr;
+  size_t remaining = size;
+  while (remaining != 0) {
+    if (!VirtualQuery(cursor, &region, sizeof(region)))
+      return -1;
+    if (!(region.BaseAddress == cursor &&
+          region.AllocationBase == cursor &&
+          region.State == MEM_COMMIT &&
+          region.RegionSize <= remaining))
+      return -1;
+    if (!VirtualFree(cursor, 0, MEM_RELEASE))
+      return -1;
+    cursor += region.RegionSize;
+    remaining -= region.RegionSize;
+  }
+  return 0;
+}
+
+#define MMAP_DEFAULT(s) win32mmap(s)
+#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s))
+#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s)
+#endif /* WIN32 */
+#endif /* HAVE_MMAP */
+
+#if HAVE_MREMAP
+#ifndef WIN32
+#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
+#endif /* WIN32 */
+#endif /* HAVE_MREMAP */
+
+
+/**
+ * Define CALL_MORECORE
+ */
+#if HAVE_MORECORE
+ #ifdef MORECORE
+ #define CALL_MORECORE(S) MORECORE(S)
+ #else /* MORECORE */
+ #define CALL_MORECORE(S) MORECORE_DEFAULT(S)
+ #endif /* MORECORE */
+#else /* HAVE_MORECORE */
+ #define CALL_MORECORE(S) MFAIL
+#endif /* HAVE_MORECORE */
+
+/**
+ * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
+ */
+#if HAVE_MMAP
+ #define USE_MMAP_BIT (SIZE_T_ONE)
+
+ #ifdef MMAP
+ #define CALL_MMAP(s) MMAP(s)
+ #else /* MMAP */
+ #define CALL_MMAP(s) MMAP_DEFAULT(s)
+ #endif /* MMAP */
+ #ifdef MUNMAP
+ #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
+ #else /* MUNMAP */
+ #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s))
+ #endif /* MUNMAP */
+ #ifdef DIRECT_MMAP
+ #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
+ #else /* DIRECT_MMAP */
+ #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
+ #endif /* DIRECT_MMAP */
+#else /* HAVE_MMAP */
+ #define USE_MMAP_BIT (SIZE_T_ZERO)
+
+ #define MMAP(s) MFAIL
+ #define MUNMAP(a, s) (-1)
+ #define DIRECT_MMAP(s) MFAIL
+ #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
+ #define CALL_MMAP(s) MMAP(s)
+ #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
+#endif /* HAVE_MMAP */
+
+/**
+ * Define CALL_MREMAP
+ */
+#if HAVE_MMAP && HAVE_MREMAP
+ #ifdef MREMAP
+ #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
+ #else /* MREMAP */
+ #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
+ #endif /* MREMAP */
+#else /* HAVE_MMAP && HAVE_MREMAP */
+ #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL
+#endif /* HAVE_MMAP && HAVE_MREMAP */
+
+/* mstate bit set if contiguous morecore disabled or failed */
+#define USE_NONCONTIGUOUS_BIT (4U)
+
+/* segment bit set in create_mspace_with_base */
+#define EXTERN_BIT (8U)
+
+
+/* --------------------------- Lock preliminaries ------------------------ */
+
+/*
+ When locks are defined, there is one global lock, plus
+ one per-mspace lock.
+
+ The global lock ensures that mparams.magic and other unique
+ mparams values are initialized only once. It also protects
+ sequences of calls to MORECORE. In many cases sys_alloc requires
+ two calls, that should not be interleaved with calls by other
+ threads. This does not protect against direct calls to MORECORE
+ by other threads not using this lock, so there is still code to
+ cope the best we can on interference.
+
+ Per-mspace locks surround calls to malloc, free, etc. To enable use
+ in layered extensions, per-mspace locks are reentrant.
+
+ Because lock-protected regions generally have bounded times, it is
+ OK to use the supplied simple spinlocks in the custom versions for
+ x86. Spinlocks are likely to improve performance for lightly
+ contended applications, but worsen performance under heavy
+ contention.
+
+ If USE_LOCKS is > 1, the definitions of lock routines here are
+ bypassed, in which case you will need to define the type MLOCK_T,
+ and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly
+ TRY_LOCK (which is not used in this malloc, but commonly needed in
+ extensions.) You must also declare a
+ static MLOCK_T malloc_global_mutex = { initialization values };.
+
+*/
+
+#if USE_LOCKS == 1
+
+#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE
+#ifndef WIN32
+
+/* Custom pthread-style spin locks on x86 and x64 for gcc */
+/* State of one recursive spin lock (gcc/pthread flavour). */
+struct pthread_mlock_t {
+ /* Lock word: 0 when free; set to 1 by the cmpxchgl in the acquire/try paths. */
+ volatile unsigned int l;
+ /* presumably keeps the lock word apart from the fields below -- see name */
+ char cachelinepadding[64];
+ /* Recursion depth: set to 1 on first acquire, ++/-- on nested use. */
+ unsigned int c;
+ /* Owning thread while held; reset to 0 on final release. */
+ pthread_t threadid;
+};
+#define MLOCK_T struct pthread_mlock_t
+#define CURRENT_THREAD pthread_self()
+#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
+#define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl)
+#define RELEASE_LOCK(sl) pthread_release_lock(sl)
+#define TRY_LOCK(sl) pthread_try_lock(sl)
+#define SPINS_PER_YIELD 63
+
+static MLOCK_T malloc_global_mutex = { 0, "", 0, 0};
+
+/*
+ Spin until the lock is held by the calling thread; always returns 0.
+ The lock is recursive: if the lock word is already set and the
+ recorded owner is the caller, only the depth counter is bumped.
+*/
+static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) {
+ int spins = 0;
+ volatile unsigned int* lp = &sl->l;
+ for (;;) {
+ if (*lp != 0) {
+ /* Lock word is set: succeed only for a nested acquire by the owner */
+ if (sl->threadid == CURRENT_THREAD) {
+ ++sl->c;
+ return 0;
+ }
+ }
+ else {
+ /* place args to cmpxchgl in locals to evade oddities in some gccs */
+ int cmp = 0;
+ int val = 1;
+ int ret;
+ /* Atomically: if *lp == cmp (0) store val (1). ret comes back in eax
+ and holds the value *lp had, so 0 means we took the lock. */
+ __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
+ : "=a" (ret)
+ : "r" (val), "m" (*(lp)), "0"(cmp)
+ : "memory", "cc");
+ if (!ret) {
+ assert(!sl->threadid);
+ sl->threadid = CURRENT_THREAD;
+ sl->c = 1;
+ return 0;
+ }
+ }
+ /* SPINS_PER_YIELD is a mask (63): yield once every 64 failed attempts */
+ if ((++spins & SPINS_PER_YIELD) == 0) {
+#if defined (__SVR4) && defined (__sun) /* solaris */
+ thr_yield();
+#else
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
+ sched_yield();
+#else /* no-op yield on unknown systems */
+ ;
+#endif /* __linux__ || __FreeBSD__ || __APPLE__ */
+#endif /* solaris */
+ }
+ }
+}
+
+/*
+ Undo one acquire by the owning thread. The lock word is only cleared
+ when the recursion depth drops to zero; the asserts (active only in
+ DEBUG builds) check that the lock is held and held by the caller.
+*/
+static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) {
+ volatile unsigned int* lp = &sl->l;
+ assert(*lp != 0);
+ assert(sl->threadid == CURRENT_THREAD);
+ if (--sl->c == 0) {
+ /* Drop ownership before the lock word is released */
+ sl->threadid = 0;
+ int prev = 0;
+ int ret;
+ /* Atomically swap 0 into *lp; the old value lands in ret and is unused.
+ NOTE(review): *lp is written by xchgl but listed as an "m" input;
+ the "memory" clobber appears to be what keeps this safe -- confirm. */
+ __asm__ __volatile__ ("lock; xchgl %0, %1"
+ : "=r" (ret)
+ : "m" (*(lp)), "0"(prev)
+ : "memory");
+ }
+}
+
+/*
+ Single, non-spinning attempt to take the lock. Returns 1 if the caller
+ now holds it (fresh or nested acquire) and 0 otherwise. Note the
+ return convention is the opposite of pthread_acquire_lock, which
+ returns 0 on success.
+*/
+static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) {
+ volatile unsigned int* lp = &sl->l;
+ if (*lp != 0) {
+ /* Already locked: only a nested acquire by the owner succeeds */
+ if (sl->threadid == CURRENT_THREAD) {
+ ++sl->c;
+ return 1;
+ }
+ }
+ else {
+ int cmp = 0;
+ int val = 1;
+ int ret;
+ /* Same compare-and-swap as in pthread_acquire_lock: ret == 0 means
+ the lock word was 0 and is now 1. */
+ __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
+ : "=a" (ret)
+ : "r" (val), "m" (*(lp)), "0"(cmp)
+ : "memory", "cc");
+ if (!ret) {
+ assert(!sl->threadid);
+ sl->threadid = CURRENT_THREAD;
+ sl->c = 1;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+#else /* WIN32 */
+/* Custom win32-style spin locks on x86 and x64 for MSC */
+/* State of one recursive spin lock (MSC/Win32 flavour). */
+struct win32_mlock_t {
+ /* Lock word: 0 when free; set to 1 via interlockedexchange on acquire. */
+ volatile long l;
+ /* presumably keeps the lock word apart from the fields below -- see name */
+ char cachelinepadding[64];
+ /* Recursion depth: set to 1 on first acquire, ++/-- on nested use. */
+ unsigned int c;
+ /* Id of the owning thread (from CURRENT_THREAD) while held; 0 when free. */
+ long threadid;
+};
+
+#define MLOCK_T struct win32_mlock_t
+#define CURRENT_THREAD ((long)GetCurrentThreadId())
+#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
+#define ACQUIRE_LOCK(sl) win32_acquire_lock(sl)
+#define RELEASE_LOCK(sl) win32_release_lock(sl)
+#define TRY_LOCK(sl) win32_try_lock(sl)
+#define SPINS_PER_YIELD 63
+
+static MLOCK_T malloc_global_mutex = { 0, 0, 0};
+
+/*
+  Spin until the calling thread holds the lock; always returns 0.
+  A free lock word is claimed with an interlocked exchange; a lock word
+  that is already set is accepted only when the caller is the recorded
+  owner (nested acquire). Every 64th failed attempt the thread gives up
+  its time slice via SleepEx(0, FALSE).
+*/
+static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) {
+  int attempts = 0;
+  for (;;) {
+    if (sl->l == 0) {
+      /* Looks free: try to claim it; an old value of 0 means we won */
+      if (interlockedexchange(&sl->l, 1) == 0) {
+        assert(!sl->threadid);
+        sl->threadid = CURRENT_THREAD;
+        sl->c = 1;
+        return 0;
+      }
+    }
+    else if (sl->threadid == CURRENT_THREAD) {
+      /* Nested acquire by the owner */
+      ++sl->c;
+      return 0;
+    }
+    attempts += 1;
+    if ((attempts & SPINS_PER_YIELD) == 0)
+      SleepEx(0, FALSE);
+  }
+}
+
+/*
+  Undo one acquire by the owning thread. Nested releases only lower the
+  depth counter; the final release clears the owner and then resets the
+  lock word with an interlocked exchange.
+*/
+static FORCEINLINE void win32_release_lock (MLOCK_T *sl) {
+  assert(sl->threadid == CURRENT_THREAD);
+  assert(sl->l != 0);
+  --sl->c;
+  if (sl->c != 0)
+    return;                     /* still held by an outer acquire */
+  sl->threadid = 0;
+  interlockedexchange (&sl->l, 0);
+}
+
+/*
+  One non-spinning attempt to take the lock. Returns 1 when the caller
+  holds the lock afterwards (fresh or nested acquire), otherwise 0.
+*/
+static FORCEINLINE int win32_try_lock (MLOCK_T *sl) {
+  if (sl->l == 0) {
+    /* Looks free: claim it if the exchange reports the old value as 0 */
+    if (interlockedexchange(&sl->l, 1) == 0) {
+      assert(!sl->threadid);
+      sl->threadid = CURRENT_THREAD;
+      sl->c = 1;
+      return 1;
+    }
+  }
+  else if (sl->threadid == CURRENT_THREAD) {
+    /* Nested acquire by the owner */
+    ++sl->c;
+    return 1;
+  }
+  return 0;
+}
+
+#endif /* WIN32 */
+#else /* USE_SPIN_LOCKS */
+
+#ifndef WIN32
+/* pthreads-based locks */
+
+#define MLOCK_T pthread_mutex_t
+#define CURRENT_THREAD pthread_self()
+#define INITIAL_LOCK(sl) pthread_init_lock(sl)
+#define ACQUIRE_LOCK(sl) pthread_mutex_lock(sl)
+#define RELEASE_LOCK(sl) pthread_mutex_unlock(sl)
+#define TRY_LOCK(sl) (!pthread_mutex_trylock(sl))
+
+static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Cope with old-style linux recursive lock initialization by adding */
+/* skipped internal declaration from pthread.h */
+#ifdef linux
+#ifndef PTHREAD_MUTEX_RECURSIVE
+extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr,
+ int __kind));
+#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
+#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y)
+#endif
+#endif
+
+/*
+  Initialize *sl as a recursive pthread mutex.
+  Returns 0 on success and 1 if any pthread call fails.
+
+  The attribute object is destroyed on every path once it has been
+  initialized, so a failure in settype or mutex_init no longer leaks it.
+*/
+static int pthread_init_lock (MLOCK_T *sl) {
+  pthread_mutexattr_t attr;
+  int failed;
+  if (pthread_mutexattr_init(&attr)) return 1;
+  /* || short-circuits, so the mutex is not created with a bad attribute */
+  failed = (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) != 0 ||
+            pthread_mutex_init(sl, &attr) != 0);
+  /* As before, a failing destroy is reported as a failure too */
+  if (pthread_mutexattr_destroy(&attr)) failed = 1;
+  return failed;
+}
+
+#else /* WIN32 */
+/* Win32 critical sections */
+/* Each macro takes a pointer to the lock. The bodies use the macro's own
+   parameter, so they work for any argument expression and not only for
+   a caller variable that happens to be named "sl". */
+#define MLOCK_T CRITICAL_SECTION
+#define CURRENT_THREAD GetCurrentThreadId()
+#define INITIAL_LOCK(s) (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000))
+#define ACQUIRE_LOCK(s) (EnterCriticalSection(s), 0)
+#define RELEASE_LOCK(s) LeaveCriticalSection(s)
+#define TRY_LOCK(s) TryEnterCriticalSection(s)
+#define NEED_GLOBAL_LOCK_INIT
+
+static MLOCK_T malloc_global_mutex;
+static volatile long malloc_global_mutex_status;
+
+/*
+  One-time initialization of malloc_global_mutex, driven by
+  malloc_global_mutex_status: 0 = untouched, < 0 = some thread is
+  initializing, > 0 = ready. The thread that moves the status from 0 to
+  -1 performs the initialization and publishes 1; every other thread
+  yields with SleepEx until it observes a positive status.
+*/
+static void init_malloc_global_mutex() {
+  long state;
+  while ((state = malloc_global_mutex_status) <= 0) {
+    if (state == 0 &&
+        interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) {
+      /* We won the race: set up the critical section, then mark it ready */
+      InitializeCriticalSection(&malloc_global_mutex);
+      interlockedexchange(&malloc_global_mutex_status,1);
+      return;
+    }
+    SleepEx(0, FALSE);
+  }
+}
+
+#endif /* WIN32 */
+#endif /* USE_SPIN_LOCKS */
+#endif /* USE_LOCKS == 1 */
+
+/* ----------------------- User-defined locks ------------------------ */
+
+#if USE_LOCKS > 1
+/* Define your own lock implementation here */
+/* #define INITIAL_LOCK(sl) ... */
+/* #define ACQUIRE_LOCK(sl) ... */
+/* #define RELEASE_LOCK(sl) ... */
+/* #define TRY_LOCK(sl) ... */
+/* static MLOCK_T malloc_global_mutex = ... */
+#endif /* USE_LOCKS > 1 */
+
+/* ----------------------- Lock-based state ------------------------ */
+
+#if USE_LOCKS
+#define USE_LOCK_BIT (2U)
+#else /* USE_LOCKS */
+#define USE_LOCK_BIT (0U)
+#define INITIAL_LOCK(l)
+#endif /* USE_LOCKS */
+
+#if USE_LOCKS
+#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
+#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex);
+#endif
+#ifndef RELEASE_MALLOC_GLOBAL_LOCK
+#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex);
+#endif
+#else /* USE_LOCKS */
+#define ACQUIRE_MALLOC_GLOBAL_LOCK()
+#define RELEASE_MALLOC_GLOBAL_LOCK()
+#endif /* USE_LOCKS */
+
+
+/* ----------------------- Chunk representations ------------------------ */
+
+/*
+ (The following includes lightly edited explanations by Colin Plumb.)
+
+ The malloc_chunk declaration below is misleading (but accurate and
+ necessary). It declares a "view" into memory allowing access to
+ necessary fields at known offsets from a given base.
+
+ Chunks of memory are maintained using a `boundary tag' method as
+ originally described by Knuth. (See the paper by Paul Wilson
+ ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
+ techniques.) Sizes of free chunks are stored both in the front of
+ each chunk and at the end. This makes consolidating fragmented
+ chunks into bigger chunks fast. The head fields also hold bits
+ representing whether chunks are free or in use.
+
+ Here are some pictures to make it clearer. They are "exploded" to
+ show that the state of a chunk can be thought of as extending from
+ the high 31 bits of the head field of its header through the
+ prev_foot and PINUSE_BIT bit of the following chunk header.
+
+ A chunk that's in use looks like:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk (if P = 0) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+ | Size of this chunk 1| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ +- -+
+ | |
+ +- -+
+ | :
+ +- size - sizeof(size_t) available payload bytes -+
+ : |
+ chunk-> +- -+
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
+ | Size of next chunk (may or may not be in use) | +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ And if it's free, it looks like this:
+
+ chunk-> +- -+
+ | User payload (must be in use, or we would have merged!) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+ | Size of this chunk 0| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Next pointer |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Prev pointer |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | :
+ +- size - sizeof(struct chunk) unused bytes -+
+ : |
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of this chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
+ | Size of next chunk (must be in use, or we would have merged)| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | :
+ +- User payload -+
+ : |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |0|
+ +-+
+ Note that since we always merge adjacent free chunks, the chunks
+ adjacent to a free chunk must be in use.
+
+ Given a pointer to a chunk (which can be derived trivially from the
+ payload pointer) we can, in O(1) time, find out whether the adjacent
+ chunks are free, and if so, unlink them from the lists that they
+ are on and merge them with the current chunk.
+
+ Chunks always begin on even word boundaries, so the mem portion
+ (which is returned to the user) is also on an even word boundary, and
+ thus at least double-word aligned.
+
+ The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
+ chunk size (which is always a multiple of two words), is an in-use
+ bit for the *previous* chunk. If that bit is *clear*, then the
+ word before the current chunk size contains the previous chunk
+ size, and can be used to find the front of the previous chunk.
+ The very first chunk allocated always has this bit set, preventing
+ access to non-existent (or non-owned) memory. If pinuse is set for
+ any given chunk, then you CANNOT determine the size of the
+ previous chunk, and might even get a memory addressing fault when
+ trying to do so.
+
+ The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
+ the chunk size redundantly records whether the current chunk is
+ inuse (unless the chunk is mmapped). This redundancy enables usage
+ checks within free and realloc, and reduces indirection when freeing
+ and consolidating chunks.
+
+ Each freshly allocated chunk must have both cinuse and pinuse set.
+ That is, each allocated chunk borders either a previously allocated
+ and still in-use chunk, or the base of its memory arena. This is
+ ensured by making all allocations from the `lowest' part of any
+ found chunk. Further, no free chunk physically borders another one,
+ so each free chunk is known to be preceded and followed by either
+ inuse chunks or the ends of memory.
+
+ Note that the `foot' of the current chunk is actually represented
+ as the prev_foot of the NEXT chunk. This makes it easier to
+ deal with alignments etc but can be very confusing when trying
+ to extend or adapt this code.
+
+ The exceptions to all this are
+
+ 1. The special chunk `top' is the top-most available chunk (i.e.,
+ the one bordering the end of available memory). It is treated
+ specially. Top is never included in any bin, is used only if
+ no other chunk is available, and is released back to the
+ system if it is very large (see M_TRIM_THRESHOLD). In effect,
+ the top chunk is treated as larger (and thus less well
+ fitting) than any other available chunk. The top chunk
+ doesn't update its trailing size field since there is no next
+ contiguous chunk that would have to index off it. However,
+ space is still allocated for it (TOP_FOOT_SIZE) to enable
+ separation or merging when space is extended.
+
+ 2. Chunks allocated via mmap have both cinuse and pinuse bits
+ cleared in their head fields. Because they are allocated
+ one-by-one, each must carry its own prev_foot field, which is
+ also used to hold the offset this chunk has within its mmapped
+ region, which is needed to preserve alignment. Each mmapped
+ chunk is trailed by the first two fields of a fake next-chunk
+ for sake of usage checks.
+
+*/
+
+/* Header "view" laid over the start of a chunk. Only prev_foot and head
+ are bookkeeping words; fd/bk are used only while the chunk is free. */
+struct malloc_chunk {
+ size_t prev_foot; /* Size of previous chunk (if free). */
+ size_t head; /* Size and inuse bits. */
+ struct malloc_chunk* fd; /* double links -- used only if free. */
+ struct malloc_chunk* bk; /* second of the double links (see fd) */
+};
+
+typedef struct malloc_chunk mchunk;
+typedef struct malloc_chunk* mchunkptr;
+typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */
+typedef unsigned int bindex_t; /* Described below */
+typedef unsigned int binmap_t; /* Described below */
+
+/* ------------------- Chunks sizes and alignments ----------------------- */
+
+/* Size in bytes of the basic chunk view (struct malloc_chunk) */
+#define MCHUNK_SIZE (sizeof(mchunk))
+
+/* Per-chunk overhead: two size_t words when FOOTERS is on, otherwise one */
+#if FOOTERS
+#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+#else /* FOOTERS */
+#define CHUNK_OVERHEAD (SIZE_T_SIZE)
+#endif /* FOOTERS */
+
+/* MMapped chunks need a second word of overhead ... */
+#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+/* ... and additional padding for fake next-chunk at foot */
+#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES)
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+/* (MCHUNK_SIZE rounded up to a multiple of CHUNK_ALIGN_MASK + 1) */
+#define MIN_CHUNK_SIZE\
+ ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* conversion from malloc headers to user pointers, and back */
+/* (the user pointer sits TWO_SIZE_T_SIZES bytes past the chunk start) */
+#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
+/*
+ chunk associated with aligned address A: A advanced by the amount
+ align_offset reports for its would-be user pointer. The expansion is
+ wrapped in parentheses so the cast applies to the whole result even
+ when the caller appends a postfix operator such as ->head.
+*/
+#define align_as_chunk(A) ((mchunkptr)((A) + align_offset(chunk2mem(A))))
+
+/* Bounds on request (not chunk) sizes. */
+/* MAX_REQUEST relies on unsigned wraparound of -MIN_CHUNK_SIZE (a size_t) */
+#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2)
+#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
+
+/* pad request bytes into a usable size */
+/* (adds CHUNK_OVERHEAD, then rounds up to the chunk alignment) */
+#define pad_request(req) \
+ (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* pad request, checking for minimum (but not maximum) */
+/* requests below MIN_REQUEST are served with MIN_CHUNK_SIZE */
+#define request2size(req) \
+ (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
+
+
+/* ------------------ Operations on head and foot fields ----------------- */
+
+/*
+ The head field of a chunk is or'ed with PINUSE_BIT when previous
+ adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
+ use, unless mmapped, in which case both bits are cleared.
+
+ FLAG4_BIT is not used by this malloc, but might be useful in extensions.
+*/
+
+/* Flag bits kept in the low-order bits of a chunk's head word */
+#define PINUSE_BIT (SIZE_T_ONE)
+#define CINUSE_BIT (SIZE_T_TWO)
+#define FLAG4_BIT (SIZE_T_FOUR)
+#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT)
+#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT)
+
+/* Head value for fenceposts */
+/* (both inuse bits set, size field equal to SIZE_T_SIZE) */
+#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE)
+
+/* extraction of fields from head words */
+/* cinuse/pinuse yield the raw masked bit, not a normalized 0/1 */
+#define cinuse(p) ((p)->head & CINUSE_BIT)
+#define pinuse(p) ((p)->head & PINUSE_BIT)
+/* is_inuse is false only when the inuse bits are exactly PINUSE_BIT, so
+ it is also true for mmapped chunks (both bits clear) */
+#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT)
+#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0)
+
+/* chunk size: the head word with all flag bits masked off */
+#define chunksize(p) ((p)->head & ~(FLAG_BITS))
+
+/* record in p's head that the previous chunk is not in use */
+#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
+
+/* Treat space at ptr +/- offset as a chunk */
+#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
+#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))
+
+/* Ptr to next or previous physical malloc_chunk. */
+/* prev_chunk trusts prev_foot, so it is meaningful only when pinuse is clear */
+#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS)))
+#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))
+
+/* extract next chunk's pinuse bit */
+#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
+
+/* Get/set size at footer */
+/* (the footer is the prev_foot field of the chunk s bytes past p) */
+#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot)
+#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))
+
+/*
+ Set size, pinuse bit, and foot of free chunk p of size s. The size
+ argument is parenthesized before being or'ed with PINUSE_BIT so that an
+ argument containing a lower-precedence operator is combined as a whole.
+*/
+#define set_size_and_pinuse_of_free_chunk(p, s)\
+  ((p)->head = ((s)|PINUSE_BIT), set_foot(p, s))
+
+/* Set size, pinuse bit, foot, and clear next pinuse */
+/* (n is the chunk whose PINUSE_BIT gets cleared; p is the free chunk) */
+#define set_free_with_pinuse(p, s, n)\
+ (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
+
+/* Get the internal overhead associated with chunk p */
+#define overhead_for(p)\
+ (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
+
+/* Return true if malloced space is not necessarily cleared */
+/* With MMAP_CLEARS only non-mmapped chunks need clearing; otherwise all do */
+#if MMAP_CLEARS
+#define calloc_must_clear(p) (!is_mmapped(p))
+#else /* MMAP_CLEARS */
+#define calloc_must_clear(p) (1)
+#endif /* MMAP_CLEARS */
+
+/* ---------------------- Overlaid data structures ----------------------- */
+
+/*
+ When chunks are not in use, they are treated as nodes of either
+ lists or trees.
+
+ "Small" chunks are stored in circular doubly-linked lists, and look
+ like this:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `head:' | Size of chunk, in bytes |P|
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Forward pointer to next chunk in list |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Back pointer to previous chunk in list |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Unused space (may be 0 bytes long) .
+ . .
+ . |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `foot:' | Size of chunk, in bytes |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Larger chunks are kept in a form of bitwise digital trees (aka
+ tries) keyed on chunksizes. Because malloc_tree_chunks are only for
+ free chunks greater than 256 bytes, their size doesn't impose any
+ constraints on user chunk sizes. Each node looks like:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `head:' | Size of chunk, in bytes |P|
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Forward pointer to next chunk of same size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Back pointer to previous chunk of same size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Pointer to left child (child[0]) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Pointer to right child (child[1]) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Pointer to parent |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | bin index of this chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Unused space .
+ . |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `foot:' | Size of chunk, in bytes |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Each tree holding treenodes is a tree of unique chunk sizes. Chunks
+ of the same size are arranged in a circularly-linked list, with only
+ the oldest chunk (the next to be used, in our FIFO ordering)
+ actually in the tree. (Tree members are distinguished by a non-null
+ parent pointer.) If a chunk with the same size as an existing node
+ is inserted, it is linked off the existing node using pointers that
+ work in the same way as fd/bk pointers of small chunks.
+
+ Each tree contains a power of 2 sized range of chunk sizes (the
+ smallest is 0x100 <= x < 0x180), which is divided in half at each
+ tree level, with the chunks in the smaller half of the range (0x100
+ <= x < 0x140 for the top node) in the left subtree and the larger
+ half (0x140 <= x < 0x180) in the right subtree. This is, of course,
+ done by inspecting individual bits.
+
+ Using these rules, each node's left subtree contains all smaller
+ sizes than its right subtree. However, the node at the root of each
+ subtree has no particular ordering relationship to either. (The
+ dividing line between the subtree sizes is based on trie relation.)
+ If we remove the last chunk of a given size from the interior of the
+ tree, we need to replace it with a leaf node. The tree ordering
+ rules permit a node to be replaced by any leaf below it.
+
+ The smallest chunk in a tree (a common operation in a best-fit
+ allocator) can be found by walking a path to the leftmost leaf in
+ the tree. Unlike a usual binary tree, where we follow left child
+ pointers until we reach a null, here we follow the right child
+ pointer any time the left one is null, until we reach a leaf with
+ both child pointers null. The smallest chunk in the tree will be
+ somewhere along that path.
+
+ The worst case number of steps to add, find, or remove a node is
+ bounded by the number of bits differentiating chunks within
+ bins. Under current bin calculations, this ranges from 6 up to 21
+ (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
+ is of course much better.
+*/
+
+/* Node view used for free chunks that are kept in the tree bins */
+struct malloc_tree_chunk {
+ /* The first four fields must be compatible with malloc_chunk */
+ size_t prev_foot;
+ size_t head;
+ struct malloc_tree_chunk* fd; /* next chunk of the same size */
+ struct malloc_tree_chunk* bk; /* previous chunk of the same size */
+
+ struct malloc_tree_chunk* child[2]; /* [0] = left, [1] = right */
+ struct malloc_tree_chunk* parent;
+ bindex_t index; /* bin index of this chunk */
+};
+
+typedef struct malloc_tree_chunk tchunk;
+typedef struct malloc_tree_chunk* tchunkptr;
+typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */
+
+/* A little helper macro for trees */
+/* Yields child[0] when it is non-null, otherwise child[1] (may be null) */
+#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
+
+/* ----------------------------- Segments -------------------------------- */
+
+/*
+ Each malloc space may include non-contiguous segments, held in a
+ list headed by an embedded malloc_segment record representing the
+ top-most space. Segments also include flags holding properties of
+ the space. Large chunks that are directly allocated by mmap are not
+ included in this list. They are instead independently created and
+ destroyed without otherwise keeping track of them.
+
+ Segment management mainly comes into play for spaces allocated by
+ MMAP. Any call to MMAP might or might not return memory that is
+ adjacent to an existing segment. MORECORE normally contiguously
+ extends the current space, so this space is almost always adjacent,
+ which is simpler and faster to deal with. (This is why MORECORE is
+ used preferentially to MMAP when both are available -- see
+ sys_alloc.) When allocating using MMAP, we don't use any of the
+ hinting mechanisms (inconsistently) supported in various
+ implementations of unix mmap, or distinguish reserving from
+ committing memory. Instead, we just ask for space, and exploit
+ contiguity when we get it. It is probably possible to do
+ better than this on some systems, but no general scheme seems
+ to be significantly better.
+
+ Management entails a simpler variant of the consolidation scheme
+ used for chunks to reduce fragmentation -- new adjacent memory is
+ normally prepended or appended to an existing segment. However,
+ there are limitations compared to chunk consolidation that mostly
+ reflect the fact that segment processing is relatively infrequent
+ (occurring only when getting memory from system) and that we
+ don't expect to have huge numbers of segments:
+
+ * Segments are not indexed, so traversal requires linear scans. (It
+ would be possible to index these, but is not worth the extra
+ overhead and complexity for most programs on most platforms.)
+ * New segments are only appended to old ones when holding top-most
+ memory; if they cannot be prepended to others, they are held in
+ different segments.
+
+ Except for the top-most segment of an mstate, each segment record
+ is kept at the tail of its segment. Segments are added by pushing
+ segment records onto the list headed by &mstate.seg for the
+ containing mstate.
+
+ Segment flags control allocation/merge/deallocation policies:
+ * If EXTERN_BIT set, then we did not allocate this segment,
+ and so should not try to deallocate or merge with others.
+ (This currently holds only for the initial segment passed
+ into create_mspace_with_base.)
+ * If USE_MMAP_BIT set, the segment may be merged with
+ other surrounding mmapped segments and trimmed/de-allocated
+ using munmap.
+ * If neither bit is set, then the segment was obtained using
+ MORECORE so can be merged with surrounding MORECORE'd segments
+ and deallocated/trimmed using MORECORE with negative arguments.
+*/
+
+/* Record describing one region of memory belonging to a malloc space;
+ records are chained through next. */
+struct malloc_segment {
+ char* base; /* base address */
+ size_t size; /* allocated size */
+ struct malloc_segment* next; /* ptr to next segment */
+ flag_t sflags; /* mmap and extern flag */
+};
+
+/* Flag tests; each yields the raw masked bit rather than a 0/1 value */
+#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT)
+#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT)
+
+typedef struct malloc_segment msegment;
+typedef struct malloc_segment* msegmentptr;
+
+/* ---------------------------- malloc_state ----------------------------- */
+
+/*
+ A malloc_state holds all of the bookkeeping for a space.
+ The main fields are:
+
+ Top
+ The topmost chunk of the currently active segment. Its size is
+ cached in topsize. The actual size of topmost space is
+ topsize+TOP_FOOT_SIZE, which includes space reserved for adding
+ fenceposts and segment records if necessary when getting more
+ space from the system. The size at which to autotrim top is
+ cached from mparams in trim_check, except that it is disabled if
+ an autotrim fails.
+
+ Designated victim (dv)
+ This is the preferred chunk for servicing small requests that
+ don't have exact fits. It is normally the chunk split off most
+ recently to service another small request. Its size is cached in
+ dvsize. The link fields of this chunk are not maintained since it
+ is not kept in a bin.
+
+ SmallBins
+ An array of bin headers for free chunks. These bins hold chunks
+ with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
+ chunks of all the same size, spaced 8 bytes apart. To simplify
+ use in double-linked lists, each bin header acts as a malloc_chunk
+ pointing to the real first node, if it exists (else pointing to
+ itself). This avoids special-casing for headers. But to avoid
+ waste, we allocate only the fd/bk pointers of bins, and then use
+ repositioning tricks to treat these as the fields of a chunk.
+
+ TreeBins
+ Treebins are pointers to the roots of trees holding a range of
+ sizes. There are 2 equally spaced treebins for each power of two
+ from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything
+ larger.
+
+ Bin maps
+ There is one bit map for small bins ("smallmap") and one for
+ treebins ("treemap"). Each bin sets its bit when non-empty, and
+ clears the bit when empty. Bit operations are then used to avoid
+ bin-by-bin searching -- nearly all "search" is done without ever
+ looking at bins that won't be selected. The bit maps
+ conservatively use 32 bits per map word, even if on 64bit system.
+ For a good description of some of the bit-based techniques used
+ here, see Henry S. Warren Jr's book "Hacker's Delight" (and
+ supplement at http://hackersdelight.org/). Many of these are
+ intended to reduce the branchiness of paths through malloc etc, as
+ well as to reduce the number of memory locations read or written.
+
+ Segments
+ A list of segments headed by an embedded malloc_segment record
+ representing the initial space.
+
+ Address check support
+ The least_addr field is the least address ever obtained from
+ MORECORE or MMAP. Attempted frees and reallocs of any address less
+ than this are trapped (unless INSECURE is defined).
+
+ Magic tag
+ A cross-check field that should always hold same value as mparams.magic.
+
+ Flags
+ Bits recording whether to use MMAP, locks, or contiguous MORECORE
+
+ Statistics
+ Each space keeps track of current and maximum system memory
+ obtained via MORECORE or MMAP.
+
+ Trim support
+ Fields holding the amount of unused topmost memory that should trigger
+ trimming, and a counter to force periodic scanning to release unused
+ non-topmost segments.
+
+ Locking
+ If USE_LOCKS is defined, the "mutex" lock is acquired and released
+ around every public call using this mspace.
+
+ Extension support
+ A void* pointer and a size_t field that can be used to help implement
+ extensions to this malloc.
+*/
+
+/* Bin types, widths and sizes */
+/* Small bins are SMALLBIN_WIDTH (1 << 3) bytes apart; sizes of
+ MIN_LARGE_SIZE (1 << 8) and above are the large sizes. */
+#define NSMALLBINS (32U)
+#define NTREEBINS (32U)
+#define SMALLBIN_SHIFT (3U)
+#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
+#define TREEBIN_SHIFT (8U)
+#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
+#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
+/* largest request whose padded chunk size still counts as small */
+#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
+
+/* All bookkeeping for one malloc space; the fields are described in the
+ comment preceding the bin constants. */
+struct malloc_state {
+ binmap_t smallmap; /* bitmap of non-empty small bins */
+ binmap_t treemap; /* bitmap of non-empty tree bins */
+ size_t dvsize; /* cached size of dv */
+ size_t topsize; /* cached size of top */
+ char* least_addr; /* least address ever obtained from the system */
+ mchunkptr dv; /* designated victim chunk */
+ mchunkptr top; /* topmost chunk; 0 until initialized (see is_initialized) */
+ size_t trim_check; /* top size above which should_trim() is true */
+ size_t release_checks; /* counter forcing periodic release scans */
+ size_t magic; /* cross-check tag */
+ mchunkptr smallbins[(NSMALLBINS+1)*2]; /* only fd/bk pairs; see smallbin_at */
+ tbinptr treebins[NTREEBINS]; /* roots of the size trees */
+ size_t footprint; /* current system memory obtained */
+ size_t max_footprint; /* maximum system memory obtained */
+ flag_t mflags; /* USE_LOCK_BIT / USE_MMAP_BIT / USE_NONCONTIGUOUS_BIT */
+ msegment seg; /* embedded head of the segment list */
+#if USE_LOCKS
+ MLOCK_T mutex; /* locate lock among fields that rarely change */
+#endif /* USE_LOCKS */
+ void* extp; /* Unused but available for extensions */
+ size_t exts;
+};
+
+typedef struct malloc_state* mstate;
+
+/* ------------- Global malloc_state and malloc_params ------------------- */
+
+#if !ONLY_MSPACES
+
+/* The global malloc_state used for all non-"mspace" calls */
+static struct malloc_state _gm_;
+/* gm is the address of that state; is_global tests a state against it */
+#define gm (&_gm_)
+#define is_global(M) ((M) == &_gm_)
+
+#endif /* !ONLY_MSPACES */
+
+/* A state counts as initialized once its top chunk pointer is non-null */
+#define is_initialized(M) ((M)->top != 0)
+
+/* -------------------------- system alloc setup ------------------------- */
+
+/* Operations on mflags */
+/* use_* yield the raw masked bit; enable_ and disable_ macros update mflags in place */
+
+#define use_lock(M) ((M)->mflags & USE_LOCK_BIT)
+#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT)
+#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT)
+
+#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT)
+#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT)
+#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT)
+
+/* no macro in this group clears the noncontiguous bit again */
+#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT)
+#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT)
+
+/* set or clear USE_LOCK_BIT according to the truth of L */
+#define set_lock(M,L)\
+ ((M)->mflags = (L)?\
+ ((M)->mflags | USE_LOCK_BIT) :\
+ ((M)->mflags & ~USE_LOCK_BIT))
+
+/* page-align a size */
+/* (rounds S up; the mask arithmetic assumes page_size is a power of two) */
+#define page_align(S)\
+ (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE))
+
+/* granularity-align a size */
+/* (same rounding, using mparams.granularity) */
+#define granularity_align(S)\
+ (((S) + (mparams.granularity - SIZE_T_ONE))\
+ & ~(mparams.granularity - SIZE_T_ONE))
+
+
+/* For mmap, use granularity alignment on windows, else page-align */
+#ifdef WIN32
+#define mmap_align(S) granularity_align(S)
+#else
+#define mmap_align(S) page_align(S)
+#endif
+
+/* For sys_alloc, enough padding to ensure can malloc request on success */
+#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
+
+/* Alignment tests: true when the low bits of S under the mask are all zero */
+#define is_page_aligned(S)\
+ (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
+#define is_granularity_aligned(S)\
+ (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
+
+/*
+ True if segment S holds address A, i.e. A lies in the half-open range
+ [S->base, S->base + S->size). S is parenthesized at each use so that any
+ pointer-valued expression can be passed, not just a plain identifier.
+*/
+#define segment_holds(S, A)\
+  ((char*)(A) >= (S)->base && (char*)(A) < (S)->base + (S)->size)
+
+/*
+ Walk the segment list of m, starting at the embedded record m->seg, and
+ return the first segment whose [base, base + size) range contains addr.
+ Returns 0 when no segment holds the address.
+*/
+static msegmentptr segment_holding(mstate m, char* addr) {
+  msegmentptr seg;
+  for (seg = &m->seg; seg != 0; seg = seg->next) {
+    char* lo = seg->base;
+    if (addr >= lo && addr < lo + seg->size)
+      return seg;
+  }
+  return 0;
+}
+
+/*
+ Return 1 if any segment record on m's list (including the embedded
+ m->seg) is itself stored inside the memory range of segment ss,
+ otherwise 0.
+*/
+static int has_segment_link(mstate m, msegmentptr ss) {
+  char* lo = ss->base;
+  char* hi = lo + ss->size;
+  msegmentptr cur = &m->seg;
+  while (cur != 0) {
+    char* rec = (char*)cur;
+    if (rec >= lo && rec < hi)
+      return 1;
+    cur = cur->next;
+  }
+  return 0;
+}
+
+/* True when size s exceeds M's trim_check threshold; always false when
+ MORECORE_CANNOT_TRIM is defined. */
+#ifndef MORECORE_CANNOT_TRIM
+#define should_trim(M,s) ((s) > (M)->trim_check)
+#else /* MORECORE_CANNOT_TRIM */
+#define should_trim(M,s) (0)
+#endif /* MORECORE_CANNOT_TRIM */
+
+/*
+ TOP_FOOT_SIZE is padding at the end of a segment, including space
+ that may be needed to place segment records and fenceposts when new
+ noncontiguous segments are added.
+
+ It is the sum of three terms: the alignment offset of chunk2mem(0),
+ the padded size of a malloc_segment record, and MIN_CHUNK_SIZE.
+*/
+#define TOP_FOOT_SIZE\
+ (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+
+/* ------------------------------- Hooks -------------------------------- */
+
+/*
+ PREACTION should be defined to return 0 on success, and nonzero on
+ failure. If you are not using locking, you can redefine these to do
+ anything you like.
+*/
+
+#if USE_LOCKS
+
+/* With locks: PREACTION evaluates to the result of ACQUIRE_LOCK on M's
+ mutex when use_lock(M) is set, else 0; POSTACTION releases that mutex
+ under the same condition. POSTACTION expands to a braced block. */
+#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
+#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
+#else /* USE_LOCKS */
+
+/* Without locks: defaults that can be overridden by predefining them */
+#ifndef PREACTION
+#define PREACTION(M) (0)
+#endif /* PREACTION */
+
+#ifndef POSTACTION
+#define POSTACTION(M)
+#endif /* POSTACTION */
+
+#endif /* USE_LOCKS */
+
+/*
+ CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
+ USAGE_ERROR_ACTION is triggered on detected bad frees and
+ reallocs. The argument p is an address that might have triggered the
+ fault. It is ignored by the two predefined actions, but might be
+ useful in custom actions that try to help diagnose errors.
+*/
+
+#if PROCEED_ON_ERROR
+
+/* A count of the number of corruption errors causing resets */
+/* (declared without static, so it has external linkage) */
+int malloc_corruption_error_count;
+
+/* default corruption action */
+static void reset_on_error(mstate m);
+
+/* In this mode corruption resets the state and usage errors are ignored
+ (USAGE_ERROR_ACTION expands to nothing). */
+#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m)
+#define USAGE_ERROR_ACTION(m, p)
+
+#else /* PROCEED_ON_ERROR */
+
+/* Otherwise both actions default to ABORT unless already defined */
+#ifndef CORRUPTION_ERROR_ACTION
+#define CORRUPTION_ERROR_ACTION(m) ABORT
+#endif /* CORRUPTION_ERROR_ACTION */
+
+#ifndef USAGE_ERROR_ACTION
+#define USAGE_ERROR_ACTION(m,p) ABORT
+#endif /* USAGE_ERROR_ACTION */
+
+#endif /* PROCEED_ON_ERROR */
+
+/* -------------------------- Debugging setup ---------------------------- */
+
+/* Without DEBUG every check_* macro expands to nothing, so the checks
+ add no code to non-debug builds. */
+#if ! DEBUG
+
+#define check_free_chunk(M,P)
+#define check_inuse_chunk(M,P)
+#define check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P)
+#define check_malloc_state(M)
+#define check_top_chunk(M,P)
+
+#else /* DEBUG */
+/* With DEBUG each check_* macro forwards to the matching do_check_* routine */
+#define check_free_chunk(M,P) do_check_free_chunk(M,P)
+#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P)
+#define check_top_chunk(M,P) do_check_top_chunk(M,P)
+#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P)
+#define check_malloc_state(M) do_check_malloc_state(M)
+
+/* Forward declarations of the debug-only checking routines */
+static void do_check_any_chunk(mstate m, mchunkptr p);
+static void do_check_top_chunk(mstate m, mchunkptr p);
+static void do_check_mmapped_chunk(mstate m, mchunkptr p);
+static void do_check_inuse_chunk(mstate m, mchunkptr p);
+static void do_check_free_chunk(mstate m, mchunkptr p);
+static void do_check_malloced_chunk(mstate m, void* mem, size_t s);
+static void do_check_tree(mstate m, tchunkptr t);
+static void do_check_treebin(mstate m, bindex_t i);
+static void do_check_smallbin(mstate m, bindex_t i);
+static void do_check_malloc_state(mstate m);
+static int bin_find(mstate m, mchunkptr x);
+static size_t traverse_and_check(mstate m);
+#endif /* DEBUG */
+
+/* ---------------------------- Indexing Bins ---------------------------- */
+
+/* A size is small when its index (size >> SMALLBIN_SHIFT) is below NSMALLBINS */
+#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
+/* size <-> small bin index conversions */
+#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT)
+#define small_index2size(i) ((i) << SMALLBIN_SHIFT)
+#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
+
+/* addressing by index. See above about smallbin repositioning */
+/* smallbin_at views the address of smallbins[2*i] as a chunk; treebin_at
+ yields the address of the i'th tree root pointer */
+#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1])))
+#define treebin_at(M,i) (&((M)->treebins[i]))
+
+/* assign tree index for size S to variable I. Use x86 asm if possible */
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+/*
+ The shifted size is range-checked at full size_t width and only then
+ narrowed to 32 bits for bsrl. Narrowing first would drop the high bits
+ of sizes of 2^40 bytes or more on x86_64 and could assign such a chunk
+ to bin 0 or another small bin instead of the last bin. In the final
+ branch X <= 0xFFFF, so the narrowing is lossless; bsrl then yields K,
+ the index of the highest set bit. S is parenthesized at each use.
+*/
+#define compute_tree_index(S, I)\
+{\
+  size_t X = (S) >> TREEBIN_SHIFT;\
+  if (X == 0)\
+    I = 0;\
+  else if (X > 0xFFFF)\
+    I = NTREEBINS-1;\
+  else {\
+    unsigned int X32 = (unsigned int)X;\
+    unsigned int K;\
+    __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "g" (X32));\
+    I = (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+  }\
+}
+
+#elif defined (__INTEL_COMPILER)
+/* Intel compiler variant: same structure, using _bit_scan_reverse for K */
+#define compute_tree_index(S, I)\
+{\
+ size_t X = S >> TREEBIN_SHIFT;\
+ if (X == 0)\
+ I = 0;\
+ else if (X > 0xFFFF)\
+ I = NTREEBINS-1;\
+ else {\
+ unsigned int K = _bit_scan_reverse (X); \
+ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+ }\
+}
+
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+/* MSVC variant: same structure, using _BitScanReverse for K. X is only
+ narrowed to DWORD after the X > 0xFFFF check. */
+#define compute_tree_index(S, I)\
+{\
+ size_t X = S >> TREEBIN_SHIFT;\
+ if (X == 0)\
+ I = 0;\
+ else if (X > 0xFFFF)\
+ I = NTREEBINS-1;\
+ else {\
+ unsigned int K;\
+ _BitScanReverse((DWORD *) &K, (DWORD) X);\
+ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+ }\
+}
+
+#else /* GNUC */
+/* Portable fallback: derives K with shifts and masks instead of a
+ bit-scan instruction. NOTE(review): presumably K equals the highest set
+ bit index of X as in the other variants -- confirm before modifying.
+ Unlike the other variants, the result is assigned to I without a
+ (bindex_t) cast. */
+#define compute_tree_index(S, I)\
+{\
+ size_t X = S >> TREEBIN_SHIFT;\
+ if (X == 0)\
+ I = 0;\
+ else if (X > 0xFFFF)\
+ I = NTREEBINS-1;\
+ else {\
+ unsigned int Y = (unsigned int)X;\
+ unsigned int N = ((Y - 0x100) >> 16) & 8;\
+ unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\
+ N += K;\
+ N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\
+ K = 14 - N + ((Y <<= K) >> 15);\
+ I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\
+ }\
+}
+#endif /* GNUC */
+
+/*
+ Bit representing maximum resolved size in a treebin at i. The last bin
+ (NTREEBINS-1) maps to the top bit of a size_t. The expansion is fully
+ parenthesized so that it can be embedded in a larger expression, and
+ the argument is parenthesized at each use.
+*/
+#define bit_for_tree_index(i) \
+  (((i) == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2))
+
+/*
+ Shift placing maximum resolved bit in a treebin at i as sign bit.
+ The last bin needs no shift.
+*/
+#define leftshift_for_tree_index(i) \
+  (((i) == NTREEBINS-1)? 0 : \
+   ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
+
+/* The size of the smallest chunk held in bin with index i */
+/* (a power of two from i >> 1, plus half of it again when i is odd) */
+#define minsize_for_tree_index(i) \
+ ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
+ (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
+
+
+/* ------------------------ Operations on bin maps ----------------------- */
+
+/* bit corresponding to given index */
+#define idx2bit(i) ((binmap_t)(1) << (i))
+
+/* Mark/Clear/test the bit with given index in the small-bin map */
+#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i))
+#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i))
+#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i))
+
+/* Mark/Clear/test the bit with given index in the tree-bin map */
+#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i))
+#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i))
+#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i))
+
+/* isolate the least set bit of a bitmap */
+#define least_bit(x) ((x) & -(x))
+
+/* mask with all bits to left of least bit of x on (the argument is
+ parenthesized so that a compound expression is shifted as a whole) */
+#define left_bits(x) (((x)<<1) | -((x)<<1))
+
+/* mask with all bits to left of or equal to least bit of x on */
+#define same_or_left_bits(x) ((x) | -(x))
+
+/* index corresponding to given bit. Use x86 asm if possible */
+/* Each variant assigns the computed index to I. */
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+/* GNU x86: bsfl (bit scan forward) on X */
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int J;\
+ __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\
+ I = (bindex_t)J;\
+}
+
+#elif defined (__INTEL_COMPILER)
+/* Intel compiler: _bit_scan_forward */
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int J;\
+ J = _bit_scan_forward (X); \
+ I = (bindex_t)J;\
+}
+
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+/* MSVC: _BitScanForward */
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int J;\
+ _BitScanForward((DWORD *) &J, X);\
+ I = (bindex_t)J;\
+}
+
+#elif USE_BUILTIN_FFS
+/* ffs() result minus one; unlike the others this expands to a bare statement */
+#define compute_bit2idx(X, I) I = ffs(X)-1
+
+#else
+/* Portable fallback built from shifts and masks on X - 1.
+ NOTE(review): presumably relies on X having exactly one bit set (callers
+ are expected to pass an isolated bit) -- confirm against call sites. */
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int Y = X - 1;\
+ unsigned int K = Y >> (16-4) & 16;\
+ unsigned int N = K; Y >>= K;\
+ N += K = Y >> (8-3) & 8; Y >>= K;\
+ N += K = Y >> (4-2) & 4; Y >>= K;\
+ N += K = Y >> (2-1) & 2; Y >>= K;\
+ N += K = Y >> (1-0) & 1; Y >>= K;\
+ I = (bindex_t)(N + Y);\
+}
+#endif /* GNUC */
+
+
+/* ----------------------- Runtime Check Support ------------------------- */
+
+/*
+ For security, the main invariant is that malloc/free/etc never
+ writes to a static address other than malloc_state, unless static
+ malloc_state itself has been corrupted, which cannot occur via
+ malloc (because of these checks). In essence this means that we
+ believe all pointers, sizes, maps etc held in malloc_state, but
+ check all of those linked or offsetted from other embedded data
+ structures. These checks are interspersed with main code in a way
+ that tends to minimize their run-time cost.
+
+ When FOOTERS is defined, in addition to range checking, we also
+ verify footer fields of inuse chunks, which can be used to guarantee
+ that the mstate controlling malloc/free is intact. This is a
+ streamlined version of the approach described by William Robertson
+ et al in "Run-time Detection of Heap-based Overflows" LISA'03
+ http://www.usenix.org/events/lisa03/tech/robertson.html The footer
+ of an inuse chunk holds the xor of its mstate and a random seed,
+ that is checked upon calls to free() and realloc(). This is
+ (probabilistically) unguessable from outside the program, but can be
+ computed by any code successfully malloc'ing any chunk, so does not
+ itself provide protection against code that has already broken
+ security through some other means. Unlike Robertson et al, we
+ always dynamically check addresses of all offset chunks (previous,
+ next, etc). This turns out to be cheaper than relying on hashes.
+*/
+
+/*
+ Pointer/status sanity predicates used by the runtime checks. When
+ INSECURE is nonzero they all collapse to the constant 1, so the
+ checks compile away.
+*/
+#if !INSECURE
+/* Check if address a is at least as high as any from MORECORE or MMAP */
+#define ok_address(M, a) ((char*)(a) >= (M)->least_addr)
+/* Check if address of next chunk n is higher than base chunk p */
+#define ok_next(p, n) ((char*)(p) < (char*)(n))
+/* Check if p has inuse status */
+#define ok_inuse(p) is_inuse(p)
+/* Check if p has its pinuse bit on */
+#define ok_pinuse(p) pinuse(p)
+
+#else /* !INSECURE */
+#define ok_address(M, a) (1)
+#define ok_next(b, n) (1)
+#define ok_inuse(p) (1)
+#define ok_pinuse(p) (1)
+#endif /* !INSECURE */
+
+/* The magic comparison is only compiled in when footers are enabled */
+/* and INSECURE is off; otherwise ok_magic is always true. */
+#if (FOOTERS && !INSECURE)
+/* Check if (alleged) mstate m has expected magic field */
+#define ok_magic(M) ((M)->magic == mparams.magic)
+#else /* (FOOTERS && !INSECURE) */
+#define ok_magic(M) (1)
+#endif /* (FOOTERS && !INSECURE) */
+
+
+/* In gcc, use __builtin_expect to minimize impact of checks */
+/*
+ RTCHECK(e) wraps a runtime check expression:
+ - !INSECURE with GCC >= 3: e, hinted as expected to be true;
+ - !INSECURE otherwise: plain e;
+ - INSECURE: constant 1, so e is not evaluated at all.
+*/
+#if !INSECURE
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define RTCHECK(e) __builtin_expect(e, 1)
+#else /* GNUC */
+#define RTCHECK(e) (e)
+#endif /* GNUC */
+#else /* !INSECURE */
+#define RTCHECK(e) (1)
+#endif /* !INSECURE */
+
+/* macros to set up inuse chunks with or without footers */
+/*
+ In every macro below M is the owning mstate, p the chunk and s its
+ size in bytes. The size argument is parenthesized wherever it is
+ combined with flag bits so that compound size expressions expand
+ with the intended precedence.
+*/
+
+#if !FOOTERS
+
+/* Without footers there is nothing to record: expands to nothing */
+#define mark_inuse_foot(M,p,s)
+
+/* Macros for setting head/foot of non-mmapped chunks */
+
+/* Set cinuse bit and pinuse bit of next chunk */
+#define set_inuse(M,p,s)\
+  ((p)->head = (((p)->head & PINUSE_BIT)|(s)|CINUSE_BIT),\
+  ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
+#define set_inuse_and_pinuse(M,p,s)\
+  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT),\
+  ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set size, cinuse and pinuse bit of this chunk */
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT))
+
+#else /* FOOTERS */
+
+/* Set foot of inuse chunk to be xor of mstate and seed */
+#define mark_inuse_foot(M,p,s)\
+  (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic))
+
+/* Recover the mstate from a chunk's footer (inverse of mark_inuse_foot) */
+#define get_mstate_for(p)\
+  ((mstate)(((mchunkptr)((char*)(p) +\
+    (chunksize(p))))->prev_foot ^ mparams.magic))
+
+/* Same as the non-footer variants, additionally writing the footer */
+#define set_inuse(M,p,s)\
+  ((p)->head = (((p)->head & PINUSE_BIT)|(s)|CINUSE_BIT),\
+  (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \
+  mark_inuse_foot(M,p,s))
+
+#define set_inuse_and_pinuse(M,p,s)\
+  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT),\
+  (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\
+  mark_inuse_foot(M,p,s))
+
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+  ((p)->head = ((s)|PINUSE_BIT|CINUSE_BIT),\
+  mark_inuse_foot(M, p, s))
+
+#endif /* !FOOTERS */
+
+/* ---------------------------- setting mparams -------------------------- */
+
+#ifdef ENABLE_LARGE_PAGES
+/* Signature of kernel32's GetLargePageMinimum, looked up at run time */
+typedef size_t (WINAPI *GetLargePageMinimum_t)(void);
+#endif
+
+/*
+  Initialize mparams.
+
+  Runs under the malloc global lock and does the work only while
+  mparams.magic is still 0: determines page size and granularity,
+  sanity-checks the build configuration (ABORT on failure), fills in
+  the default thresholds and flags, and finally stores a nonzero magic
+  value. Always returns 1.
+*/
+static int init_mparams(void) {
+#ifdef NEED_GLOBAL_LOCK_INIT
+  if (malloc_global_mutex_status <= 0)
+    init_malloc_global_mutex();
+#endif
+
+  ACQUIRE_MALLOC_GLOBAL_LOCK();
+  if (mparams.magic == 0) {
+    size_t magic;
+    size_t psize;
+    size_t gsize;
+
+#ifndef WIN32
+    psize = malloc_getpagesize;
+    gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize);
+#else /* WIN32 */
+    {
+      SYSTEM_INFO system_info;
+      GetSystemInfo(&system_info);
+      psize = system_info.dwPageSize;
+      gsize = ((DEFAULT_GRANULARITY != 0)?
+               DEFAULT_GRANULARITY : system_info.dwAllocationGranularity);
+#ifdef ENABLE_LARGE_PAGES
+      {
+        /* If the OS reports a large-page size, use it as the page size
+           and make sure the granularity is at least that large. */
+        GetLargePageMinimum_t GetLargePageMinimum_ = (GetLargePageMinimum_t) GetProcAddress(GetModuleHandle(__T("kernel32.dll")), "GetLargePageMinimum");
+        if(GetLargePageMinimum_) {
+          size_t largepagesize = GetLargePageMinimum_();
+          if(largepagesize) {
+            psize = largepagesize;
+            gsize = ((DEFAULT_GRANULARITY != 0)?
+                     DEFAULT_GRANULARITY : largepagesize);
+            if(gsize < largepagesize) gsize = largepagesize;
+          }
+        }
+      }
+#endif
+    }
+#endif /* WIN32 */
+
+    /* Sanity-check configuration:
+       size_t must be unsigned and as wide as pointer type.
+       ints must be at least 4 bytes.
+       alignment must be at least 8.
+       Alignment, min chunk size, and page size must all be powers of 2.
+    */
+    if ((sizeof(size_t) != sizeof(char*)) ||
+        (MAX_SIZE_T < MIN_CHUNK_SIZE)  ||
+        (sizeof(int) < 4)  ||
+        (MALLOC_ALIGNMENT < (size_t)8U) ||
+        ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) ||
+        ((MCHUNK_SIZE      & (MCHUNK_SIZE-SIZE_T_ONE))      != 0) ||
+        ((gsize            & (gsize-SIZE_T_ONE))            != 0) ||
+        ((psize            & (psize-SIZE_T_ONE))            != 0))
+      ABORT;
+
+    mparams.granularity = gsize;
+    mparams.page_size = psize;
+    mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
+    mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
+#if MORECORE_CONTIGUOUS
+    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT;
+#else  /* MORECORE_CONTIGUOUS */
+    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT;
+#endif /* MORECORE_CONTIGUOUS */
+
+#if !ONLY_MSPACES
+    /* Set up lock for main malloc area */
+    gm->mflags = mparams.default_mflags;
+    INITIAL_LOCK(&gm->mutex);
+#endif
+
+    {
+#if USE_DEV_RANDOM
+      int fd;
+      int have_random = 0;
+      unsigned char buf[sizeof(size_t)];
+      /* Try to use /dev/urandom, else fall back on using time */
+      if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) {
+        if (read(fd, buf, sizeof(buf)) == sizeof(buf)) {
+          magic = *((size_t *) buf);
+          have_random = 1;
+        }
+        /* Close the descriptor on every path: previously a failed or
+           short read left it open. */
+        close(fd);
+      }
+      if (!have_random)
+#endif /* USE_DEV_RANDOM */
+#ifdef WIN32
+        magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U);
+#else
+        magic = (size_t)(time(0) ^ (size_t)0x55555555U);
+#endif
+      magic |= (size_t)8U;    /* ensure nonzero */
+      magic &= ~(size_t)7U;   /* improve chances of fault for bad values */
+      /* Stored last: a nonzero magic is what marks mparams as set up */
+      mparams.magic = magic;
+    }
+  }
+
+  RELEASE_MALLOC_GLOBAL_LOCK();
+  return 1;
+}
+
+/*
+  support for mallopt: set one tunable in mparams.
+  A value of -1 stands for MAX_SIZE_T; any other value is converted to
+  size_t. Returns 1 when the parameter was updated and 0 when the
+  parameter number is unknown or a granularity is rejected (it must be
+  a power of two no smaller than the page size).
+*/
+static int change_mparam(int param_number, int value) {
+  size_t val;
+  ensure_initialization();
+
+  if (value == -1)
+    val = MAX_SIZE_T;
+  else
+    val = (size_t)value;
+
+  if (param_number == M_TRIM_THRESHOLD) {
+    mparams.trim_threshold = val;
+    return 1;
+  }
+  if (param_number == M_MMAP_THRESHOLD) {
+    mparams.mmap_threshold = val;
+    return 1;
+  }
+  if (param_number == M_GRANULARITY) {
+    /* reject sizes below a page or that are not a power of two */
+    if (val < mparams.page_size || (val & (val-1)) != 0)
+      return 0;
+    mparams.granularity = val;
+    return 1;
+  }
+  return 0;
+}
+
+#if DEBUG
+/* ------------------------- Debugging Support --------------------------- */
+
+/* Check properties of any chunk, whether free, inuse, mmapped etc */
+/* Asserts that p's user memory is aligned (fenceposts are exempt) and */
+/* that p is not below the lowest address recorded in m. */
+static void do_check_any_chunk(mstate m, mchunkptr p) {
+ assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+ assert(ok_address(m, p));
+}
+
+/* Check properties of top chunk */
+/* p is expected to be m's top chunk: it must lie in a known segment, */
+/* have the size recorded in m->topsize, end TOP_FOOT_SIZE bytes before */
+/* the end of that segment, have its pinuse bit set, and be followed by */
+/* a chunk whose pinuse bit is clear. */
+static void do_check_top_chunk(mstate m, mchunkptr p) {
+ msegmentptr sp = segment_holding(m, (char*)p);
+ size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! */
+ assert(sp != 0);
+ assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+ assert(ok_address(m, p));
+ assert(sz == m->topsize);
+ assert(sz > 0);
+ assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE);
+ assert(pinuse(p));
+ assert(!pinuse(chunk_plus_offset(p, sz)));
+}
+
+/* Check properties of (inuse) mmapped chunks */
+/* len reconstructs the size of the whole mapping: chunk size plus the */
+/* offset stored in prev_foot plus MMAP_FOOT_PAD; it must be a multiple */
+/* of the page size. The chunk must be followed by a FENCEPOST_HEAD */
+/* word and then a zero head. */
+static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
+ size_t sz = chunksize(p);
+ size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD);
+ assert(is_mmapped(p));
+ assert(use_mmap(m));
+ assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+ assert(ok_address(m, p));
+ assert(!is_small(sz));
+ assert((len & (mparams.page_size-SIZE_T_ONE)) == 0);
+ assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD);
+ assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0);
+}
+
+/* Check properties of inuse chunks */
+/* Runs the generic chunk checks, then asserts the inuse status and the */
+/* next chunk's pinuse bit; mmapped chunks get the extra mmap checks. */
+static void do_check_inuse_chunk(mstate m, mchunkptr p) {
+ do_check_any_chunk(m, p);
+ assert(is_inuse(p));
+ assert(next_pinuse(p));
+ /* If not pinuse and not mmapped, previous chunk has OK offset */
+ assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
+ if (is_mmapped(p))
+ do_check_mmapped_chunk(m, p);
+}
+
+/* Check properties of free chunks */
+/* Beyond the status-bit checks, chunks other than dv and top are also */
+/* checked for size alignment, a matching prev_foot in the following */
+/* chunk, no free neighbours, and consistent fd/bk links. */
+static void do_check_free_chunk(mstate m, mchunkptr p) {
+ size_t sz = chunksize(p);
+ mchunkptr next = chunk_plus_offset(p, sz);
+ do_check_any_chunk(m, p);
+ assert(!is_inuse(p));
+ assert(!next_pinuse(p));
+ assert (!is_mmapped(p));
+ if (p != m->dv && p != m->top) {
+ if (sz >= MIN_CHUNK_SIZE) {
+ assert((sz & CHUNK_ALIGN_MASK) == 0);
+ assert(is_aligned(chunk2mem(p)));
+ assert(next->prev_foot == sz);
+ assert(pinuse(p));
+ assert (next == m->top || is_inuse(next));
+ assert(p->fd->bk == p);
+ assert(p->bk->fd == p);
+ }
+ else /* markers are always of size SIZE_T_SIZE */
+ assert(sz == SIZE_T_SIZE);
+ }
+}
+
+/*
+  Check properties of malloced chunks at the point they are malloced.
+  mem is the user pointer just handed out (null is accepted and
+  ignored); s is the padded request size the chunk must satisfy.
+*/
+static void do_check_malloced_chunk(mstate m, void* mem, size_t s) {
+  mchunkptr chunk;
+  size_t csize;
+
+  if (mem == 0)
+    return;
+
+  chunk = mem2chunk(mem);
+  csize = chunk->head & ~INUSE_BITS;
+  do_check_inuse_chunk(m, chunk);
+  assert((csize & CHUNK_ALIGN_MASK) == 0);
+  assert(csize >= MIN_CHUNK_SIZE);
+  assert(csize >= s);
+  /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */
+  assert(is_mmapped(chunk) || csize < (s + MIN_CHUNK_SIZE));
+}
+
+/* Check a tree and its subtrees. */
+/* t is a tree node; the function walks the circular fd list of */
+/* same-sized chunks starting at t and recurses into the children of */
+/* the one list member that carries parent/child links. */
+static void do_check_tree(mstate m, tchunkptr t) {
+ tchunkptr head = 0;
+ tchunkptr u = t;
+ bindex_t tindex = t->index;
+ size_t tsize = chunksize(t);
+ bindex_t idx;
+ /* the stored index must agree with the one derived from the size */
+ compute_tree_index(tsize, idx);
+ assert(tindex == idx);
+ assert(tsize >= MIN_LARGE_SIZE);
+ assert(tsize >= minsize_for_tree_index(idx));
+ assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1))));
+
+ do { /* traverse through chain of same-sized nodes */
+ do_check_any_chunk(m, ((mchunkptr)u));
+ assert(u->index == tindex);
+ assert(chunksize(u) == tsize);
+ assert(!is_inuse(u));
+ assert(!next_pinuse(u));
+ assert(u->fd->bk == u);
+ assert(u->bk->fd == u);
+ if (u->parent == 0) {
+ /* chained (non-head) nodes carry no tree links */
+ assert(u->child[0] == 0);
+ assert(u->child[1] == 0);
+ }
+ else {
+ assert(head == 0); /* only one node on chain has parent */
+ head = u;
+ assert(u->parent != u);
+ /* parent is either a real node pointing at u, or the bin slot itself */
+ assert (u->parent->child[0] == u ||
+ u->parent->child[1] == u ||
+ *((tbinptr*)(u->parent)) == u);
+ if (u->child[0] != 0) {
+ assert(u->child[0]->parent == u);
+ assert(u->child[0] != u);
+ do_check_tree(m, u->child[0]);
+ }
+ if (u->child[1] != 0) {
+ assert(u->child[1]->parent == u);
+ assert(u->child[1] != u);
+ do_check_tree(m, u->child[1]);
+ }
+ if (u->child[0] != 0 && u->child[1] != 0) {
+ assert(chunksize(u->child[0]) < chunksize(u->child[1]));
+ }
+ }
+ u = u->fd;
+ } while (u != t);
+ assert(head != 0);
+}
+
+/* Check all the chunks in a treebin. */
+/* A null bin root requires the treemap bit for i to be clear; when the */
+/* bit is set, the whole tree rooted in the bin is checked. */
+static void do_check_treebin(mstate m, bindex_t i) {
+ tbinptr* tb = treebin_at(m, i);
+ tchunkptr t = *tb;
+ int empty = (m->treemap & (1U << i)) == 0;
+ if (t == 0)
+ assert(empty);
+ if (!empty)
+ do_check_tree(m, t);
+}
+
+/* Check all the chunks in a smallbin. */
+/* Walks bin i backwards via bk links, starting from the bin header b. */
+/* A bin whose header links to itself requires the smallmap bit for i */
+/* to be clear. */
+static void do_check_smallbin(mstate m, bindex_t i) {
+ sbinptr b = smallbin_at(m, i);
+ mchunkptr p = b->bk;
+ unsigned int empty = (m->smallmap & (1U << i)) == 0;
+ if (p == b)
+ assert(empty);
+ if (!empty) {
+ for (; p != b; p = p->bk) {
+ size_t size = chunksize(p);
+ mchunkptr q;
+ /* each chunk claims to be free */
+ do_check_free_chunk(m, p);
+ /* chunk belongs in bin */
+ assert(small_index(size) == i);
+ assert(p->bk == b || chunksize(p->bk) == chunksize(p));
+ /* chunk is followed by an inuse chunk */
+ q = next_chunk(p);
+ if (q->head != FENCEPOST_HEAD)
+ do_check_inuse_chunk(m, q);
+ }
+ }
+}
+
+/* Find x in a bin. Used in other check functions. */
+/* Returns 1 if chunk x is linked into the smallbin or treebin that */
+/* matches its size, 0 otherwise. */
+static int bin_find(mstate m, mchunkptr x) {
+ size_t size = chunksize(x);
+ if (is_small(size)) {
+ bindex_t sidx = small_index(size);
+ sbinptr b = smallbin_at(m, sidx);
+ if (smallmap_is_marked(m, sidx)) {
+ /* scan the circular list, starting at the bin header itself */
+ mchunkptr p = b;
+ do {
+ if (p == x)
+ return 1;
+ } while ((p = p->fd) != b);
+ }
+ }
+ else {
+ bindex_t tidx;
+ compute_tree_index(size, tidx);
+ if (treemap_is_marked(m, tidx)) {
+ tchunkptr t = *treebin_at(m, tidx);
+ /* descend the tree, choosing a child from the top bit of sizebits */
+ size_t sizebits = size << leftshift_for_tree_index(tidx);
+ while (t != 0 && chunksize(t) != size) {
+ t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+ sizebits <<= 1;
+ }
+ if (t != 0) {
+ /* found a node of this size: search its same-size chain for x */
+ tchunkptr u = t;
+ do {
+ if (u == (tchunkptr)x)
+ return 1;
+ } while ((u = u->fd) != t);
+ }
+ }
+ }
+ return 0;
+}
+
+/* Traverse each chunk and check it; return total */
+/* The total is m->topsize + TOP_FOOT_SIZE plus the size of every chunk */
+/* visited in every segment; traversal of a segment stops at the top */
+/* chunk or at a fencepost. Returns 0 if m is not initialized. */
+static size_t traverse_and_check(mstate m) {
+ size_t sum = 0;
+ if (is_initialized(m)) {
+ msegmentptr s = &m->seg;
+ sum += m->topsize + TOP_FOOT_SIZE;
+ while (s != 0) {
+ mchunkptr q = align_as_chunk(s->base);
+ mchunkptr lastq = 0;
+ assert(pinuse(q));
+ while (segment_holds(s, q) &&
+ q != m->top && q->head != FENCEPOST_HEAD) {
+ sum += chunksize(q);
+ if (is_inuse(q)) {
+ /* an inuse chunk must not be reachable from any bin */
+ assert(!bin_find(m, q));
+ do_check_inuse_chunk(m, q);
+ }
+ else {
+ /* a free chunk is either dv or must be in its bin */
+ assert(q == m->dv || bin_find(m, q));
+ assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */
+ do_check_free_chunk(m, q);
+ }
+ lastq = q;
+ q = next_chunk(q);
+ }
+ s = s->next;
+ }
+ }
+ return sum;
+}
+
+/* Check all properties of malloc_state. */
+/* Checks every smallbin and treebin, the dv and top chunks (neither */
+/* may be found in a bin), then traverses all segments and compares */
+/* the traversed total against the recorded footprint. */
+static void do_check_malloc_state(mstate m) {
+ bindex_t i;
+ size_t total;
+ /* check bins */
+ for (i = 0; i < NSMALLBINS; ++i)
+ do_check_smallbin(m, i);
+ for (i = 0; i < NTREEBINS; ++i)
+ do_check_treebin(m, i);
+
+ if (m->dvsize != 0) { /* check dv chunk */
+ do_check_any_chunk(m, m->dv);
+ assert(m->dvsize == chunksize(m->dv));
+ assert(m->dvsize >= MIN_CHUNK_SIZE);
+ assert(bin_find(m, m->dv) == 0);
+ }
+
+ if (m->top != 0) { /* check top chunk */
+ do_check_top_chunk(m, m->top);
+ /*assert(m->topsize == chunksize(m->top)); redundant */
+ assert(m->topsize > 0);
+ assert(bin_find(m, m->top) == 0);
+ }
+
+ total = traverse_and_check(m);
+ assert(total <= m->footprint);
+ assert(m->footprint <= m->max_footprint);
+}
+#endif /* DEBUG */
+
+/* ----------------------------- statistics ------------------------------ */
+
+#if !NO_MALLINFO
+/*
+ Build a struct mallinfo snapshot for mstate m.
+ Returns an all-zero struct if PREACTION(m) reports failure or if m
+ is not initialized. Otherwise every segment is walked up to the top
+ chunk or a fencepost, summing all chunk sizes (sum) and the sizes
+ and count of free chunks (mfree, nfree); the top chunk is counted as
+ free up front.
+*/
+static struct mallinfo internal_mallinfo(mstate m) {
+ struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ ensure_initialization();
+ if (!PREACTION(m)) {
+ check_malloc_state(m);
+ if (is_initialized(m)) {
+ size_t nfree = SIZE_T_ONE; /* top always free */
+ size_t mfree = m->topsize + TOP_FOOT_SIZE;
+ size_t sum = mfree;
+ msegmentptr s = &m->seg;
+ while (s != 0) {
+ mchunkptr q = align_as_chunk(s->base);
+ while (segment_holds(s, q) &&
+ q != m->top && q->head != FENCEPOST_HEAD) {
+ size_t sz = chunksize(q);
+ sum += sz;
+ if (!is_inuse(q)) {
+ mfree += sz;
+ ++nfree;
+ }
+ q = next_chunk(q);
+ }
+ s = s->next;
+ }
+
+ /* hblkhd is whatever part of the footprint the walk did not cover */
+ nm.arena = sum;
+ nm.ordblks = nfree;
+ nm.hblkhd = m->footprint - sum;
+ nm.usmblks = m->max_footprint;
+ nm.uordblks = m->footprint - mfree;
+ nm.fordblks = mfree;
+ nm.keepcost = m->topsize;
+ }
+
+ POSTACTION(m);
+ }
+ return nm;
+}
+#endif /* !NO_MALLINFO */
+
+/*
+ Print three statistics for mstate m to stderr: maximum footprint,
+ current footprint, and bytes in use. "In use" starts from the
+ footprint minus the top chunk (and its TOP_FOOT_SIZE overhead) and
+ then subtracts every free chunk found while walking the segments.
+ Nothing is printed if PREACTION(m) reports failure; all three values
+ print as 0 when m is not initialized.
+*/
+static void internal_malloc_stats(mstate m) {
+ ensure_initialization();
+ if (!PREACTION(m)) {
+ size_t maxfp = 0;
+ size_t fp = 0;
+ size_t used = 0;
+ check_malloc_state(m);
+ if (is_initialized(m)) {
+ msegmentptr s = &m->seg;
+ maxfp = m->max_footprint;
+ fp = m->footprint;
+ used = fp - (m->topsize + TOP_FOOT_SIZE);
+
+ while (s != 0) {
+ mchunkptr q = align_as_chunk(s->base);
+ while (segment_holds(s, q) &&
+ q != m->top && q->head != FENCEPOST_HEAD) {
+ if (!is_inuse(q))
+ used -= chunksize(q);
+ q = next_chunk(q);
+ }
+ s = s->next;
+ }
+ }
+
+ /* values are cast to unsigned long to match the %lu conversions */
+ fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp));
+ fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp));
+ fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used));
+
+ POSTACTION(m);
+ }
+}
+
+/* ----------------------- Operations on smallbins ----------------------- */
+
+/*
+ Various forms of linking and unlinking are defined as macros. Even
+ the ones for trees, which are very long but have very short typical
+ paths. This is ugly but reduces reliance on inlining support of
+ compilers.
+*/
+
+/* Link a free chunk into a smallbin */
+/* P (of size S) is linked in directly after the bin header B. If the */
+/* bin was unmarked it is marked and P becomes its only element (F stays */
+/* B); otherwise F is the old first chunk, after a check on its address. */
+/* Note: on a failed check the link stores still run with F == B once */
+/* CORRUPTION_ERROR_ACTION returns. */
+#define insert_small_chunk(M, P, S) {\
+ bindex_t I = small_index(S);\
+ mchunkptr B = smallbin_at(M, I);\
+ mchunkptr F = B;\
+ assert(S >= MIN_CHUNK_SIZE);\
+ if (!smallmap_is_marked(M, I))\
+ mark_smallmap(M, I);\
+ else if (RTCHECK(ok_address(M, B->fd)))\
+ F = B->fd;\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ B->fd = P;\
+ F->bk = P;\
+ P->fd = F;\
+ P->bk = B;\
+}
+
+/* Unlink a chunk from a smallbin */
+/* P has size S. If P's neighbours coincide (F == B) P was the only */
+/* element, so the bin's map bit is simply cleared. Otherwise F and B */
+/* are joined after checking that each is either the bin header or an */
+/* acceptable address. */
+#define unlink_small_chunk(M, P, S) {\
+ mchunkptr F = P->fd;\
+ mchunkptr B = P->bk;\
+ bindex_t I = small_index(S);\
+ assert(P != B);\
+ assert(P != F);\
+ assert(chunksize(P) == small_index2size(I));\
+ if (F == B)\
+ clear_smallmap(M, I);\
+ else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\
+ (B == smallbin_at(M,I) || ok_address(M, B)))) {\
+ F->bk = B;\
+ B->fd = F;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+}
+
+/* Unlink the first chunk from a smallbin */
+/* B is the bin header for index I and P the chunk being removed. */
+/* If P's successor F is the header, the bin becomes empty and its map */
+/* bit is cleared; otherwise B and F are linked together. */
+#define unlink_first_small_chunk(M, B, P, I) {\
+ mchunkptr F = P->fd;\
+ assert(P != B);\
+ assert(P != F);\
+ assert(chunksize(P) == small_index2size(I));\
+ if (B == F)\
+ clear_smallmap(M, I);\
+ else if (RTCHECK(ok_address(M, F))) {\
+ B->fd = F;\
+ F->bk = B;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+}
+
+
+
+/* Replace dv node, binning the old one */
+/* Used only when dvsize known to be small */
+/* If M currently has a dv chunk (dvsize != 0) it is moved into its */
+/* smallbin first; then P of size S becomes the new dv. */
+#define replace_dv(M, P, S) {\
+ size_t DVS = M->dvsize;\
+ if (DVS != 0) {\
+ mchunkptr DV = M->dv;\
+ assert(is_small(DVS));\
+ insert_small_chunk(M, DV, DVS);\
+ }\
+ M->dvsize = S;\
+ M->dv = P;\
+}
+
+/* ------------------------- Operations on trees ------------------------- */
+
+/* Insert chunk into tree */
+/*
+ X (a tchunkptr of size S) is placed in treebin I computed from S.
+ - Empty bin: X becomes the root; its parent is the bin slot itself.
+ - Otherwise the tree is descended, picking child[0] or child[1] from
+ the top bit of K (S shifted left so the first deciding bit is the
+ sign bit), shifting K left one bit per level.
+ * Reaching an empty child slot links X there as a new leaf.
+ * Meeting a node T of the same size links X into T's circular
+ fd/bk chain with a null parent.
+ Pointers are validated with RTCHECK(ok_address(...)) before being
+ written through.
+*/
+#define insert_large_chunk(M, X, S) {\
+ tbinptr* H;\
+ bindex_t I;\
+ compute_tree_index(S, I);\
+ H = treebin_at(M, I);\
+ X->index = I;\
+ X->child[0] = X->child[1] = 0;\
+ if (!treemap_is_marked(M, I)) {\
+ mark_treemap(M, I);\
+ *H = X;\
+ X->parent = (tchunkptr)H;\
+ X->fd = X->bk = X;\
+ }\
+ else {\
+ tchunkptr T = *H;\
+ size_t K = S << leftshift_for_tree_index(I);\
+ for (;;) {\
+ if (chunksize(T) != S) {\
+ tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
+ K <<= 1;\
+ if (*C != 0)\
+ T = *C;\
+ else if (RTCHECK(ok_address(M, C))) {\
+ *C = X;\
+ X->parent = T;\
+ X->fd = X->bk = X;\
+ break;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ break;\
+ }\
+ }\
+ else {\
+ tchunkptr F = T->fd;\
+ if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
+ T->fd = F->bk = X;\
+ X->fd = F;\
+ X->bk = T;\
+ X->parent = 0;\
+ break;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ break;\
+ }\
+ }\
+ }\
+ }\
+}
+
+/*
+ Unlink steps:
+
+ 1. If x is a chained node, unlink it from its same-sized fd/bk links
+ and choose its bk node as its replacement.
+ 2. If x was the last node of its size, but not a leaf node, it must
+ be replaced with a leaf node (not merely one with an open left or
+ right), to make sure that lefts and rights of descendents
+ correspond properly to bit masks. We use the rightmost descendent
+ of x. We could use any other leaf, but this is easy to locate and
+ tends to counteract removal of leftmosts elsewhere, and so keeps
+ paths shorter than minimally guaranteed. This doesn't loop much
+ because on average a node in a tree is near the bottom.
+ 3. If x is the base of a chain (i.e., has parent links) relink
+ x's parent and children to x's replacement (or null if none).
+*/
+
+/*
+ Remove tree chunk X from M's treebins.
+ XP is X's parent link (null when X is a chained, non-head node) and R
+ is the node chosen to take X's place: X->bk when X has same-sized
+ siblings, otherwise a leaf found by following child[1] (preferred)
+ or child[0] links downward, or null when X has no children. RP
+ tracks the slot that pointed at R so it can be cleared. When XP is
+ non-null, the bin root or XP's child slot is redirected to R and X's
+ children are re-parented to R.
+*/
+#define unlink_large_chunk(M, X) {\
+ tchunkptr XP = X->parent;\
+ tchunkptr R;\
+ if (X->bk != X) {\
+ tchunkptr F = X->fd;\
+ R = X->bk;\
+ if (RTCHECK(ok_address(M, F))) {\
+ F->bk = R;\
+ R->fd = F;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ else {\
+ tchunkptr* RP;\
+ if (((R = *(RP = &(X->child[1]))) != 0) ||\
+ ((R = *(RP = &(X->child[0]))) != 0)) {\
+ tchunkptr* CP;\
+ while ((*(CP = &(R->child[1])) != 0) ||\
+ (*(CP = &(R->child[0])) != 0)) {\
+ R = *(RP = CP);\
+ }\
+ if (RTCHECK(ok_address(M, RP)))\
+ *RP = 0;\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ }\
+ if (XP != 0) {\
+ tbinptr* H = treebin_at(M, X->index);\
+ if (X == *H) {\
+ if ((*H = R) == 0) \
+ clear_treemap(M, X->index);\
+ }\
+ else if (RTCHECK(ok_address(M, XP))) {\
+ if (XP->child[0] == X) \
+ XP->child[0] = R;\
+ else \
+ XP->child[1] = R;\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ if (R != 0) {\
+ if (RTCHECK(ok_address(M, R))) {\
+ tchunkptr C0, C1;\
+ R->parent = XP;\
+ if ((C0 = X->child[0]) != 0) {\
+ if (RTCHECK(ok_address(M, C0))) {\
+ R->child[0] = C0;\
+ C0->parent = R;\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ if ((C1 = X->child[1]) != 0) {\
+ if (RTCHECK(ok_address(M, C1))) {\
+ R->child[1] = C1;\
+ C1->parent = R;\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+}
+
+/* Relays to large vs small bin operations */
+/* Both macros dispatch on is_small(S): small sizes go to the smallbin */
+/* macros, everything else is cast to tchunkptr and handed to the tree */
+/* macros. Each expands to a bare if/else statement (not wrapped in */
+/* braces or do-while), so use it only as a full statement. */
+
+#define insert_chunk(M, P, S)\
+ if (is_small(S)) insert_small_chunk(M, P, S)\
+ else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
+
+#define unlink_chunk(M, P, S)\
+ if (is_small(S)) unlink_small_chunk(M, P, S)\
+ else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
+
+
+/* Relays to internal calls to malloc/free from realloc, memalign etc */
+/*
+  internal_malloc(m, b) is an expression yielding the allocated pointer.
+  In the MSPACES build it is a conditional expression, so the whole
+  expansion (and the m argument) is parenthesized; otherwise a cast or
+  operator applied at the call site would bind to the (m == gm) test
+  only.
+  internal_free(m, mem) is kept in its original statement form,
+  including the trailing semicolon of the mspace variants, because its
+  call sites are not all visible here; use it only as a full statement.
+*/
+
+#if ONLY_MSPACES
+#define internal_malloc(m, b) mspace_malloc(m, b)
+#define internal_free(m, mem) mspace_free(m,mem);
+#else /* ONLY_MSPACES */
+#if MSPACES
+#define internal_malloc(m, b)\
+  (((m) == gm)? dlmalloc(b) : mspace_malloc(m, b))
+#define internal_free(m, mem)\
+  if ((m) == gm) dlfree(mem); else mspace_free(m,mem);
+#else /* MSPACES */
+#define internal_malloc(m, b) dlmalloc(b)
+#define internal_free(m, mem) dlfree(mem)
+#endif /* MSPACES */
+#endif /* ONLY_MSPACES */
+
+/* ----------------------- Direct-mmapping chunks ----------------------- */
+
+/*
+ Directly mmapped chunks are set up with an offset to the start of
+ the mmapped region stored in the prev_foot field of the chunk. This
+ allows reconstruction of the required argument to MUNMAP when freed,
+ and also allows adjustment of the returned chunk to meet alignment
+ requirements (especially in memalign).
+*/
+
+/* Malloc using mmap */
+/*
+ Maps a fresh region big enough for a chunk of nb bytes plus header,
+ alignment and trailer overhead, and returns the user pointer of the
+ chunk placed in it. Returns 0 if the padded size wraps around or the
+ mapping call fails. Updates m's least_addr, footprint and
+ max_footprint on success.
+*/
+static void* mmap_alloc(mstate m, size_t nb) {
+ size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ if (mmsize > nb) { /* Check for wrap around 0 */
+ char* mm = (char*)(CALL_DIRECT_MMAP(mmsize));
+ if (mm != CMFAIL) {
+ /* offset aligns the user memory; it is remembered in prev_foot */
+ size_t offset = align_offset(chunk2mem(mm));
+ size_t psize = mmsize - offset - MMAP_FOOT_PAD;
+ mchunkptr p = (mchunkptr)(mm + offset);
+ p->prev_foot = offset;
+ p->head = psize;
+ mark_inuse_foot(m, p, psize);
+ /* trailer: a fencepost head followed by a zero head */
+ chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
+ chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
+
+ if (m->least_addr == 0 || mm < m->least_addr)
+ m->least_addr = mm;
+ if ((m->footprint += mmsize) > m->max_footprint)
+ m->max_footprint = m->footprint;
+ assert(is_aligned(chunk2mem(p)));
+ check_mmapped_chunk(m, p);
+ return chunk2mem(p);
+ }
+ }
+ return 0;
+}
+
+/* Realloc using mmap */
+/*
+ Tries to resize the directly-mmapped chunk oldp to hold nb bytes.
+ Returns oldp unchanged when it is already large enough and not more
+ than two granularity units too big, a (possibly moved) chunk when
+ CALL_MREMAP succeeds, and 0 when nb is a small size or the remap
+ fails.
+*/
+static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) {
+ size_t oldsize = chunksize(oldp);
+ if (is_small(nb)) /* Can't shrink mmap regions below small size */
+ return 0;
+ /* Keep old chunk if big enough but not too big */
+ if (oldsize >= nb + SIZE_T_SIZE &&
+ (oldsize - nb) <= (mparams.granularity << 1))
+ return oldp;
+ else {
+ /* prev_foot holds the chunk's offset from the start of its mapping */
+ size_t offset = oldp->prev_foot;
+ size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD;
+ size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ char* cp = (char*)CALL_MREMAP((char*)oldp - offset,
+ oldmmsize, newmmsize, 1);
+ if (cp != CMFAIL) {
+ mchunkptr newp = (mchunkptr)(cp + offset);
+ size_t psize = newmmsize - offset - MMAP_FOOT_PAD;
+ newp->head = psize;
+ mark_inuse_foot(m, newp, psize);
+ /* rewrite the trailer at the new end of the chunk */
+ chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
+ chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
+
+ if (cp < m->least_addr)
+ m->least_addr = cp;
+ if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint)
+ m->max_footprint = m->footprint;
+ check_mmapped_chunk(m, newp);
+ return newp;
+ }
+ }
+ return 0;
+}
+
+/* -------------------------- mspace management -------------------------- */
+
+/*
+  Initialize top chunk and its size.
+  p/psize describe the raw region; the chunk actually installed as
+  m->top starts at the first position whose user memory is aligned,
+  and its size shrinks by the same amount.
+*/
+static void init_top(mstate m, mchunkptr p, size_t psize) {
+  /* Ensure alignment */
+  size_t pad = align_offset(chunk2mem(p));
+  mchunkptr top = (mchunkptr)((char*)p + pad);
+  size_t topsize = psize - pad;
+
+  m->top = top;
+  m->topsize = topsize;
+  top->head = topsize | PINUSE_BIT;
+  /* set size of fake trailing chunk holding overhead space only once */
+  chunk_plus_offset(top, topsize)->head = TOP_FOOT_SIZE;
+  m->trim_check = mparams.trim_threshold; /* reset on each update */
+}
+
+/*
+  Initialize bins for a new mstate that is otherwise zeroed out.
+  Every smallbin header is made to point at itself in both directions,
+  i.e. each bin starts as an empty circular list.
+*/
+static void init_bins(mstate m) {
+  bindex_t idx = 0;
+  while (idx < NSMALLBINS) {
+    sbinptr header = smallbin_at(m,idx);
+    header->bk = header;
+    header->fd = header;
+    ++idx;
+  }
+}
+
+#if PROCEED_ON_ERROR
+
+/*
+  default corruption action: count the error and make m forget about
+  all memory it was managing, leaving it in a freshly-initialized,
+  empty state.
+*/
+static void reset_on_error(mstate m) {
+  int i;
+  ++malloc_corruption_error_count;
+  /* Reinitialize fields to forget about all memory */
+  /* Clear the occupancy bitmaps. The bin storage itself (reached via
+     smallbin_at/treebin_at) is reset by the loop and init_bins below;
+     the original assigned 0 to smallbins/treebins here, which does not
+     clear the bitmaps. */
+  m->smallmap = m->treemap = 0;
+  m->dvsize = m->topsize = 0;
+  m->seg.base = 0;
+  m->seg.size = 0;
+  m->seg.next = 0;
+  m->top = m->dv = 0;
+  for (i = 0; i < NTREEBINS; ++i)
+    *treebin_at(m, i) = 0;
+  init_bins(m);
+}
+#endif /* PROCEED_ON_ERROR */
+
+/* Allocate chunk and prepend remainder with chunk in successor base. */
+/*
+ newbase is the start of newly obtained space that ends where oldbase
+ begins. A chunk of nb bytes is carved from the front of the new
+ space and returned (as user memory); the rest (q, qsize) is merged
+ with the first chunk of the old region: into top, into dv, or -- after
+ absorbing that chunk if it is free -- into a bin.
+*/
+static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
+ size_t nb) {
+ mchunkptr p = align_as_chunk(newbase);
+ mchunkptr oldfirst = align_as_chunk(oldbase);
+ size_t psize = (char*)oldfirst - (char*)p;
+ mchunkptr q = chunk_plus_offset(p, nb);
+ size_t qsize = psize - nb;
+ set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+
+ assert((char*)oldfirst > (char*)q);
+ assert(pinuse(oldfirst));
+ assert(qsize >= MIN_CHUNK_SIZE);
+
+ /* consolidate remainder with first chunk of old base */
+ if (oldfirst == m->top) {
+ size_t tsize = m->topsize += qsize;
+ m->top = q;
+ q->head = tsize | PINUSE_BIT;
+ check_top_chunk(m, q);
+ }
+ else if (oldfirst == m->dv) {
+ size_t dsize = m->dvsize += qsize;
+ m->dv = q;
+ set_size_and_pinuse_of_free_chunk(q, dsize);
+ }
+ else {
+ if (!is_inuse(oldfirst)) {
+ /* absorb the free first chunk; oldfirst then names its successor */
+ size_t nsize = chunksize(oldfirst);
+ unlink_chunk(m, oldfirst, nsize);
+ oldfirst = chunk_plus_offset(oldfirst, nsize);
+ qsize += nsize;
+ }
+ set_free_with_pinuse(q, qsize, oldfirst);
+ insert_chunk(m, q, qsize);
+ check_free_chunk(m, q);
+ }
+
+ check_malloced_chunk(m, chunk2mem(p), nb);
+ return chunk2mem(p);
+}
+
+/* Add a segment to hold a new noncontiguous region */
+/*
+ tbase/tsize describe the new region and mmapped becomes its sflags.
+ The new region becomes m's top and m->seg; the previous m->seg
+ record is copied into a small inuse chunk (sp/ss) carved near the
+ end of the old top's segment, followed by fenceposts up to the old
+ segment end. Whatever is left of the old top in front of that record
+ is binned as an ordinary free chunk.
+*/
+static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
+ /* Determine locations and sizes of segment, fenceposts, old top */
+ char* old_top = (char*)m->top;
+ msegmentptr oldsp = segment_holding(m, old_top);
+ char* old_end = oldsp->base + oldsp->size;
+ size_t ssize = pad_request(sizeof(struct malloc_segment));
+ char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ size_t offset = align_offset(chunk2mem(rawsp));
+ char* asp = rawsp + offset;
+ /* if too little of old top would remain in front, use all of it */
+ char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
+ mchunkptr sp = (mchunkptr)csp;
+ msegmentptr ss = (msegmentptr)(chunk2mem(sp));
+ mchunkptr tnext = chunk_plus_offset(sp, ssize);
+ mchunkptr p = tnext;
+ int nfences = 0;
+
+ /* reset top to new space */
+ init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+
+ /* Set up segment record */
+ assert(is_aligned(ss));
+ set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
+ *ss = m->seg; /* Push current record */
+ m->seg.base = tbase;
+ m->seg.size = tsize;
+ m->seg.sflags = mmapped;
+ m->seg.next = ss;
+
+ /* Insert trailing fenceposts */
+ for (;;) {
+ mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
+ p->head = FENCEPOST_HEAD;
+ ++nfences;
+ if ((char*)(&(nextp->head)) < old_end)
+ p = nextp;
+ else
+ break;
+ }
+ assert(nfences >= 2);
+
+ /* Insert the rest of old top into a bin as an ordinary free chunk */
+ if (csp != old_top) {
+ mchunkptr q = (mchunkptr)old_top;
+ size_t psize = csp - old_top;
+ mchunkptr tn = chunk_plus_offset(q, psize);
+ set_free_with_pinuse(q, psize, tn);
+ insert_chunk(m, q, psize);
+ }
+
+ check_top_chunk(m, m->top);
+}
+
+/* -------------------------- System allocation -------------------------- */
+
+/* Get memory from system using MORECORE or MMAP */
+static void* sys_alloc(mstate m, size_t nb) {
+ char* tbase = CMFAIL;
+ size_t tsize = 0;
+ flag_t mmap_flag = 0;
+
+ ensure_initialization();
+
+ /* Directly map large chunks, but only if already initialized */
+ if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) {
+ void* mem = mmap_alloc(m, nb);
+ if (mem != 0)
+ return mem;
+ }
+
+ /*
+ Try getting memory in any of three ways (in most-preferred to
+ least-preferred order):
+ 1. A call to MORECORE that can normally contiguously extend memory.
+ (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or
+ main space is mmapped or a previous contiguous call failed)
+ 2. A call to MMAP new space (disabled if not HAVE_MMAP).
+ Note that under the default settings, if MORECORE is unable to
+ fulfill a request, and HAVE_MMAP is true, then mmap is
+ used as a noncontiguous system allocator. This is a useful backup
+ strategy for systems with holes in address spaces -- in this case
+ sbrk cannot contiguously expand the heap, but mmap may be able to
+ find space.
+ 3. A call to MORECORE that cannot usually contiguously extend memory.
+ (disabled if not HAVE_MORECORE)
+
+ In all cases, we need to request enough bytes from system to ensure
+ we can malloc nb bytes upon success, so pad with enough space for
+ top_foot, plus alignment-pad to make sure we don't lose bytes if
+ not on boundary, and round this up to a granularity unit.
+ */
+
+ if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
+ char* br = CMFAIL;
+ msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
+ size_t asize = 0;
+ ACQUIRE_MALLOC_GLOBAL_LOCK();
+
+ if (ss == 0) { /* First time through or recovery */
+ char* base = (char*)CALL_MORECORE(0);
+ if (base != CMFAIL) {
+ asize = granularity_align(nb + SYS_ALLOC_PADDING);
+ /* Adjust to end on a page boundary */
+ if (!is_page_aligned(base))
+ asize += (page_align((size_t)base) - (size_t)base);
+ /* Can't call MORECORE if size is negative when treated as signed */
+ if (asize < HALF_MAX_SIZE_T &&
+ (br = (char*)(CALL_MORECORE(asize))) == base) {
+ tbase = base;
+ tsize = asize;
+ }
+ }
+ }
+ else {
+ /* Subtract out existing available top space from MORECORE request. */
+ asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING);
+ /* Use mem here only if it did continuously extend old space */
+ if (asize < HALF_MAX_SIZE_T &&
+ (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) {
+ tbase = br;
+ tsize = asize;
+ }
+ }
+
+ if (tbase == CMFAIL) { /* Cope with partial failure */
+ if (br != CMFAIL) { /* Try to use/extend the space we did get */
+ if (asize < HALF_MAX_SIZE_T &&
+ asize < nb + SYS_ALLOC_PADDING) {
+ size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize);
+ if (esize < HALF_MAX_SIZE_T) {
+ char* end = (char*)CALL_MORECORE(esize);
+ if (end != CMFAIL)
+ asize += esize;
+ else { /* Can't use; try to release */
+ (void) CALL_MORECORE(-asize);
+ br = CMFAIL;
+ }
+ }
+ }
+ }
+ if (br != CMFAIL) { /* Use the space we did get */
+ tbase = br;
+ tsize = asize;
+ }
+ else
+ disable_contiguous(m); /* Don't try contiguous path in the future */
+ }
+
+ RELEASE_MALLOC_GLOBAL_LOCK();
+ }
+
+ if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */
+ size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING);
+ if (rsize > nb) { /* Fail if wraps around zero */
+ char* mp = (char*)(CALL_MMAP(rsize));
+ if (mp != CMFAIL) {
+ tbase = mp;
+ tsize = rsize;
+ mmap_flag = USE_MMAP_BIT;
+ }
+ }
+ }
+
+ if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
+ size_t asize = granularity_align(nb + SYS_ALLOC_PADDING);
+ if (asize < HALF_MAX_SIZE_T) {
+ char* br = CMFAIL;
+ char* end = CMFAIL;
+ ACQUIRE_MALLOC_GLOBAL_LOCK();
+ br = (char*)(CALL_MORECORE(asize));
+ end = (char*)(CALL_MORECORE(0));
+ RELEASE_MALLOC_GLOBAL_LOCK();
+ if (br != CMFAIL && end != CMFAIL && br < end) {
+ size_t ssize = end - br;
+ if (ssize > nb + TOP_FOOT_SIZE) {
+ tbase = br;
+ tsize = ssize;
+ }
+ }
+ }
+ }
+
+ if (tbase != CMFAIL) {
+
+ if ((m->footprint += tsize) > m->max_footprint)
+ m->max_footprint = m->footprint;
+
+ if (!is_initialized(m)) { /* first-time initialization */
+ if (m->least_addr == 0 || tbase < m->least_addr)
+ m->least_addr = tbase;
+ m->seg.base = tbase;
+ m->seg.size = tsize;
+ m->seg.sflags = mmap_flag;
+ m->magic = mparams.magic;
+ m->release_checks = MAX_RELEASE_CHECK_RATE;
+ init_bins(m);
+#if !ONLY_MSPACES
+ if (is_global(m))
+ init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+ else
+#endif
+ {
+ /* Offset top by embedded malloc_state */
+ mchunkptr mn = next_chunk(mem2chunk(m));
+ init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
+ }
+ }
+
+ else {
+ /* Try to merge with an existing segment */
+ msegmentptr sp = &m->seg;
+ /* Only consider most recent segment if traversal suppressed */
+ while (sp != 0 && tbase != sp->base + sp->size)
+ sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+ if (sp != 0 &&
+ !is_extern_segment(sp) &&
+ (sp->sflags & USE_MMAP_BIT) == mmap_flag &&
+ segment_holds(sp, m->top)) { /* append */
+ sp->size += tsize;
+ init_top(m, m->top, m->topsize + tsize);
+ }
+ else {
+ if (tbase < m->least_addr)
+ m->least_addr = tbase;
+ sp = &m->seg;
+ while (sp != 0 && sp->base != tbase + tsize)
+ sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+ if (sp != 0 &&
+ !is_extern_segment(sp) &&
+ (sp->sflags & USE_MMAP_BIT) == mmap_flag) {
+ char* oldbase = sp->base;
+ sp->base = tbase;
+ sp->size += tsize;
+ return prepend_alloc(m, tbase, oldbase, nb);
+ }
+ else
+ add_segment(m, tbase, tsize, mmap_flag);
+ }
+ }
+
+ if (nb < m->topsize) { /* Allocate from new or extended top space */
+ size_t rsize = m->topsize -= nb;
+ mchunkptr p = m->top;
+ mchunkptr r = m->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+ check_top_chunk(m, m->top);
+ check_malloced_chunk(m, chunk2mem(p), nb);
+ return chunk2mem(p);
+ }
+ }
+
+ MALLOC_FAILURE_ACTION;
+ return 0;
+}
+
+/* ----------------------- system deallocation -------------------------- */
+
+/*
+ Unmap and unlink any mmapped segments that don't contain used chunks.
+
+ Walks the segment list starting after the initial record (m->seg),
+ returns the total number of bytes successfully unmapped, and resets
+ m->release_checks before returning.
+*/
+static size_t release_unused_segments(mstate m) {
+ size_t released = 0; /* bytes unmapped so far */
+ int nsegs = 0; /* segments visited; used to reset the check counter */
+ msegmentptr pred = &m->seg;
+ msegmentptr sp = pred->next;
+ while (sp != 0) {
+ char* base = sp->base;
+ size_t size = sp->size;
+ msegmentptr next = sp->next; /* saved up front; sp may be rewritten below */
+ ++nsegs;
+ if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
+ mchunkptr p = align_as_chunk(base);
+ size_t psize = chunksize(p);
+ /* Can unmap if first chunk holds entire segment and not pinned */
+ if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
+ tchunkptr tp = (tchunkptr)p;
+ assert(segment_holds(sp, (char*)sp));
+ if (p == m->dv) { /* chunk is the dv chunk: just clear dv */
+ m->dv = 0;
+ m->dvsize = 0;
+ }
+ else {
+ unlink_large_chunk(m, tp);
+ }
+ if (CALL_MUNMAP(base, size) == 0) {
+ released += size;
+ m->footprint -= size;
+ /* unlink obsoleted record */
+ sp = pred;
+ sp->next = next;
+ }
+ else { /* back out if cannot unmap */
+ insert_large_chunk(m, tp, psize);
+ }
+ }
+ }
+ if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */
+ break;
+ pred = sp;
+ sp = next;
+ }
+ /* Reset check counter */
+ m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)?
+ nsegs : MAX_RELEASE_CHECK_RATE);
+ return released;
+}
+/*
+ Try to give unused memory at the end of top back to the system.
+
+ m:   malloc state to trim.
+ pad: number of bytes to keep available in top (TOP_FOOT_SIZE is added).
+
+ Shrinks the segment holding top via CALL_MREMAP/CALL_MUNMAP or a
+ negative CALL_MORECORE, then (if HAVE_MMAP) also calls
+ release_unused_segments. Returns 1 if any memory was released, else 0.
+*/
+static int sys_trim(mstate m, size_t pad) {
+ size_t released = 0;
+ ensure_initialization();
+ if (pad < MAX_REQUEST && is_initialized(m)) {
+ pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
+
+ if (m->topsize > pad) {
+ /* Shrink top space in granularity-size units, keeping at least one */
+ size_t unit = mparams.granularity;
+ size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
+ SIZE_T_ONE) * unit;
+ msegmentptr sp = segment_holding(m, (char*)m->top);
+
+ if (!is_extern_segment(sp)) {
+ if (is_mmapped_segment(sp)) {
+ if (HAVE_MMAP &&
+ sp->size >= extra &&
+ !has_segment_link(m, sp)) { /* can't shrink if pinned */
+ size_t newsize = sp->size - extra;
+ /* Prefer mremap, fall back to munmap */
+ if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
+ (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
+ released = extra;
+ }
+ }
+ }
+ else if (HAVE_MORECORE) {
+ if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */
+ extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
+ ACQUIRE_MALLOC_GLOBAL_LOCK();
+ {
+ /* Make sure end of memory is where we last set it. */
+ char* old_br = (char*)(CALL_MORECORE(0));
+ if (old_br == sp->base + sp->size) {
+ char* rel_br = (char*)(CALL_MORECORE(-extra));
+ char* new_br = (char*)(CALL_MORECORE(0));
+ if (rel_br != CMFAIL && new_br < old_br)
+ released = old_br - new_br; /* trust the observed change, not the request */
+ }
+ }
+ RELEASE_MALLOC_GLOBAL_LOCK();
+ }
+ }
+
+ if (released != 0) { /* record the smaller segment and top */
+ sp->size -= released;
+ m->footprint -= released;
+ init_top(m, m->top, m->topsize - released);
+ check_top_chunk(m, m->top);
+ }
+ }
+
+ /* Unmap any unused mmapped segments */
+ if (HAVE_MMAP)
+ released += release_unused_segments(m);
+
+ /* On failure, disable autotrim to avoid repeated failed future calls */
+ if (released == 0 && m->topsize > m->trim_check)
+ m->trim_check = MAX_SIZE_T;
+ }
+
+ return (released != 0)? 1 : 0;
+}
+
+
+/* ---------------------------- malloc support --------------------------- */
+
+/*
+ allocate a large request from the best fitting chunk in a treebin
+
+ m:  malloc state to allocate from.
+ nb: padded chunk size wanted.
+
+ Returns user memory for the chosen chunk (split if the remainder is at
+ least MIN_CHUNK_SIZE). Returns 0 when no tree chunk fits better than
+ the dv chunk, or after CORRUPTION_ERROR_ACTION if a runtime check fails.
+*/
+static void* tmalloc_large(mstate m, size_t nb) {
+ tchunkptr v = 0; /* best candidate found so far */
+ size_t rsize = -nb; /* Unsigned negation */
+ tchunkptr t;
+ bindex_t idx;
+ compute_tree_index(nb, idx);
+ if ((t = *treebin_at(m, idx)) != 0) {
+ /* Traverse tree for this bin looking for node with size == nb */
+ size_t sizebits = nb << leftshift_for_tree_index(idx);
+ tchunkptr rst = 0; /* The deepest untaken right subtree */
+ for (;;) {
+ tchunkptr rt;
+ size_t trem = chunksize(t) - nb; /* wraps to a huge value if t is smaller than nb */
+ if (trem < rsize) {
+ v = t;
+ if ((rsize = trem) == 0)
+ break;
+ }
+ rt = t->child[1];
+ t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; /* branch on the current top bit of sizebits */
+ if (rt != 0 && rt != t)
+ rst = rt;
+ if (t == 0) {
+ t = rst; /* set t to least subtree holding sizes > nb */
+ break;
+ }
+ sizebits <<= 1;
+ }
+ }
+ if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
+ binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
+ if (leftbits != 0) {
+ bindex_t i;
+ binmap_t leastbit = least_bit(leftbits);
+ compute_bit2idx(leastbit, i);
+ t = *treebin_at(m, i);
+ }
+ }
+
+ while (t != 0) { /* find smallest of tree or subtree */
+ size_t trem = chunksize(t) - nb;
+ if (trem < rsize) {
+ rsize = trem;
+ v = t;
+ }
+ t = leftmost_child(t);
+ }
+
+ /* If dv is a better fit, return 0 so malloc will use it */
+ if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
+ if (RTCHECK(ok_address(m, v))) { /* split */
+ mchunkptr r = chunk_plus_offset(v, nb);
+ assert(chunksize(v) == rsize + nb);
+ if (RTCHECK(ok_next(v, r))) {
+ unlink_large_chunk(m, v);
+ if (rsize < MIN_CHUNK_SIZE) /* remainder too small to stand alone: hand out whole chunk */
+ set_inuse_and_pinuse(m, v, (rsize + nb));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ insert_chunk(m, r, rsize);
+ }
+ return chunk2mem(v);
+ }
+ }
+ CORRUPTION_ERROR_ACTION(m);
+ }
+ return 0;
+}
+
+/*
+ allocate a small request from the best fitting chunk in a treebin
+
+ m:  malloc state to allocate from.
+ nb: padded chunk size wanted.
+
+ The root of the lowest-indexed non-empty treebin is dereferenced without
+ a null check, so m->treemap must be nonzero on entry (the callers
+ visible in this file test treemap != 0 first).
+
+ Returns user memory for the chosen chunk; any remainder of at least
+ MIN_CHUNK_SIZE is handed to replace_dv. Returns 0 after
+ CORRUPTION_ERROR_ACTION if a runtime check fails.
+*/
+static void* tmalloc_small(mstate m, size_t nb) {
+ tchunkptr t, v;
+ size_t rsize;
+ bindex_t i;
+ binmap_t leastbit = least_bit(m->treemap);
+ compute_bit2idx(leastbit, i);
+ v = t = *treebin_at(m, i);
+ rsize = chunksize(t) - nb;
+
+ while ((t = leftmost_child(t)) != 0) { /* keep the chunk with the smallest remainder */
+ size_t trem = chunksize(t) - nb;
+ if (trem < rsize) {
+ rsize = trem;
+ v = t;
+ }
+ }
+
+ if (RTCHECK(ok_address(m, v))) {
+ mchunkptr r = chunk_plus_offset(v, nb);
+ assert(chunksize(v) == rsize + nb);
+ if (RTCHECK(ok_next(v, r))) {
+ unlink_large_chunk(m, v);
+ if (rsize < MIN_CHUNK_SIZE) /* remainder too small to stand alone: hand out whole chunk */
+ set_inuse_and_pinuse(m, v, (rsize + nb));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ replace_dv(m, r, rsize);
+ }
+ return chunk2mem(v);
+ }
+ }
+
+ CORRUPTION_ERROR_ACTION(m);
+ return 0;
+}
+
+/* --------------------------- realloc support --------------------------- */
+/*
+ Resize the allocation at oldmem to hold at least `bytes` bytes.
+
+ m:      malloc state owning oldmem.
+ oldmem: user pointer of the existing allocation.
+ bytes:  requested new size.
+
+ Fails with MALLOC_FAILURE_ACTION and 0 when bytes >= MAX_REQUEST.
+ Under the lock it tries mmap_resize (mmapped chunks), an in-place
+ shrink, or growth into top; after releasing the lock it frees any
+ split-off tail, or falls back to malloc + memcpy + free.
+ Returns the (possibly moved) user pointer, or 0 on failure, on a
+ usage error, or when PREACTION fails.
+*/
+static void* internal_realloc(mstate m, void* oldmem, size_t bytes) {
+ if (bytes >= MAX_REQUEST) {
+ MALLOC_FAILURE_ACTION;
+ return 0;
+ }
+ if (!PREACTION(m)) {
+ mchunkptr oldp = mem2chunk(oldmem);
+ size_t oldsize = chunksize(oldp);
+ mchunkptr next = chunk_plus_offset(oldp, oldsize);
+ mchunkptr newp = 0; /* stays 0 if no in-place strategy worked */
+ void* extra = 0; /* split-off tail to free once the lock is released */
+
+ /* Try to either shrink or extend into top. Else malloc-copy-free */
+
+ if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) &&
+ ok_next(oldp, next) && ok_pinuse(next))) {
+ size_t nb = request2size(bytes);
+ if (is_mmapped(oldp))
+ newp = mmap_resize(m, oldp, nb);
+ else if (oldsize >= nb) { /* already big enough */
+ size_t rsize = oldsize - nb;
+ newp = oldp;
+ if (rsize >= MIN_CHUNK_SIZE) { /* tail is big enough to become its own chunk */
+ mchunkptr remainder = chunk_plus_offset(newp, nb);
+ set_inuse(m, newp, nb);
+ set_inuse_and_pinuse(m, remainder, rsize);
+ extra = chunk2mem(remainder);
+ }
+ }
+ else if (next == m->top && oldsize + m->topsize > nb) {
+ /* Expand into top */
+ size_t newsize = oldsize + m->topsize;
+ size_t newtopsize = newsize - nb;
+ mchunkptr newtop = chunk_plus_offset(oldp, nb);
+ set_inuse(m, oldp, nb);
+ newtop->head = newtopsize |PINUSE_BIT;
+ m->top = newtop;
+ m->topsize = newtopsize;
+ newp = oldp;
+ }
+ }
+ else {
+ USAGE_ERROR_ACTION(m, oldmem);
+ POSTACTION(m);
+ return 0;
+ }
+#if DEBUG
+ if (newp != 0) {
+ check_inuse_chunk(m, newp); /* Check requires lock */
+ }
+#endif
+
+ POSTACTION(m);
+
+ if (newp != 0) {
+ if (extra != 0) {
+ internal_free(m, extra);
+ }
+ return chunk2mem(newp);
+ }
+ else {
+ void* newmem = internal_malloc(m, bytes);
+ if (newmem != 0) {
+ size_t oc = oldsize - overhead_for(oldp); /* usable bytes in the old chunk */
+ memcpy(newmem, oldmem, (oc < bytes)? oc : bytes);
+ internal_free(m, oldmem);
+ }
+ return newmem;
+ }
+ }
+ return 0;
+}
+
+/* --------------------------- memalign support -------------------------- */
+
+/*
+ Allocate at least `bytes` usable bytes at an address that is a multiple
+ of `alignment`.
+
+ m:         malloc state to allocate from.
+ alignment: requested alignment; values <= MALLOC_ALIGNMENT fall through
+            to internal_malloc, smaller-than-MIN_CHUNK_SIZE values are
+            raised to MIN_CHUNK_SIZE, and non-powers-of-two are rounded up
+            to the next power of two.
+ bytes:     requested size.
+
+ Over-allocates with internal_malloc, picks an aligned spot inside the
+ chunk, and frees the unused leading and trailing pieces.
+ Returns the aligned user pointer, or 0 on failure.
+
+ An alignment that is not a power of two and exceeds the largest power
+ of two representable in size_t cannot be rounded up; it is rejected
+ with MALLOC_FAILURE_ACTION instead of looping forever.
+*/
+static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
+  if (alignment <= MALLOC_ALIGNMENT)    /* Can just use malloc */
+    return internal_malloc(m, bytes);
+  if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
+    alignment = MIN_CHUNK_SIZE;
+  if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */
+    size_t a = MALLOC_ALIGNMENT << 1;
+    /*
+      Without this guard, `a` would shift past the top bit, become 0, and
+      `a < alignment` would stay true forever.
+    */
+    if (alignment > (MAX_SIZE_T >> 1) + SIZE_T_ONE) {
+      MALLOC_FAILURE_ACTION;
+      return 0;
+    }
+    while (a < alignment) a <<= 1;
+    alignment = a;
+  }
+
+  if (bytes >= MAX_REQUEST - alignment) {
+    if (m != 0)  { /* Test isn't needed but avoids compiler warning */
+      MALLOC_FAILURE_ACTION;
+    }
+  }
+  else {
+    size_t nb = request2size(bytes);
+    /* room for the request, worst-case alignment slack, and a leader chunk */
+    size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
+    char* mem = (char*)internal_malloc(m, req);
+    if (mem != 0) {
+      void* leader = 0;   /* leading piece to free after unlocking */
+      void* trailer = 0;  /* trailing piece to free after unlocking */
+      mchunkptr p = mem2chunk(mem);
+
+      /* NOTE(review): on this early return `mem` is neither returned nor freed */
+      if (PREACTION(m)) return 0;
+      if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */
+        /*
+          Find an aligned spot inside chunk.  Since we need to give
+          back leading space in a chunk of at least MIN_CHUNK_SIZE, if
+          the first calculation places us at a spot with less than
+          MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
+          We've allocated enough total room so that this is always
+          possible.
+        */
+        char* br = (char*)mem2chunk((size_t)(((size_t)(mem +
+                                                       alignment -
+                                                       SIZE_T_ONE)) &
+                                             -alignment));
+        char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)?
+          br : br+alignment;
+        mchunkptr newp = (mchunkptr)pos;
+        size_t leadsize = pos - (char*)(p);
+        size_t newsize = chunksize(p) - leadsize;
+
+        if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */
+          newp->prev_foot = p->prev_foot + leadsize;
+          newp->head = newsize;
+        }
+        else { /* Otherwise, give back leader, use the rest */
+          set_inuse(m, newp, newsize);
+          set_inuse(m, p, leadsize);
+          leader = chunk2mem(p);
+        }
+        p = newp;
+      }
+
+      /* Give back spare room at the end */
+      if (!is_mmapped(p)) {
+        size_t size = chunksize(p);
+        if (size > nb + MIN_CHUNK_SIZE) {
+          size_t remainder_size = size - nb;
+          mchunkptr remainder = chunk_plus_offset(p, nb);
+          set_inuse(m, p, nb);
+          set_inuse(m, remainder, remainder_size);
+          trailer = chunk2mem(remainder);
+        }
+      }
+
+      assert (chunksize(p) >= nb);
+      assert((((size_t)(chunk2mem(p))) % alignment) == 0);
+      check_inuse_chunk(m, p);
+      POSTACTION(m);
+      if (leader != 0) {
+        internal_free(m, leader);
+      }
+      if (trailer != 0) {
+        internal_free(m, trailer);
+      }
+      return chunk2mem(p);
+    }
+  }
+  return 0;
+}
+
+/* ------------------------ comalloc/coalloc support --------------------- */
+
+static void** ialloc(mstate m,
+ size_t n_elements,
+ size_t* sizes,
+ int opts,
+ void* chunks[]) {
+ /*
+ This provides common support for independent_X routines, handling
+ all of the combinations that can result.
+
+ The opts arg has:
+ bit 0 set if all elements are same size (using sizes[0])
+ bit 1 set if elements should be zeroed
+
+ m:          malloc state to allocate from.
+ n_elements: number of elements to carve out.
+ sizes:      per-element request sizes (only sizes[0] is read when
+             bit 0 of opts is set).
+ chunks:     caller-supplied pointer array to fill, or 0 to have the
+             array allocated as the final part of the aggregate chunk.
+
+ Returns the pointer array (chunks, or the allocated one), or 0 if the
+ aggregate allocation or PREACTION fails.
+
+ NOTE(review): no overflow check is visible here on
+ n_elements * sizeof(void*), n_elements * element_size, or the running
+ sum of request2size(sizes[i]); confirm callers bound these values.
+ */
+
+ size_t element_size; /* chunksize of each element, if all same */
+ size_t contents_size; /* total size of elements */
+ size_t array_size; /* request size of pointer array */
+ void* mem; /* malloced aggregate space */
+ mchunkptr p; /* corresponding chunk */
+ size_t remainder_size; /* remaining bytes while splitting */
+ void** marray; /* either "chunks" or malloced ptr array */
+ mchunkptr array_chunk; /* chunk for malloced ptr array */
+ flag_t was_enabled; /* to disable mmap */
+ size_t size;
+ size_t i;
+
+ ensure_initialization();
+ /* compute array length, if needed */
+ if (chunks != 0) {
+ if (n_elements == 0)
+ return chunks; /* nothing to do */
+ marray = chunks;
+ array_size = 0;
+ }
+ else {
+ /* if empty req, must still return chunk representing empty array */
+ if (n_elements == 0)
+ return (void**)internal_malloc(m, 0);
+ marray = 0;
+ array_size = request2size(n_elements * (sizeof(void*)));
+ }
+
+ /* compute total element size */
+ if (opts & 0x1) { /* all-same-size */
+ element_size = request2size(*sizes);
+ contents_size = n_elements * element_size;
+ }
+ else { /* add up all the sizes */
+ element_size = 0;
+ contents_size = 0;
+ for (i = 0; i != n_elements; ++i)
+ contents_size += request2size(sizes[i]);
+ }
+
+ size = contents_size + array_size;
+
+ /*
+ Allocate the aggregate chunk. First disable direct-mmapping so
+ malloc won't use it, since we would not be able to later
+ free/realloc space internal to a segregated mmap region.
+ */
+ was_enabled = use_mmap(m);
+ disable_mmap(m);
+ mem = internal_malloc(m, size - CHUNK_OVERHEAD);
+ if (was_enabled) /* restore the previous mmap setting */
+ enable_mmap(m);
+ if (mem == 0)
+ return 0;
+
+ if (PREACTION(m)) return 0;
+ p = mem2chunk(mem);
+ remainder_size = chunksize(p);
+
+ assert(!is_mmapped(p));
+
+ if (opts & 0x2) { /* optionally clear the elements */
+ memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size);
+ }
+
+ /* If not provided, allocate the pointer array as final part of chunk */
+ if (marray == 0) {
+ size_t array_chunk_size;
+ array_chunk = chunk_plus_offset(p, contents_size);
+ array_chunk_size = remainder_size - contents_size;
+ marray = (void**) (chunk2mem(array_chunk));
+ set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
+ remainder_size = contents_size;
+ }
+
+ /* split out elements */
+ for (i = 0; ; ++i) {
+ marray[i] = chunk2mem(p);
+ if (i != n_elements-1) {
+ if (element_size != 0)
+ size = element_size;
+ else
+ size = request2size(sizes[i]);
+ remainder_size -= size;
+ set_size_and_pinuse_of_inuse_chunk(m, p, size);
+ p = chunk_plus_offset(p, size);
+ }
+ else { /* the final element absorbs any overallocation slop */
+ set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
+ break;
+ }
+ }
+
+#if DEBUG
+ if (marray != chunks) {
+ /* final element must have exactly exhausted chunk */
+ if (element_size != 0) {
+ assert(remainder_size == element_size);
+ }
+ else {
+ assert(remainder_size == request2size(sizes[i]));
+ }
+ check_inuse_chunk(m, mem2chunk(marray));
+ }
+ for (i = 0; i != n_elements; ++i)
+ check_inuse_chunk(m, mem2chunk(marray[i]));
+
+#endif /* DEBUG */
+
+ POSTACTION(m);
+ return marray;
+}
+
+
+/* -------------------------- public routines ---------------------------- */
+
+#if !ONLY_MSPACES
+
+void* dlmalloc(size_t bytes) {
+ /*
+ Allocate `bytes` bytes from the global malloc state (gm).
+ Returns the user pointer, or 0 if PREACTION fails or no memory
+ could be obtained (sys_alloc's result is returned as is).
+
+ Basic algorithm:
+ If a small request (< 256 bytes minus per-chunk overhead):
+ 1. If one exists, use a remainderless chunk in associated smallbin.
+ (Remainderless means that there are too few excess bytes to
+ represent as a chunk.)
+ 2. If it is big enough, use the dv chunk, which is normally the
+ chunk adjacent to the one used for the most recent small request.
+ 3. If one exists, split the smallest available chunk in a bin,
+ saving remainder in dv.
+ 4. If it is big enough, use the top chunk.
+ 5. If available, get memory from system and use it
+ Otherwise, for a large request:
+ 1. Find the smallest available binned chunk that fits, and use it
+ if it is better fitting than dv chunk, splitting if necessary.
+ 2. If better fitting than any binned chunk, use the dv chunk.
+ 3. If it is big enough, use the top chunk.
+ 4. If request size >= mmap threshold, try to directly mmap this chunk.
+ 5. If available, get memory from system and use it
+
+ The ugly goto's here ensure that postaction occurs along all paths.
+ */
+
+#if USE_LOCKS
+ ensure_initialization(); /* initialize in sys_alloc if not using locks */
+#endif
+
+ if (!PREACTION(gm)) {
+ void* mem;
+ size_t nb; /* padded chunk size actually requested */
+ if (bytes <= MAX_SMALL_REQUEST) {
+ bindex_t idx;
+ binmap_t smallbits;
+ nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+ idx = small_index(nb);
+ smallbits = gm->smallmap >> idx; /* bit 0 now refers to bin idx, bit 1 to bin idx+1 */
+
+ if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+ mchunkptr b, p;
+ idx += ~smallbits & 1; /* Uses next bin if idx empty */
+ b = smallbin_at(gm, idx);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(idx));
+ unlink_first_small_chunk(gm, b, p, idx);
+ set_inuse_and_pinuse(gm, p, small_index2size(idx));
+ mem = chunk2mem(p);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb > gm->dvsize) { /* dv cannot satisfy the request */
+ if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+ mchunkptr b, p, r;
+ size_t rsize;
+ bindex_t i;
+ binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+ binmap_t leastbit = least_bit(leftbits);
+ compute_bit2idx(leastbit, i);
+ b = smallbin_at(gm, i);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(i));
+ unlink_first_small_chunk(gm, b, p, i);
+ rsize = small_index2size(i) - nb;
+ /* Fit here cannot be remainderless if 4byte sizes */
+ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(gm, p, small_index2size(i));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+ r = chunk_plus_offset(p, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ replace_dv(gm, r, rsize);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+ }
+ }
+ else if (bytes >= MAX_REQUEST)
+ nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+ else {
+ nb = pad_request(bytes);
+ if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+ }
+
+ if (nb <= gm->dvsize) { /* serve the request from the dv chunk */
+ size_t rsize = gm->dvsize - nb;
+ mchunkptr p = gm->dv;
+ if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+ mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
+ gm->dvsize = rsize;
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+ }
+ else { /* exhaust dv */
+ size_t dvs = gm->dvsize;
+ gm->dvsize = 0;
+ gm->dv = 0;
+ set_inuse_and_pinuse(gm, p, dvs);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb < gm->topsize) { /* Split top */
+ size_t rsize = gm->topsize -= nb;
+ mchunkptr p = gm->top;
+ mchunkptr r = gm->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+ mem = chunk2mem(p);
+ check_top_chunk(gm, gm->top);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ mem = sys_alloc(gm, nb); /* last resort: obtain more memory from the system */
+
+ postaction:
+ POSTACTION(gm);
+ return mem;
+ }
+
+ return 0;
+}
+
+void dlfree(void* mem) {
+ /*
+ Release the allocation at `mem`; a null pointer is ignored.
+
+ Consolidate freed chunks with preceding or succeeding bordering
+ free chunks, if they exist, and then place in a bin. Intermixed
+ with special cases for top, dv, mmapped chunks, and usage errors.
+
+ With FOOTERS the owning malloc state is looked up from the chunk and
+ validated with ok_magic; otherwise `fm` is a local macro alias for gm.
+ */
+
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+#if FOOTERS
+ mstate fm = get_mstate_for(p);
+ if (!ok_magic(fm)) {
+ USAGE_ERROR_ACTION(fm, p);
+ return;
+ }
+#else /* FOOTERS */
+#define fm gm
+#endif /* FOOTERS */
+ if (!PREACTION(fm)) {
+ check_inuse_chunk(fm, p);
+ if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+ size_t psize = chunksize(p);
+ mchunkptr next = chunk_plus_offset(p, psize);
+ if (!pinuse(p)) { /* previous-in-use bit clear: mmapped chunk or free predecessor */
+ size_t prevsize = p->prev_foot;
+ if (is_mmapped(p)) {
+ psize += prevsize + MMAP_FOOT_PAD; /* full length passed to CALL_MUNMAP */
+ if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+ fm->footprint -= psize;
+ goto postaction;
+ }
+ else {
+ mchunkptr prev = chunk_minus_offset(p, prevsize);
+ psize += prevsize;
+ p = prev;
+ if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+ if (p != fm->dv) {
+ unlink_chunk(fm, p, prevsize);
+ }
+ else if ((next->head & INUSE_BITS) == INUSE_BITS) { /* merged into dv and next is in use: just grow dv */
+ fm->dvsize = psize;
+ set_free_with_pinuse(p, psize, next);
+ goto postaction;
+ }
+ }
+ else
+ goto erroraction;
+ }
+ }
+
+ if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+ if (!cinuse(next)) { /* consolidate forward */
+ if (next == fm->top) { /* merge into top */
+ size_t tsize = fm->topsize += psize;
+ fm->top = p;
+ p->head = tsize | PINUSE_BIT;
+ if (p == fm->dv) {
+ fm->dv = 0;
+ fm->dvsize = 0;
+ }
+ if (should_trim(fm, tsize))
+ sys_trim(fm, 0);
+ goto postaction;
+ }
+ else if (next == fm->dv) { /* merge into dv */
+ size_t dsize = fm->dvsize += psize;
+ fm->dv = p;
+ set_size_and_pinuse_of_free_chunk(p, dsize);
+ goto postaction;
+ }
+ else { /* merge with an ordinary free chunk */
+ size_t nsize = chunksize(next);
+ psize += nsize;
+ unlink_chunk(fm, next, nsize);
+ set_size_and_pinuse_of_free_chunk(p, psize);
+ if (p == fm->dv) {
+ fm->dvsize = psize;
+ goto postaction;
+ }
+ }
+ }
+ else
+ set_free_with_pinuse(p, psize, next);
+
+ if (is_small(psize)) {
+ insert_small_chunk(fm, p, psize);
+ check_free_chunk(fm, p);
+ }
+ else {
+ tchunkptr tp = (tchunkptr)p;
+ insert_large_chunk(fm, tp, psize);
+ check_free_chunk(fm, p);
+ if (--fm->release_checks == 0) /* periodically look for releasable segments */
+ release_unused_segments(fm);
+ }
+ goto postaction;
+ }
+ }
+ erroraction:
+ USAGE_ERROR_ACTION(fm, p);
+ postaction:
+ POSTACTION(fm);
+ }
+ }
+#if !FOOTERS
+#undef fm
+#endif /* FOOTERS */
+}
+
+/*
+ Allocate zero-filled space for n_elements items of elem_size bytes each
+ from the global malloc state. If the product overflows, MAX_SIZE_T is
+ requested instead so that the allocation fails downstream.
+ Returns the user pointer, or 0 on failure.
+*/
+void* dlcalloc(size_t n_elements, size_t elem_size) {
+  size_t total = 0;
+  void* result;
+  if (n_elements != 0) {
+    /* the division is only needed when either operand has bits above the low 16 */
+    const size_t high_bits = (n_elements | elem_size) & ~(size_t)0xffff;
+    total = n_elements * elem_size;
+    if (high_bits != 0 && total / n_elements != elem_size)
+      total = MAX_SIZE_T; /* force downstream failure on overflow */
+  }
+  result = dlmalloc(total);
+  if (result == 0)
+    return 0;
+  if (calloc_must_clear(mem2chunk(result)))
+    memset(result, 0, total);
+  return result;
+}
+/*
+ Resize the allocation at oldmem to `bytes` bytes.
+
+ A null oldmem behaves like dlmalloc(bytes). When
+ REALLOC_ZERO_BYTES_FREES is defined, bytes == 0 frees oldmem and
+ returns 0. Otherwise the work is done by internal_realloc on the global
+ state, or with FOOTERS on the state recorded for the chunk (a failed
+ ok_magic check triggers USAGE_ERROR_ACTION and returns 0).
+*/
+void* dlrealloc(void* oldmem, size_t bytes) {
+ if (oldmem == 0)
+ return dlmalloc(bytes);
+#ifdef REALLOC_ZERO_BYTES_FREES
+ if (bytes == 0) {
+ dlfree(oldmem);
+ return 0;
+ }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+ else { /* pairs with whichever `if` precedes it after preprocessing */
+#if ! FOOTERS
+ mstate m = gm;
+#else /* FOOTERS */
+ mstate m = get_mstate_for(mem2chunk(oldmem));
+ if (!ok_magic(m)) {
+ USAGE_ERROR_ACTION(m, oldmem);
+ return 0;
+ }
+#endif /* FOOTERS */
+ return internal_realloc(m, oldmem, bytes);
+ }
+}
+
+/* Aligned allocation from the global malloc state; see internal_memalign. */
+void* dlmemalign(size_t alignment, size_t bytes) {
+  void* aligned = internal_memalign(gm, alignment, bytes);
+  return aligned;
+}
+
+/*
+ Allocate n_elements zeroed chunks of elem_size bytes each from the
+ global malloc state via ialloc, filling `chunks` (or an allocated array
+ when chunks is 0) with their addresses.
+*/
+void** dlindependent_calloc(size_t n_elements, size_t elem_size,
+                            void* chunks[]) {
+  /* ialloc opts: bit 0 = all elements same size, bit 1 = zero the elements */
+  const int same_size_and_zeroed = 0x1 | 0x2;
+  size_t one_size[1]; /* one-entry sizes array read through sizes[0] */
+  one_size[0] = elem_size;
+  return ialloc(gm, n_elements, one_size, same_size_and_zeroed, chunks);
+}
+
+/*
+ Allocate n_elements chunks with individually specified sizes from the
+ global malloc state via ialloc (opts 0: sizes differ, no zeroing).
+*/
+void** dlindependent_comalloc(size_t n_elements, size_t sizes[],
+                              void* chunks[]) {
+  void** pointers = ialloc(gm, n_elements, sizes, 0, chunks);
+  return pointers;
+}
+
+/* Allocate `bytes` bytes aligned to mparams.page_size. */
+void* dlvalloc(size_t bytes) {
+  /* mparams must be set up before page_size is read */
+  ensure_initialization();
+  return dlmemalign(mparams.page_size, bytes);
+}
+
+/*
+ Allocate page-aligned memory whose size is `bytes` rounded up to a
+ multiple of mparams.page_size.
+
+ If rounding up would wrap around size_t, MAX_SIZE_T is requested
+ instead so the allocation fails downstream rather than silently
+ returning a much smaller block.
+ Returns the user pointer, or 0 on failure.
+*/
+void* dlpvalloc(size_t bytes) {
+  size_t pagesz;
+  size_t mask;
+  ensure_initialization();
+  pagesz = mparams.page_size;
+  mask = pagesz - SIZE_T_ONE;
+  if (bytes > MAX_SIZE_T - mask) /* bytes + mask would overflow */
+    return dlmemalign(pagesz, MAX_SIZE_T); /* force downstream failure */
+  return dlmemalign(pagesz, (bytes + mask) & ~mask);
+}
+
+/*
+ Run sys_trim on the global malloc state under its lock, keeping `pad`
+ bytes in top. Returns sys_trim's result, or 0 if PREACTION fails.
+*/
+int dlmalloc_trim(size_t pad) {
+  int trimmed;
+  ensure_initialization();
+  if (PREACTION(gm))
+    return 0;
+  trimmed = sys_trim(gm, pad);
+  POSTACTION(gm);
+  return trimmed;
+}
+
+/* Report the footprint field of the global malloc state. */
+size_t dlmalloc_footprint(void) {
+  const size_t current = gm->footprint;
+  return current;
+}
+
+/* Report the max_footprint field of the global malloc state. */
+size_t dlmalloc_max_footprint(void) {
+  const size_t peak = gm->max_footprint;
+  return peak;
+}
+
+#if !NO_MALLINFO
+struct mallinfo dlmallinfo(void) { /* returns internal_mallinfo's result for the global malloc state */
+ return internal_mallinfo(gm);
+}
+#endif /* !NO_MALLINFO */
+
+/* Run internal_malloc_stats on the global malloc state. */
+void dlmalloc_stats() {
+  mstate global_state = gm;
+  internal_malloc_stats(global_state);
+}
+
+/* Forward a tunable-parameter change to change_mparam and return its result. */
+int dlmallopt(int param_number, int value) {
+  int status = change_mparam(param_number, value);
+  return status;
+}
+
+#endif /* !ONLY_MSPACES */
+
+/*
+ Return the number of usable bytes in the chunk backing `mem`
+ (chunk size minus its overhead), or 0 if mem is null or the chunk is
+ not marked in use.
+*/
+size_t dlmalloc_usable_size(void* mem) {
+  mchunkptr chunk;
+  if (mem == 0)
+    return 0;
+  chunk = mem2chunk(mem);
+  if (!is_inuse(chunk))
+    return 0;
+  return chunksize(chunk) - overhead_for(chunk);
+}
+
+/* ----------------------------- user mspaces ---------------------------- */
+
+#if MSPACES
+/*
+ Build a malloc_state inside the region [tbase, tbase + tsize) and return it.
+
+ The state is placed in the first aligned chunk of the region and marked
+ in use; the remainder of the region (less TOP_FOOT_SIZE) becomes top.
+ Contiguous MORECORE use is disabled for the new state. The caller sets
+ seg.sflags and the lock flag afterwards.
+*/
+static mstate init_user_mstate(char* tbase, size_t tsize) {
+ size_t msize = pad_request(sizeof(struct malloc_state));
+ mchunkptr mn;
+ mchunkptr msp = align_as_chunk(tbase); /* chunk that will hold the state itself */
+ mstate m = (mstate)(chunk2mem(msp));
+ memset(m, 0, msize);
+ INITIAL_LOCK(&m->mutex);
+ msp->head = (msize|INUSE_BITS);
+ m->seg.base = m->least_addr = tbase;
+ m->seg.size = m->footprint = m->max_footprint = tsize;
+ m->magic = mparams.magic;
+ m->release_checks = MAX_RELEASE_CHECK_RATE;
+ m->mflags = mparams.default_mflags;
+ m->extp = 0;
+ m->exts = 0;
+ disable_contiguous(m);
+ init_bins(m);
+ mn = next_chunk(mem2chunk(m)); /* first chunk after the embedded state */
+ init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE);
+ check_top_chunk(m, m->top);
+ return m;
+}
+/*
+ Create a new mspace backed by memory obtained with CALL_MMAP.
+
+ capacity: requested initial capacity; 0 selects mparams.granularity.
+           Otherwise room for the malloc_state and TOP_FOOT_SIZE is added
+           and the total is rounded up with granularity_align.
+ locked:   value passed to set_lock for the new space.
+
+ Returns the new mspace, or 0 if the capacity is too large or the
+ mapping fails.
+*/
+mspace create_mspace(size_t capacity, int locked) {
+ mstate m = 0;
+ size_t msize;
+ ensure_initialization();
+ msize = pad_request(sizeof(struct malloc_state));
+ if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { /* reject capacities near the size_t maximum */
+ size_t rs = ((capacity == 0)? mparams.granularity :
+ (capacity + TOP_FOOT_SIZE + msize));
+ size_t tsize = granularity_align(rs);
+ char* tbase = (char*)(CALL_MMAP(tsize));
+ if (tbase != CMFAIL) {
+ m = init_user_mstate(tbase, tsize);
+ m->seg.sflags = USE_MMAP_BIT;
+ set_lock(m, locked);
+ }
+ }
+ return (mspace)m;
+}
+/*
+ Create an mspace inside caller-provided memory.
+
+ base:     start of the caller's region.
+ capacity: size of that region; must exceed the padded malloc_state size
+           plus TOP_FOOT_SIZE and stay below the upper bound tested below.
+ locked:   value passed to set_lock for the new space.
+
+ The initial segment is flagged EXTERN_BIT. Returns the new mspace, or 0
+ if capacity is out of range.
+ NOTE(review): base is not checked for null here.
+*/
+mspace create_mspace_with_base(void* base, size_t capacity, int locked) {
+ mstate m = 0;
+ size_t msize;
+ ensure_initialization();
+ msize = pad_request(sizeof(struct malloc_state));
+ if (capacity > msize + TOP_FOOT_SIZE &&
+ capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
+ m = init_user_mstate((char*)base, capacity);
+ m->seg.sflags = EXTERN_BIT;
+ set_lock(m, locked);
+ }
+ return (mspace)m;
+}
+/*
+ Turn direct mmap use for this mspace off (enable != 0) or on
+ (enable == 0).
+
+ Returns 1 if direct mmap was already off before the call, otherwise 0.
+ Also returns 0, without changing anything, if PREACTION fails.
+*/
+int mspace_track_large_chunks(mspace msp, int enable) {
+ int ret = 0;
+ mstate ms = (mstate)msp;
+ if (!PREACTION(ms)) {
+ if (!use_mmap(ms)) /* record the setting in effect before the change */
+ ret = 1;
+ if (!enable)
+ enable_mmap(ms);
+ else
+ disable_mmap(ms);
+ POSTACTION(ms);
+ }
+ return ret;
+}
+/*
+ Destroy an mspace by unmapping every segment that is flagged
+ USE_MMAP_BIT and not EXTERN_BIT.
+
+ Returns the total size of the segments successfully unmapped. If the
+ state fails ok_magic, USAGE_ERROR_ACTION is invoked and 0 is returned.
+*/
+size_t destroy_mspace(mspace msp) {
+ size_t freed = 0;
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ msegmentptr sp = &ms->seg;
+ while (sp != 0) {
+ char* base = sp->base;
+ size_t size = sp->size;
+ flag_t flag = sp->sflags;
+ sp = sp->next; /* all fields are copied out before CALL_MUNMAP below */
+ if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) &&
+ CALL_MUNMAP(base, size) == 0)
+ freed += size;
+ }
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return freed;
+}
+
+/*
+ mspace versions of routines are near-clones of the global
+ versions. This is not so nice but better than the alternatives.
+*/
+
+/*
+ Allocate `bytes` bytes from the given mspace.
+
+ Follows the same sequence as dlmalloc, operating on `ms` instead of the
+ global state: remainderless smallbin fit, next non-empty smallbin,
+ tmalloc_small / tmalloc_large, the dv chunk, the top chunk, and
+ finally sys_alloc.
+
+ Returns the user pointer, or 0 if the state fails ok_magic (after
+ USAGE_ERROR_ACTION), if PREACTION fails, or if no memory is available.
+*/
+void* mspace_malloc(mspace msp, size_t bytes) {
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ if (!PREACTION(ms)) {
+ void* mem;
+ size_t nb; /* padded chunk size actually requested */
+ if (bytes <= MAX_SMALL_REQUEST) {
+ bindex_t idx;
+ binmap_t smallbits;
+ nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+ idx = small_index(nb);
+ smallbits = ms->smallmap >> idx; /* bit 0 now refers to bin idx, bit 1 to bin idx+1 */
+
+ if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+ mchunkptr b, p;
+ idx += ~smallbits & 1; /* Uses next bin if idx empty */
+ b = smallbin_at(ms, idx);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(idx));
+ unlink_first_small_chunk(ms, b, p, idx);
+ set_inuse_and_pinuse(ms, p, small_index2size(idx));
+ mem = chunk2mem(p);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb > ms->dvsize) { /* dv cannot satisfy the request */
+ if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+ mchunkptr b, p, r;
+ size_t rsize;
+ bindex_t i;
+ binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+ binmap_t leastbit = least_bit(leftbits);
+ compute_bit2idx(leastbit, i);
+ b = smallbin_at(ms, i);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(i));
+ unlink_first_small_chunk(ms, b, p, i);
+ rsize = small_index2size(i) - nb;
+ /* Fit here cannot be remainderless if 4byte sizes */
+ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(ms, p, small_index2size(i));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+ r = chunk_plus_offset(p, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ replace_dv(ms, r, rsize);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+ }
+ }
+ else if (bytes >= MAX_REQUEST)
+ nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+ else {
+ nb = pad_request(bytes);
+ if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+ }
+
+ if (nb <= ms->dvsize) { /* serve the request from the dv chunk */
+ size_t rsize = ms->dvsize - nb;
+ mchunkptr p = ms->dv;
+ if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+ mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
+ ms->dvsize = rsize;
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+ }
+ else { /* exhaust dv */
+ size_t dvs = ms->dvsize;
+ ms->dvsize = 0;
+ ms->dv = 0;
+ set_inuse_and_pinuse(ms, p, dvs);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb < ms->topsize) { /* Split top */
+ size_t rsize = ms->topsize -= nb;
+ mchunkptr p = ms->top;
+ mchunkptr r = ms->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+ mem = chunk2mem(p);
+ check_top_chunk(ms, ms->top);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ mem = sys_alloc(ms, nb); /* last resort: obtain more memory from the system */
+
+ postaction:
+ POSTACTION(ms);
+ return mem;
+ }
+
+ return 0;
+}
+
+/*
+ mspace_free: release the chunk whose user pointer is `mem`.
+
+ A null `mem` is ignored. When FOOTERS is enabled the owning mstate is
+ obtained from the chunk itself via get_mstate_for(p) and `msp` is not
+ consulted; otherwise `msp` is used directly. If the magic check fails,
+ USAGE_ERROR_ACTION is invoked and nothing is freed.
+
+ The freed chunk is merged with a free predecessor and/or successor
+ (including the dv and top chunks) and then either stays as dv/top or is
+ inserted into a smallbin or the tree. Chunks flagged as mmapped are
+ handed to CALL_MUNMAP instead. Failed RTCHECKs end at `erroraction`.
+*/
+void mspace_free(mspace msp, void* mem) {
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+#if FOOTERS
+ mstate fm = get_mstate_for(p);
+ msp = msp; /* placate people compiling -Wunused */
+#else /* FOOTERS */
+ mstate fm = (mstate)msp;
+#endif /* FOOTERS */
+ if (!ok_magic(fm)) {
+ USAGE_ERROR_ACTION(fm, p);
+ return;
+ }
+ if (!PREACTION(fm)) {
+ check_inuse_chunk(fm, p);
+ if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+ size_t psize = chunksize(p);
+ mchunkptr next = chunk_plus_offset(p, psize);
+ if (!pinuse(p)) {
+ size_t prevsize = p->prev_foot;
+ if (is_mmapped(p)) {
+ /* Unmap the whole region: leading prevsize bytes, chunk, and foot pad. */
+ psize += prevsize + MMAP_FOOT_PAD;
+ if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+ fm->footprint -= psize;
+ goto postaction;
+ }
+ else {
+ /* From here on p denotes the merged chunk starting at the predecessor. */
+ mchunkptr prev = chunk_minus_offset(p, prevsize);
+ psize += prevsize;
+ p = prev;
+ if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+ if (p != fm->dv) {
+ unlink_chunk(fm, p, prevsize);
+ }
+ else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+ /* Predecessor is dv and next is in use: just grow dv and finish. */
+ fm->dvsize = psize;
+ set_free_with_pinuse(p, psize, next);
+ goto postaction;
+ }
+ }
+ else
+ goto erroraction;
+ }
+ }
+
+ if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+ if (!cinuse(next)) { /* consolidate forward */
+ if (next == fm->top) {
+ /* Merge into top; p becomes the new top chunk. */
+ size_t tsize = fm->topsize += psize;
+ fm->top = p;
+ p->head = tsize | PINUSE_BIT;
+ if (p == fm->dv) {
+ fm->dv = 0;
+ fm->dvsize = 0;
+ }
+ if (should_trim(fm, tsize))
+ sys_trim(fm, 0);
+ goto postaction;
+ }
+ else if (next == fm->dv) {
+ /* Merge into dv; p becomes the new dv chunk. */
+ size_t dsize = fm->dvsize += psize;
+ fm->dv = p;
+ set_size_and_pinuse_of_free_chunk(p, dsize);
+ goto postaction;
+ }
+ else {
+ /* Absorb an ordinary free successor after unlinking it. */
+ size_t nsize = chunksize(next);
+ psize += nsize;
+ unlink_chunk(fm, next, nsize);
+ set_size_and_pinuse_of_free_chunk(p, psize);
+ if (p == fm->dv) {
+ fm->dvsize = psize;
+ goto postaction;
+ }
+ }
+ }
+ else
+ set_free_with_pinuse(p, psize, next);
+
+ /* The chunk is neither top nor dv: file it by size. */
+ if (is_small(psize)) {
+ insert_small_chunk(fm, p, psize);
+ check_free_chunk(fm, p);
+ }
+ else {
+ tchunkptr tp = (tchunkptr)p;
+ insert_large_chunk(fm, tp, psize);
+ check_free_chunk(fm, p);
+ /* release_unused_segments runs when the release_checks countdown hits zero. */
+ if (--fm->release_checks == 0)
+ release_unused_segments(fm);
+ }
+ goto postaction;
+ }
+ }
+ erroraction:
+ USAGE_ERROR_ACTION(fm, p);
+ postaction:
+ POSTACTION(fm);
+ }
+ }
+}
+
+/*
+  mspace_calloc: allocate room for n_elements items of elem_size bytes
+  from mspace `msp`. The memory is zeroed with memset unless
+  calloc_must_clear() reports the chunk need not be cleared.
+  Returns 0 when the magic check fails or the allocation fails.
+*/
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) {
+  mstate ms = (mstate)msp;
+  size_t total = 0;
+  void* block;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  if (n_elements != 0) {
+    total = n_elements * elem_size;
+    /* The division is only needed when an operand has bits above the low 16. */
+    if (((n_elements | elem_size) & ~(size_t)0xffff) != 0) {
+      if (total / n_elements != elem_size)
+        total = MAX_SIZE_T; /* overflow: make the allocation below fail */
+    }
+  }
+  block = internal_malloc(ms, total);
+  if (block != 0) {
+    if (calloc_must_clear(mem2chunk(block)))
+      memset(block, 0, total);
+  }
+  return block;
+}
+
+/*
+  mspace_realloc: resize `oldmem` to `bytes` bytes.
+
+  A null `oldmem` is forwarded to mspace_malloc. When
+  REALLOC_ZERO_BYTES_FREES is defined, a zero `bytes` frees `oldmem` and
+  returns 0. Otherwise the request goes to internal_realloc on the owning
+  mstate (taken from the chunk when FOOTERS is enabled, else from `msp`).
+  Returns 0 if that mstate fails the magic check.
+*/
+void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) {
+  if (oldmem == 0)
+    return mspace_malloc(msp, bytes);
+#ifdef REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0) {
+    mspace_free(msp, oldmem);
+    return 0;
+  }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+  /* Both earlier branches return, so a plain block replaces the trailing else. */
+  {
+#if FOOTERS
+    mchunkptr chunk = mem2chunk(oldmem);
+    mstate ms = get_mstate_for(chunk);
+#else /* FOOTERS */
+    mstate ms = (mstate)msp;
+#endif /* FOOTERS */
+    if (ok_magic(ms))
+      return internal_realloc(ms, oldmem, bytes);
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+}
+
+/*
+  mspace_memalign: forward an aligned allocation request to
+  internal_memalign for mspace `msp`. Returns 0 (after
+  USAGE_ERROR_ACTION) if the mspace fails the magic check.
+*/
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms))
+    return internal_memalign(ms, alignment, bytes);
+  USAGE_ERROR_ACTION(ms,ms);
+  return 0;
+}
+
+/*
+  mspace_independent_calloc: hand n_elements requests of elem_size bytes
+  each to ialloc for mspace `msp`. Returns 0 (after USAGE_ERROR_ACTION)
+  if the mspace fails the magic check; otherwise whatever ialloc returns.
+*/
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
+                                 size_t elem_size, void* chunks[]) {
+  mstate ms = (mstate)msp;
+  /* ialloc takes a size array; a single local stands in as a 1-element one. */
+  size_t one_size = elem_size;
+  if (ok_magic(ms))
+    return ialloc(ms, n_elements, &one_size, 3, chunks);
+  USAGE_ERROR_ACTION(ms,ms);
+  return 0;
+}
+
+/*
+  mspace_independent_comalloc: hand n_elements requests, sized by the
+  caller-supplied `sizes` array, to ialloc for mspace `msp`. Returns 0
+  (after USAGE_ERROR_ACTION) if the mspace fails the magic check;
+  otherwise whatever ialloc returns.
+*/
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+                                   size_t sizes[], void* chunks[]) {
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms))
+    return ialloc(ms, n_elements, sizes, 0, chunks);
+  USAGE_ERROR_ACTION(ms,ms);
+  return 0;
+}
+
+/*
+  mspace_trim: run sys_trim(ms, pad) on mspace `msp` between PREACTION
+  and POSTACTION and return its result. Returns 0 when the magic check
+  fails (after USAGE_ERROR_ACTION) or when PREACTION evaluates nonzero.
+*/
+int mspace_trim(mspace msp, size_t pad) {
+  mstate ms = (mstate)msp;
+  int released;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  if (PREACTION(ms))
+    return 0;
+  released = sys_trim(ms, pad);
+  POSTACTION(ms);
+  return released;
+}
+
+/*
+  mspace_malloc_stats: call internal_malloc_stats for mspace `msp`, or
+  USAGE_ERROR_ACTION if the mspace fails the magic check.
+*/
+void mspace_malloc_stats(mspace msp) {
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return;
+  }
+  internal_malloc_stats(ms);
+}
+
+/*
+  mspace_footprint: return the `footprint` field of mspace `msp`, or 0
+  (after USAGE_ERROR_ACTION) if the mspace fails the magic check.
+*/
+size_t mspace_footprint(mspace msp) {
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms))
+    return ms->footprint;
+  USAGE_ERROR_ACTION(ms,ms);
+  return 0;
+}
+
+
+/*
+  mspace_max_footprint: return the `max_footprint` field of mspace `msp`,
+  or 0 (after USAGE_ERROR_ACTION) if the mspace fails the magic check.
+*/
+size_t mspace_max_footprint(mspace msp) {
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms))
+    return ms->max_footprint;
+  USAGE_ERROR_ACTION(ms,ms);
+  return 0;
+}
+
+
+#if !NO_MALLINFO
+/*
+  mspace_mallinfo: return internal_mallinfo() for mspace `msp`.
+
+  If the mspace fails the magic check, USAGE_ERROR_ACTION is invoked and,
+  should that action return, an all-zero struct mallinfo is handed back.
+  The invalid state is never passed on to internal_mallinfo, which matches
+  the other mspace_* entry points that return a neutral value on a bad
+  magic check.
+*/
+struct mallinfo mspace_mallinfo(mspace msp) {
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    struct mallinfo none;
+    USAGE_ERROR_ACTION(ms,ms);
+    /* Do not inspect a state that failed validation. */
+    memset(&none, 0, sizeof(none));
+    return none;
+  }
+  return internal_mallinfo(ms);
+}
+#endif /* NO_MALLINFO */
+
+/*
+  mspace_usable_size: for a non-null `mem` whose chunk is marked in use,
+  return the chunk size minus its overhead; return 0 otherwise.
+*/
+size_t mspace_usable_size(void* mem) {
+  mchunkptr chunk;
+  if (mem == 0)
+    return 0;
+  chunk = mem2chunk(mem);
+  if (!is_inuse(chunk))
+    return 0;
+  return chunksize(chunk) - overhead_for(chunk);
+}
+
+/*
+  mspace_mallopt: pass (param_number, value) to change_mparam and return
+  its result unchanged.
+*/
+int mspace_mallopt(int param_number, int value) {
+  int status = change_mparam(param_number, value);
+  return status;
+}
+
+#endif /* MSPACES */
+
+
+/* -------------------- Alternative MORECORE functions ------------------- */
+
+/*
+ Guidelines for creating a custom version of MORECORE:
+
+ * For best performance, MORECORE should allocate in multiples of pagesize.
+ * MORECORE may allocate more memory than requested. (Or even less,
+ but this will usually result in a malloc failure.)
+ * MORECORE must not allocate memory when given argument zero, but
+ instead return one past the end address of memory from previous
+ nonzero call.
+ * For best performance, consecutive calls to MORECORE with positive
+ arguments should return increasing addresses, indicating that
+ space has been contiguously extended.
+ * Even though consecutive calls to MORECORE need not return contiguous
+ addresses, it must be OK for malloc'ed chunks to span multiple
+ regions in those cases where they do happen to be contiguous.
+ * MORECORE need not handle negative arguments -- it may instead
+ just return MFAIL when given negative arguments.
+ Negative arguments are always multiples of pagesize. MORECORE
+ must not misinterpret negative args as large positive unsigned
+ args. You can suppress all such calls from even occurring by defining
+ MORECORE_CANNOT_TRIM.
+
+ As an example alternative MORECORE, here is a custom allocator
+ kindly contributed for pre-OSX macOS. It uses virtually but not
+ necessarily physically contiguous non-paged memory (locked in,
+ present and won't get swapped out). You can use it by uncommenting
+ this section, adding some #includes, and setting up the appropriate
+ defines above:
+
+ #define MORECORE osMoreCore
+
+ There is also a shutdown routine that should somehow be called for
+ cleanup upon program exit.
+
+ #define MAX_POOL_ENTRIES 100
+ #define MINIMUM_MORECORE_SIZE (64 * 1024U)
+ static int next_os_pool;
+ void *our_os_pools[MAX_POOL_ENTRIES];
+
+ void *osMoreCore(int size)
+ {
+ void *ptr = 0;
+ static void *sbrk_top = 0;
+
+ if (size > 0)
+ {
+ if (size < MINIMUM_MORECORE_SIZE)
+ size = MINIMUM_MORECORE_SIZE;
+ if (CurrentExecutionLevel() == kTaskLevel)
+ ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+ if (ptr == 0)
+ {
+ return (void *) MFAIL;
+ }
+ // save ptrs so they can be freed during cleanup
+ our_os_pools[next_os_pool] = ptr;
+ next_os_pool++;
+ ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+ sbrk_top = (char *) ptr + size;
+ return ptr;
+ }
+ else if (size < 0)
+ {
+ // we don't currently support shrink behavior
+ return (void *) MFAIL;
+ }
+ else
+ {
+ return sbrk_top;
+ }
+ }
+
+ // cleanup any allocated memory pools
+ // called as last thing before shutting down driver
+
+ void osCleanupMem(void)
+ {
+ void **ptr;
+
+ for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+ if (*ptr)
+ {
+ PoolDeallocate(*ptr);
+ *ptr = 0;
+ }
+ }
+
+*/
+
+
+/* -----------------------------------------------------------------------
+History:
+ V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee)
+ * Use zeros instead of prev foot for is_mmapped
+ * Add mspace_track_large_chunks; thanks to Jean Brouwers
+ * Fix set_inuse in internal_realloc; thanks to Jean Brouwers
+ * Fix insufficient sys_alloc padding when using 16byte alignment
+ * Fix bad error check in mspace_footprint
+ * Adaptations for ptmalloc; thanks to Wolfram Gloger.
+ * Reentrant spin locks; thanks to Earl Chew and others
+ * Win32 improvements; thanks to Niall Douglas and Earl Chew
+ * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options
+ * Extension hook in malloc_state
+ * Various small adjustments to reduce warnings on some compilers
+ * Various configuration extensions/changes for more platforms. Thanks
+ to all who contributed these.
+
+ V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee)
+ * Add max_footprint functions
+ * Ensure all appropriate literals are size_t
+ * Fix conditional compilation problem for some #define settings
+ * Avoid concatenating segments with the one provided
+ in create_mspace_with_base
+ * Rename some variables to avoid compiler shadowing warnings
+ * Use explicit lock initialization.
+ * Better handling of sbrk interference.
+ * Simplify and fix segment insertion, trimming and mspace_destroy
+ * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
+ * Thanks especially to Dennis Flanagan for help on these.
+
+ V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee)
+ * Fix memalign brace error.
+
+ V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee)
+ * Fix improper #endif nesting in C++
+ * Add explicit casts needed for C++
+
+ V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee)
+ * Use trees for large bins
+ * Support mspaces
+ * Use segments to unify sbrk-based and mmap-based system allocation,
+ removing need for emulation on most platforms without sbrk.
+ * Default safety checks
+ * Optional footer checks. Thanks to William Robertson for the idea.
+ * Internal code refactoring
+ * Incorporate suggestions and platform-specific changes.
+ Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
+ Aaron Bachmann, Emery Berger, and others.
+ * Speed up non-fastbin processing enough to remove fastbins.
+ * Remove useless cfree() to avoid conflicts with other apps.
+ * Remove internal memcpy, memset. Compilers handle builtins better.
+ * Remove some options that no one ever used and rename others.
+
+ V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
+ * Fix malloc_state bitmap array misdeclaration
+
+ V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee)
+ * Allow tuning of FIRST_SORTED_BIN_SIZE
+ * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
+ * Better detection and support for non-contiguousness of MORECORE.
+ Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
+ * Bypass most of malloc if no frees. Thanks To Emery Berger.
+ * Fix freeing of old top non-contiguous chunk in sysmalloc.
+ * Raised default trim and map thresholds to 256K.
+ * Fix mmap-related #defines. Thanks to Lubos Lunak.
+ * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
+ * Branch-free bin calculation
+ * Default trim and mmap thresholds now 256K.
+
+ V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
+ * Introduce independent_comalloc and independent_calloc.
+ Thanks to Michael Pachos for motivation and help.
+ * Make optional .h file available
+ * Allow > 2GB requests on 32bit systems.
+ * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
+ Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
+ and Anonymous.
+ * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
+ helping test this.)
+ * memalign: check alignment arg
+ * realloc: don't try to shift chunks backwards, since this
+ leads to more fragmentation in some programs and doesn't
+ seem to help in any others.
+ * Collect all cases in malloc requiring system memory into sysmalloc
+ * Use mmap as backup to sbrk
+ * Place all internal state in malloc_state
+ * Introduce fastbins (although similar to 2.5.1)
+ * Many minor tunings and cosmetic improvements
+ * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
+ * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
+ Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
+ * Include errno.h to support default failure action.
+
+ V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee)
+ * return null for negative arguments
+ * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
+ * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
+ (e.g. WIN32 platforms)
+ * Cleanup header file inclusion for WIN32 platforms
+ * Cleanup code to avoid Microsoft Visual C++ compiler complaints
+ * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
+ memory allocation routines
+ * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
+ * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
+ usage of 'assert' in non-WIN32 code
+ * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
+ avoid infinite loop
+ * Always call 'fREe()' rather than 'free()'
+
+ V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee)
+ * Fixed ordering problem with boundary-stamping
+
+ V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee)
+ * Added pvalloc, as recommended by H.J. Liu
+ * Added 64bit pointer support mainly from Wolfram Gloger
+ * Added anonymously donated WIN32 sbrk emulation
+ * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
+ * malloc_extend_top: fix mask error that caused wastage after
+ foreign sbrks
+ * Add linux mremap support code from HJ Liu
+
+ V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
+ * Integrated most documentation with the code.
+ * Add support for mmap, with help from
+ Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+ * Use last_remainder in more cases.
+ * Pack bins using idea from colin@nyx10.cs.du.edu
+ * Use ordered bins instead of best-fit threshold
+ * Eliminate block-local decls to simplify tracing and debugging.
+ * Support another case of realloc via move into top
+ * Fix error occurring when initial sbrk_base not word-aligned.
+ * Rely on page size for units instead of SBRK_UNIT to
+ avoid surprises about sbrk alignment conventions.
+ * Add mallinfo, mallopt. Thanks to Raymond Nijssen
+ (raymond@es.ele.tue.nl) for the suggestion.
+ * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
+ * More precautions for cases where other routines call sbrk,
+ courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+ * Added macros etc., allowing use in linux libc from
+ H.J. Lu (hjl@gnu.ai.mit.edu)
+ * Inverted this history list
+
+ V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
+ * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
+ * Removed all preallocation code since under current scheme
+ the work required to undo bad preallocations exceeds
+ the work saved in good cases for most test programs.
+ * No longer use return list or unconsolidated bins since
+ no scheme using them consistently outperforms those that don't
+ given above changes.
+ * Use best fit for very large chunks to prevent some worst-cases.
+ * Added some support for debugging
+
+ V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
+ * Removed footers when chunks are in use. Thanks to
+ Paul Wilson (wilson@cs.texas.edu) for the suggestion.
+
+ V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
+ * Added malloc_trim, with help from Wolfram Gloger
+ (wmglo@Dent.MED.Uni-Muenchen.DE).
+
+ V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
+
+ V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
+ * realloc: try to expand in both directions
+ * malloc: swap order of clean-bin strategy;
+ * realloc: only conditionally expand backwards
+ * Try not to scavenge used bins
+ * Use bin counts as a guide to preallocation
+ * Occasionally bin return list chunks in first scan
+ * Add a few optimizations from colin@nyx10.cs.du.edu
+
+ V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
+ * faster bin computation & slightly different binning
+ * merged all consolidations to one part of malloc proper
+ (eliminating old malloc_find_space & malloc_clean_bin)
+ * Scan 2 returns chunks (not just 1)
+ * Propagate failure in realloc if malloc returns 0
+ * Add stuff to allow compilation on non-ANSI compilers
+ from kpv@research.att.com
+
+ V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
+ * removed potential for odd address access in prev_chunk
+ * removed dependency on getpagesize.h
+ * misc cosmetics and a bit more internal documentation
+ * anticosmetics: mangled names in macros to evade debugger strangeness
+ * tested on sparc, hp-700, dec-mips, rs6000
+ with gcc & native cc (hp, dec only) allowing
+ Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+
+ Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu)
+ * Based loosely on libg++-1.2X malloc. (It retains some of the overall
+ structure of old version, but most details differ.)
+
+*/
+
+#endif
diff --git a/drivers/nedmalloc/nedmalloc.cpp b/drivers/nedmalloc/nedmalloc.cpp
index 8845d96549..9aac277a2a 100644
--- a/drivers/nedmalloc/nedmalloc.cpp
+++ b/drivers/nedmalloc/nedmalloc.cpp
@@ -1,1467 +1,1467 @@
-#ifdef NEDMALLOC_ENABLED
-/* Alternative malloc implementation for multiple threads without
-lock contention based on dlmalloc. (C) 2005-2009 Niall Douglas
-
-Boost Software License - Version 1.0 - August 17th, 2003
-
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-#ifdef _MSC_VER
-/* Enable full aliasing on MSVC */
-/*#pragma optimize("a", on)*/
-#pragma warning(push)
-#pragma warning(disable:4100) /* unreferenced formal parameter */
-#pragma warning(disable:4127) /* conditional expression is constant */
-#pragma warning(disable:4706) /* assignment within conditional expression */
-#endif
-
-/*#define ENABLE_TOLERANT_NEDMALLOC 1*/
-/*#define ENABLE_FAST_HEAP_DETECTION 1*/
-/*#define NEDMALLOC_DEBUG 1*/
-
-/*#define FULLSANITYCHECKS*/
-/* If link time code generation is on, don't force or prevent inlining */
-#if defined(_MSC_VER) && defined(NEDMALLOC_DLL_EXPORTS)
-#define FORCEINLINE
-#define NOINLINE
-#endif
-
-
-#include "nedmalloc.h"
-#ifdef WIN32
- #include <malloc.h>
- #include <stddef.h>
-#endif
-#if USE_ALLOCATOR==1
- #define MSPACES 1
- #define ONLY_MSPACES 1
-#endif
-#define USE_DL_PREFIX 1
-#ifndef USE_LOCKS
- #define USE_LOCKS 1
-#endif
-#define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */
-#ifndef NEDMALLOC_DEBUG
- #if defined(DEBUG) || defined(_DEBUG)
- #define NEDMALLOC_DEBUG 1
- #else
- #define NEDMALLOC_DEBUG 0
- #endif
-#endif
-/* We need to consistently define DEBUG=0|1, _DEBUG and NDEBUG for dlmalloc */
-#undef DEBUG
-#undef _DEBUG
-#if NEDMALLOC_DEBUG
- #define _DEBUG
- #define DEBUG 1
-#else
- #define DEBUG 0
-#endif
-#ifdef NDEBUG /* Disable assert checking on release builds */
- #undef DEBUG
- #undef _DEBUG
-#endif
-/* The default of 64Kb means we spend too much time kernel-side */
-#ifndef DEFAULT_GRANULARITY
-#define DEFAULT_GRANULARITY (1*1024*1024)
-#if DEBUG
-#define DEFAULT_GRANULARITY_ALIGNED
-#endif
-#endif
-/*#define USE_SPIN_LOCKS 0*/
-
-
-#include "malloc.c.h"
-#ifdef NDEBUG /* Disable assert checking on release builds */
- #undef DEBUG
-#elif !NEDMALLOC_DEBUG
- #ifdef __GNUC__
- #warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.
- #elif defined(_MSC_VER)
- #pragma message(__FILE__ ": WARNING: DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.")
- #endif
-#endif
-
-/* The maximum concurrent threads in a pool possible */
-#ifndef MAXTHREADSINPOOL
-#define MAXTHREADSINPOOL 16
-#endif
-/* The maximum number of threadcaches which can be allocated */
-#ifndef THREADCACHEMAXCACHES
-#define THREADCACHEMAXCACHES 256
-#endif
-/* The maximum size to be allocated from the thread cache */
-#ifndef THREADCACHEMAX
-#define THREADCACHEMAX 8192
-#endif
-#if 0
-/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
-#define THREADCACHEMAXBINS ((13-4)*2)
-#else
-/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
-#define THREADCACHEMAXBINS (13-4)
-#endif
-/* Point at which the free space in a thread cache is garbage collected */
-#ifndef THREADCACHEMAXFREESPACE
-#define THREADCACHEMAXFREESPACE (512*1024)
-#endif
-
-
-#ifdef WIN32
- #define TLSVAR DWORD
- #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
- #define TLSFREE(k) (!TlsFree(k))
- #define TLSGET(k) TlsGetValue(k)
- #define TLSSET(k, a) (!TlsSetValue(k, a))
- #ifdef DEBUG
-static LPVOID ChkedTlsGetValue(DWORD idx)
-{
- LPVOID ret=TlsGetValue(idx);
- assert(S_OK==GetLastError());
- return ret;
-}
- #undef TLSGET
- #define TLSGET(k) ChkedTlsGetValue(k)
- #endif
-#else
- #define TLSVAR pthread_key_t
- #define TLSALLOC(k) pthread_key_create(k, 0)
- #define TLSFREE(k) pthread_key_delete(k)
- #define TLSGET(k) pthread_getspecific(k)
- #define TLSSET(k, a) pthread_setspecific(k, a)
-#endif
-
-#if defined(__cplusplus)
-#if !defined(NO_NED_NAMESPACE)
-namespace nedalloc {
-#else
-extern "C" {
-#endif
-#endif
-
-#if USE_ALLOCATOR==0
-static void *unsupported_operation(const char *opname) THROWSPEC
-{
- fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", opname);
- abort();
- return 0;
-}
-static size_t mspacecounter=(size_t) 0xdeadbeef;
-#endif
-#ifndef ENABLE_FAST_HEAP_DETECTION
-static void *RESTRICT leastusedaddress;
-static size_t largestusedblock;
-#endif
-
-static FORCEINLINE void *CallMalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
-{
- void *RESTRICT ret=0;
- size_t _alignment=alignment;
-#if USE_MAGIC_HEADERS
- size_t *_ret=0;
- size+=alignment+3*sizeof(size_t);
- _alignment=0;
-#endif
-#if USE_ALLOCATOR==0
- ret=_alignment ?
-#ifdef _MSC_VER
- /* This is the MSVCRT equivalent */
- _aligned_malloc(size, _alignment)
-#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
- /* This is the glibc/ptmalloc2/dlmalloc/BSD libc equivalent. */
- memalign(_alignment, size)
-#else
-#error Cannot aligned allocate with the memory allocator of an unknown system!
-#endif
- : malloc(size);
-#elif USE_ALLOCATOR==1
- ret=_alignment ? mspace_memalign((mstate) mspace, _alignment, size) : mspace_malloc((mstate) mspace, size);
-#ifndef ENABLE_FAST_HEAP_DETECTION
- if(ret)
- {
- size_t truesize=chunksize(mem2chunk(ret));
- if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
- if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
- }
-#endif
-#endif
- if(!ret) return 0;
-#if USE_MAGIC_HEADERS
- _ret=(size_t *) ret;
- ret=(void *)(_ret+3);
- if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
- for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *)"NEDMALOC";
- _ret[0]=(size_t) mspace;
- _ret[1]=size-3*sizeof(size_t);
-#endif
- return ret;
-}
-
-static FORCEINLINE void *CallCalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
-{
- void *RESTRICT ret=0;
-#if USE_MAGIC_HEADERS
- size_t *_ret=0;
- size+=alignment+3*sizeof(size_t);
-#endif
-#if USE_ALLOCATOR==0
- ret=calloc(1, size);
-#elif USE_ALLOCATOR==1
- ret=mspace_calloc((mstate) mspace, 1, size);
-#ifndef ENABLE_FAST_HEAP_DETECTION
- if(ret)
- {
- size_t truesize=chunksize(mem2chunk(ret));
- if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
- if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
- }
-#endif
-#endif
- if(!ret) return 0;
-#if USE_MAGIC_HEADERS
- _ret=(size_t *) ret;
- ret=(void *)(_ret+3);
- if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
- for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
- _ret[0]=(size_t) mspace;
- _ret[1]=size-3*sizeof(size_t);
-#endif
- return ret;
-}
-
-static FORCEINLINE void *CallRealloc(void *RESTRICT mspace, void *RESTRICT mem, int isforeign, size_t oldsize, size_t newsize) THROWSPEC
-{
- void *RESTRICT ret=0;
-#if USE_MAGIC_HEADERS
- mstate oldmspace=0;
- size_t *_ret=0, *_mem=(size_t *) mem-3;
-#endif
- if(isforeign)
- { /* Transfer */
-#if USE_MAGIC_HEADERS
- assert(_mem[0]!=*(size_t *) "NEDMALOC");
-#endif
- if((ret=CallMalloc(mspace, newsize, 0)))
- {
-#if defined(DEBUG)
- printf("*** nedmalloc frees system allocated block %p\n", mem);
-#endif
- memcpy(ret, mem, oldsize<newsize ? oldsize : newsize);
- free(mem);
- }
- return ret;
- }
-#if USE_MAGIC_HEADERS
- assert(_mem[0]==*(size_t *) "NEDMALOC");
- newsize+=3*sizeof(size_t);
- oldmspace=(mstate) _mem[1];
- assert(oldsize>=_mem[2]);
- for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
- mem=(void *)(++_mem);
-#endif
-#if USE_ALLOCATOR==0
- ret=realloc(mem, newsize);
-#elif USE_ALLOCATOR==1
- ret=mspace_realloc((mstate) mspace, mem, newsize);
-#ifndef ENABLE_FAST_HEAP_DETECTION
- if(ret)
- {
- size_t truesize=chunksize(mem2chunk(ret));
- if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
- }
-#endif
-#endif
- if(!ret)
- { /* Put it back the way it was */
-#if USE_MAGIC_HEADERS
- for(; *_mem==0; *_mem++=*(size_t *) "NEDMALOC");
-#endif
- return 0;
- }
-#if USE_MAGIC_HEADERS
- _ret=(size_t *) ret;
- ret=(void *)(_ret+3);
- for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
- _ret[0]=(size_t) mspace;
- _ret[1]=newsize-3*sizeof(size_t);
-#endif
- return ret;
-}
-
-static FORCEINLINE void CallFree(void *RESTRICT mspace, void *RESTRICT mem, int isforeign) THROWSPEC
-{
-#if USE_MAGIC_HEADERS
- mstate oldmspace=0;
- size_t *_mem=(size_t *) mem-3, oldsize=0;
-#endif
- if(isforeign)
- {
-#if USE_MAGIC_HEADERS
- assert(_mem[0]!=*(size_t *) "NEDMALOC");
-#endif
-#if defined(DEBUG)
- printf("*** nedmalloc frees system allocated block %p\n", mem);
-#endif
- free(mem);
- return;
- }
-#if USE_MAGIC_HEADERS
- assert(_mem[0]==*(size_t *) "NEDMALOC");
- oldmspace=(mstate) _mem[1];
- oldsize=_mem[2];
- for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
- mem=(void *)(++_mem);
-#endif
-#if USE_ALLOCATOR==0
- free(mem);
-#elif USE_ALLOCATOR==1
- mspace_free((mstate) mspace, mem);
-#endif
-}
-
-static NEDMALLOCNOALIASATTR mstate nedblkmstate(void *RESTRICT mem) THROWSPEC
-{
- if(mem)
- {
-#if USE_MAGIC_HEADERS
- size_t *_mem=(size_t *) mem-3;
- if(_mem[0]==*(size_t *) "NEDMALOC")
- {
- return (mstate) _mem[1];
- }
- else return 0;
-#else
-#if USE_ALLOCATOR==0
- /* Fail everything */
- return 0;
-#elif USE_ALLOCATOR==1
-#ifdef ENABLE_FAST_HEAP_DETECTION
-#ifdef WIN32
- /* On Windows for RELEASE both x86 and x64 the NT heap precedes each block with an eight byte header
- which looks like:
- normal: 4 bytes of size, 4 bytes of [char < 64, char < 64, char < 64 bit 0 always set, char random ]
- mmaped: 4 bytes of size 4 bytes of [zero, zero, 0xb, zero ]
-
- On Windows for DEBUG both x86 and x64 the preceding four bytes is always 0xfdfdfdfd (no man's land).
- */
-#pragma pack(push, 1)
- struct _HEAP_ENTRY
- {
- USHORT Size;
- USHORT PreviousSize;
- UCHAR Cookie; /* SegmentIndex */
- UCHAR Flags; /* always bit 0 (HEAP_ENTRY_BUSY). bit 1=(HEAP_ENTRY_EXTRA_PRESENT), bit 2=normal block (HEAP_ENTRY_FILL_PATTERN), bit 3=mmap block (HEAP_ENTRY_VIRTUAL_ALLOC). Bit 4 (HEAP_ENTRY_LAST_ENTRY) could be set */
- UCHAR UnusedBytes;
- UCHAR SmallTagIndex; /* fastbin index. Always one of 0x02, 0x03, 0x04 < 0x80 */
- } *RESTRICT he=((struct _HEAP_ENTRY *) mem)-1;
-#pragma pack(pop)
- unsigned int header=((unsigned int *)mem)[-1], mask1=0x8080E100, result1, mask2=0xFFFFFF06, result2;
- result1=header & mask1; /* Positive testing for NT heap */
- result2=header & mask2; /* Positive testing for dlmalloc */
- if(result1==0x00000100 && result2!=0x00000102)
- { /* This is likely a NT heap block */
- return 0;
- }
-#endif
-#ifdef __linux__
- /* On Linux glibc uses ptmalloc2 (really dlmalloc) just as we do, but prev_foot contains rubbish
- when the preceding block is allocated because ptmalloc2 finds the local mstate by rounding the ptr
- down to the nearest megabyte. It's like dlmalloc with FOOTERS disabled. */
- mchunkptr p=mem2chunk(mem);
- mstate fm=get_mstate_for(p);
- /* If it's a ptmalloc2 block, fm is likely to be some crazy value */
- if(!is_aligned(fm)) return 0;
- if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
- if(ok_magic(fm))
- return fm;
- else
- return 0;
- if(1) { }
-#endif
- else
- {
- mchunkptr p=mem2chunk(mem);
- mstate fm=get_mstate_for(p);
- assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */
- if(ok_magic(fm))
- return fm;
- }
-#else
-//#ifdef WIN32
-// __try
-//#endif
- {
- /* We try to return zero here if it isn't one of our own blocks, however
- the current block annotation scheme used by dlmalloc makes it impossible
- to be absolutely sure of avoiding a segfault.
-
- mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block;
- mchunkptr->head = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS
- FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently
- in use unless mmap), bit 2 is UNUSED and currently is always zero.
- */
- register void *RESTRICT leastusedaddress_=leastusedaddress; /* Cache these to avoid register reloading */
- register size_t largestusedblock_=largestusedblock;
- if(!is_aligned(mem)) return 0; /* Would fail very rarely as all allocators return aligned blocks */
- if(mem<leastusedaddress_) return 0; /* Simple but effective */
- {
- mchunkptr p=mem2chunk(mem);
- mstate fm=0;
- int ismmapped=is_mmapped(p);
- if((!ismmapped && !is_inuse(p)) || (p->head & FLAG4_BIT)) return 0;
- /* Reduced uncertainty by 0.5^2 = 25.0% */
- /* size should never exceed largestusedblock */
- if(chunksize(p)>largestusedblock_) return 0;
- /* Reduced uncertainty by a minimum of 0.5^3 = 12.5%, maximum 0.5^16 = 0.0015% */
- /* Having sanity checked prev_foot and head, check next block */
- if(!ismmapped && (!next_pinuse(p) || (next_chunk(p)->head & FLAG4_BIT))) return 0;
- /* Reduced uncertainty by 0.5^5 = 3.13% or 0.5^18 = 0.00038% */
- #if 0
- /* If previous block is free, check that its next block pointer equals us */
- if(!ismmapped && !pinuse(p))
- if(next_chunk(prev_chunk(p))!=p) return 0;
- /* We could start comparing prev_foot's for similarity but it starts getting slow. */
- #endif
- fm = get_mstate_for(p);
- if(!is_aligned(fm) || (void *)fm<leastusedaddress_) return 0;
- if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
- assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */
- if(ok_magic(fm))
- return fm;
- }
- }
-//#ifdef WIN32
-// __except(1) { }
-//#endif
-#endif
-#endif
-#endif
- }
- return 0;
-}
-NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC
-{ /* Returns the usable size of mem. Returns 0 when mem is null, or when the
- block is not recognised and the system-allocator fallback below is
- compiled out. If isforeign is non-null it is set to 1 first and reset to
- 0 only once the block has been identified as one of ours. */
- if(mem)
- {
- if(isforeign) *isforeign=1;
-#if USE_MAGIC_HEADERS
- {
- size_t *_mem=(size_t *) mem-3; /* three size_t words precede the block: tag, mstate, size */
- if(_mem[0]==*(size_t *) "NEDMALOC")
- {
- mstate mspace=(mstate) _mem[1];
- size_t size=_mem[2];
- if(isforeign) *isforeign=0;
- return size;
- }
- }
-#elif USE_ALLOCATOR==1
- if(nedblkmstate(mem))
- {
- mchunkptr p=mem2chunk(mem);
- if(isforeign) *isforeign=0;
- return chunksize(p)-overhead_for(p);
- }
-#ifdef DEBUG
- else
- {
- int a=1; /* Set breakpoints here if needed */
- }
-#endif
-#endif
-#if defined(ENABLE_TOLERANT_NEDMALLOC) || USE_ALLOCATOR==0
-#ifdef _MSC_VER
- /* This is the MSVCRT equivalent */
- return _msize(mem);
-#elif defined(__linux__)
- /* This is the glibc/ptmalloc2/dlmalloc equivalent. */
- return malloc_usable_size(mem);
-#elif defined(__FreeBSD__) || defined(__APPLE__)
- /* This is the BSD libc equivalent. */
- return malloc_size(mem);
-#else
-#error Cannot tolerate the memory allocator of an unknown system!
-#endif
-#endif
- }
- return 0;
-}
-
-/* Default-pool entry points. Every function below forwards to the matching
-   nedp* routine, passing a null pool pointer as the first argument. */
-NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC
-{
-    nedpsetvalue((nedpool *) 0, v);
-}
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC
-{
-    return nedpmalloc((nedpool *) 0, size);
-}
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC
-{
-    return nedpcalloc((nedpool *) 0, no, size);
-}
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC
-{
-    return nedprealloc((nedpool *) 0, mem, size);
-}
-NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC
-{
-    nedpfree((nedpool *) 0, mem);
-}
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC
-{
-    return nedpmemalign((nedpool *) 0, alignment, bytes);
-}
-NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC
-{
-    return nedpmallinfo((nedpool *) 0);
-}
-NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC
-{
-    return nedpmallopt((nedpool *) 0, parno, value);
-}
-NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC
-{
-    return nedpmalloc_trim((nedpool *) 0, pad);
-}
-void nedmalloc_stats() THROWSPEC
-{
-    nedpmalloc_stats((nedpool *) 0);
-}
-NEDMALLOCNOALIASATTR size_t nedmalloc_footprint() THROWSPEC
-{
-    return nedpmalloc_footprint((nedpool *) 0);
-}
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
-{
-    return nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks);
-}
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC
-{
-    return nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks);
-}
-
-struct threadcacheblk_t;
-typedef struct threadcacheblk_t threadcacheblk;
-struct threadcacheblk_t
-{ /* Header written over a freed block while it is parked in a thread cache bin.
- Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
-#ifdef FULLSANITYCHECKS
- unsigned int magic; /* "NEDN" while the block is cached; zeroed when it is handed back out */
-#endif
- unsigned int lastUsed, size; /* lastUsed: the cache's frees counter at the time of caching; size: bin-rounded size stored at free time */
- threadcacheblk *next, *prev; /* doubly linked bin list; new blocks are pushed at the head */
-};
-typedef struct threadcache_t
-{ /* Per-thread cache of recently freed small blocks */
-#ifdef FULLSANITYCHECKS
- unsigned int magic1; /* set in AllocCache from "NEDMALC1", verified in GetThreadCache */
-#endif
- int mymspace; /* Last mspace entry this thread used */
- long threadid; /* CURRENT_THREAD value of the owning thread; 0 once the cache has been torn down */
- unsigned int mallocs, frees, successes; /* lookups, blocks cached, and lookups served from the cache */
- size_t freeInCache; /* How much free space is stored in this cache */
- threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2]; /* per bin: [2*idx] is the list head (newest), [2*idx+1] the tail (oldest) */
-#ifdef FULLSANITYCHECKS
- unsigned int magic2; /* set in AllocCache from "NEDMALC2", verified in GetThreadCache */
-#endif
-} threadcache;
-struct nedpool_t
-{ /* One allocation pool: a set of mspaces plus the thread caches attached to it */
- MLOCK_T mutex; /* taken while the caches[] and m[] tables are modified */
- void *uservalue; /* opaque value stored by nedpsetvalue and returned by nedgetvalue */
- int threads; /* Max entries in m to use */
- threadcache *caches[THREADCACHEMAXCACHES]; /* slot n is referred to by the TLS value n+1 */
- TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
- mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */
-};
-static nedpool syspool; /* pool used whenever a caller passes a null pool pointer; initialised on first use via InitPool */
-
-static FORCEINLINE NEDMALLOCNOALIASATTR unsigned int size2binidx(size_t _size) THROWSPEC
-{ /* Returns the index of the highest set bit of (_size>>4), which the thread
- cache uses as the bin number for _size. Example sizes in binary:
- 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */
- unsigned int topbit, size=(unsigned int)(_size>>4);
- /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */
-
-#if defined(__GNUC__)
- topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size); /* NOTE(review): __builtin_clz(0) is undefined; callers appear to clamp sizes to sizeof(threadcacheblk) first - confirm size is never 0 here */
-#elif defined(_MSC_VER) && _MSC_VER>=1300
- {
- unsigned long bsrTopBit;
-
- _BitScanReverse(&bsrTopBit, size);
-
- topbit = bsrTopBit;
- }
-#else
-#if 0
- union {
- unsigned asInt[2];
- double asDouble;
- };
- int n;
-
- asDouble = (double)size + 0.5;
- topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
-#else
- { /* Portable fallback: smear the top bit downwards, invert, then count the remaining set bits (the leading zeros) */
- unsigned int x=size;
- x = x | (x >> 1);
- x = x | (x >> 2);
- x = x | (x >> 4);
- x = x | (x >> 8);
- x = x | (x >>16);
- x = ~x;
- x = x - ((x >> 1) & 0x55555555);
- x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
- x = (x + (x >> 4)) & 0x0F0F0F0F;
- x = x + (x << 8);
- x = x + (x << 16);
- topbit=31 - (x >> 24);
- }
-#endif
-#endif
- return topbit;
-}
-
-
-#ifdef FULLSANITYCHECKS
-static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
-{ /* Asserts the invariants of one bin, where ptr[0] is the list head and
-     ptr[1] the list tail: both are set or both are null, both blocks are
-     large enough to hold a threadcacheblk and carry the "NEDN" tag, the
-     head has no predecessor and the tail no successor.
-     nedblksize takes (isforeign, mem); a null isforeign is passed here as
-     at the other call sites in this file. */
-    assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
-    if(ptr[0] && ptr[1])
-    {
-        assert(nedblksize(0, ptr[0])>=sizeof(threadcacheblk));
-        assert(nedblksize(0, ptr[1])>=sizeof(threadcacheblk));
-        assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
-        assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
-        assert(!ptr[0]->prev);
-        assert(!ptr[1]->next);
-        if(ptr[0]==ptr[1])
-        { /* single-entry bin: the one block is both head and tail */
-            assert(!ptr[0]->next);
-            assert(!ptr[1]->prev);
-        }
-    }
-}
-static void tcfullsanitycheck(threadcache *tc) THROWSPEC
-{ /* Runs tcsanitycheck on every bin of tc, then walks each bin from its
-     head asserting the "NEDN" tag and that the next/prev links agree. */
-    int binno;
-    for(binno=0; binno<=THREADCACHEMAXBINS; binno++)
-    {
-        threadcacheblk **pair=&tc->bins[binno*2];
-        threadcacheblk *prior=0, *cur;
-        tcsanitycheck(pair);
-        cur=pair[0];
-        while(cur)
-        {
-            assert(*(unsigned int *) "NEDN"==cur->magic);
-            assert(!prior || prior->next==cur);
-            assert(!prior || cur->prev==prior);
-            prior=cur;
-            cur=cur->next;
-        }
-    }
-}
-#endif
-
-static NOINLINE void RemoveCacheEntries(nedpool *RESTRICT p, threadcache *RESTRICT tc, unsigned int age) THROWSPEC
-{ /* Evicts cached blocks from every bin of tc, oldest first, for as long as
- tc->frees minus the block's lastUsed stamp is at least age, passing each
- evicted block to CallFree. With age==0 the unsigned comparison always
- holds, so every bin is emptied. p is not referenced in this body. */
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
- if(tc->freeInCache)
- {
- threadcacheblk **tcbptr=tc->bins;
- int n;
- for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
- {
- threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */
- /*tcsanitycheck(tcbptr);*/
- for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
- {
- threadcacheblk *f=*tcb;
- size_t blksize=f->size; /*nedblksize(f);*/
- assert(blksize<=nedblksize(0, f));
- assert(blksize);
-#ifdef FULLSANITYCHECKS
- assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
-#endif
- *tcb=(*tcb)->prev; /* unlink the tail; its predecessor becomes the new tail */
- if(*tcb)
- (*tcb)->next=0;
- else
- *tcbptr=0; /* bin is now empty, so clear the head as well */
- tc->freeInCache-=blksize;
- assert((long) tc->freeInCache>=0);
- CallFree(0, f, 0);
- /*tcsanitycheck(tcbptr);*/
- }
- }
- }
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
-}
-static void DestroyCaches(nedpool *RESTRICT p) THROWSPEC
-{ /* Empties and frees every thread cache registered in p->caches, leaving
-     each slot null. */
-    int slot;
-    if(!p->caches) return;
-    for(slot=0; slot<THREADCACHEMAXCACHES; slot++)
-    {
-        threadcache *cache=p->caches[slot];
-        if(!cache) continue;
-        cache->frees++;
-        RemoveCacheEntries(p, cache, 0); /* age 0 evicts every cached block */
-        assert(!cache->freeInCache);
-        cache->mymspace=-1;
-        cache->threadid=0;
-        CallFree(0, cache, 0);
-        p->caches[slot]=0;
-    }
-}
-
-static NOINLINE threadcache *AllocCache(nedpool *RESTRICT p) THROWSPEC
-{ /* Allocates a zeroed thread cache for the calling thread, stores it in
-     the first free slot of p->caches and records slot+1 in the pool's TLS
-     variable. Returns the cache, or 0 when every slot is taken or the
-     allocation fails. Aborts if the TLS value cannot be stored. */
-    threadcache *tc=0;
-    int n, end;
-    ACQUIRE_LOCK(&p->mutex);
-    for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
-    if(THREADCACHEMAXCACHES==n)
-    { /* List exhausted, so disable for this thread */
-        RELEASE_LOCK(&p->mutex);
-        return 0;
-    }
-    tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], sizeof(threadcache), 0);
-    if(!tc)
-    {
-        RELEASE_LOCK(&p->mutex);
-        return 0;
-    }
-#ifdef FULLSANITYCHECKS
-    tc->magic1=*(unsigned int *)"NEDMALC1";
-    tc->magic2=*(unsigned int *)"NEDMALC2";
-#endif
-    tc->threadid=(long)(size_t)CURRENT_THREAD;
-    for(end=0; p->m[end]; end++);
-    /* Spread threads over the existing mspaces. The modulo is taken on the
-       unsigned value: abs() takes an int in C, so the long id was truncated
-       and abs(INT_MIN) could leave a negative index into p->m. */
-    tc->mymspace=(int)((unsigned long) tc->threadid % (unsigned long) end);
-    RELEASE_LOCK(&p->mutex);
-    if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
-    return tc;
-}
-
-static void *threadcache_malloc(nedpool *RESTRICT p, threadcache *RESTRICT tc, size_t *RESTRICT _size) THROWSPEC
-{ /* Tries to serve a request of *_size bytes from tc. The size is rounded up
- to its power-of-two bin size and written back through _size whether or
- not a block is found. Returns the head block of the matching bin (or of
- the next bin up), or 0 when neither bin can satisfy the request. */
- void *RESTRICT ret=0;
- size_t size=*_size, blksize=0;
- unsigned int bestsize;
- unsigned int idx=size2binidx(size);
- threadcacheblk *RESTRICT blk, **RESTRICT binsptr;
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
- /* Calculate best fit bin size */
- bestsize=1<<(idx+4);
-#if 0
- /* Finer grained bin fit */
- idx<<=1;
- if(size>bestsize)
- {
- idx++;
- bestsize+=bestsize>>1;
- }
- if(size>bestsize)
- {
- idx++;
- bestsize=1<<(4+(idx>>1));
- }
-#else
- if(size>bestsize)
- { /* not an exact power of two: use the next bin and its size */
- idx++;
- bestsize<<=1;
- }
-#endif
- assert(bestsize>=size);
- if(size<bestsize) size=bestsize;
- assert(size<=THREADCACHEMAX);
- assert(idx<=THREADCACHEMAXBINS);
- binsptr=&tc->bins[idx*2];
- /* Try to match close, but move up a bin if necessary */
- blk=*binsptr;
- if(!blk || blk->size<size)
- { /* Bump it up a bin */
- if(idx<THREADCACHEMAXBINS)
- {
- idx++;
- binsptr+=2;
- blk=*binsptr;
- }
- }
- if(blk)
- { /* pop blk from the head of its bin */
- blksize=blk->size; /*nedblksize(blk);*/
- assert(nedblksize(0, blk)>=blksize);
- assert(blksize>=size);
- if(blk->next)
- blk->next->prev=0;
- *binsptr=blk->next;
- if(!*binsptr)
- binsptr[1]=0; /* bin emptied: clear the tail pointer too */
-#ifdef FULLSANITYCHECKS
- blk->magic=0;
-#endif
- assert(binsptr[0]!=blk && binsptr[1]!=blk);
- assert(nedblksize(0, blk)>=sizeof(threadcacheblk) && nedblksize(0, blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
- /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) _size);*/
- ret=(void *) blk;
- }
- ++tc->mallocs;
- if(ret)
- {
- assert(blksize>=size);
- ++tc->successes;
- tc->freeInCache-=blksize;
- assert((long) tc->freeInCache>=0);
- }
-#if defined(DEBUG) && 0
- if(!(tc->mallocs & 0xfff))
- {
- printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
- (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
- }
-#endif
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
- *_size=size;
- return ret;
-}
-static NOINLINE void ReleaseFreeInCache(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace) THROWSPEC
-{ /* Trims tc while it holds THREADCACHEMAXFREESPACE bytes or more. The age
-     threshold starts at THREADCACHEMAXFREESPACE/8192 and is halved after
-     each pass until the cache is small enough or the threshold reaches 0.
-     mymspace is not used; no mspace lock is taken here. */
-    unsigned int threshold;
-    for(threshold=THREADCACHEMAXFREESPACE/8192; threshold!=0; threshold>>=1)
-    {
-        if(tc->freeInCache<THREADCACHEMAXFREESPACE) break;
-        RemoveCacheEntries(p, tc, threshold);
-    }
-}
-static void threadcache_free(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, void *RESTRICT mem, size_t size) THROWSPEC
-{ /* Parks the block mem (of usable size `size`) at the head of the matching
- bin of tc instead of returning it to its mspace. The recorded size is
- rounded down to the bin's power of two. Aborts when mem is already the
- head of that bin, and triggers ReleaseFreeInCache once the cache holds
- THREADCACHEMAXFREESPACE bytes or more. */
- unsigned int bestsize;
- unsigned int idx=size2binidx(size);
- threadcacheblk **RESTRICT binsptr, *RESTRICT tck=(threadcacheblk *) mem;
- assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
-#ifdef DEBUG
- /* Make sure this is a valid memory block */
- assert(nedblksize(0, mem));
-#endif
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
- /* Calculate best fit bin size */
- bestsize=1<<(idx+4);
-#if 0
- /* Finer grained bin fit */
- idx<<=1;
- if(size>bestsize)
- {
- unsigned int biggerbestsize=bestsize+bestsize<<1;
- if(size>=biggerbestsize)
- {
- idx++;
- bestsize=biggerbestsize;
- }
- }
-#endif
- if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */
- size=bestsize;
- binsptr=&tc->bins[idx*2];
- assert(idx<=THREADCACHEMAXBINS);
- if(tck==*binsptr)
- { /* only catches a repeat free of the block currently at the head of the bin */
- fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", tck);
- abort();
- }
-#ifdef FULLSANITYCHECKS
- tck->magic=*(unsigned int *) "NEDN";
-#endif
- tck->lastUsed=++tc->frees; /* age stamp later compared against tc->frees in RemoveCacheEntries */
- tck->size=(unsigned int) size;
- tck->next=*binsptr;
- tck->prev=0;
- if(tck->next)
- tck->next->prev=tck;
- else
- binsptr[1]=tck; /* bin was empty: the new block is also the tail */
- assert(!*binsptr || (*binsptr)->size==tck->size);
- *binsptr=tck;
- assert(tck==tc->bins[idx*2]);
- assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
- /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
- tc->freeInCache+=size;
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
-#if 1
- if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
- ReleaseFreeInCache(p, tc, mymspace);
-#endif
-}
-
-
-
-
-static NOINLINE int InitPool(nedpool *RESTRICT p, size_t capacity, int threads) THROWSPEC
-{ /* Initialises pool p under the global malloc lock: its mutex, its TLS
- slot and its first mspace. Returns 1 on success, and also when p->threads
- is already non-zero (pool already initialised). Returns 0 on failure,
- except that a failure with threads<0 aborts the process.
- threads is -1 for system pool */
- ensure_initialization();
- ACQUIRE_MALLOC_GLOBAL_LOCK();
- if(p->threads) goto done;
- if(INITIAL_LOCK(&p->mutex)) goto err;
- if(TLSALLOC(&p->mycache)) goto err;
-#if USE_ALLOCATOR==0
- p->m[0]=(mstate) mspacecounter++;
-#elif USE_ALLOCATOR==1
- if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
- p->m[0]->extp=p; /* back-pointer read by nedgetvalue to find the owning pool */
-#endif
- p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads; /* out-of-range requests get the maximum */
-done:
- RELEASE_MALLOC_GLOBAL_LOCK();
- return 1;
-err:
- if(threads<0)
- abort(); /* If you can't allocate for system pool, we're screwed */
- DestroyCaches(p);
- if(p->m[0])
- {
-#if USE_ALLOCATOR==1
- destroy_mspace(p->m[0]);
-#endif
- p->m[0]=0;
- }
- if(p->mycache)
- {
- if(TLSFREE(p->mycache)) abort();
- p->mycache=0;
- }
- RELEASE_MALLOC_GLOBAL_LOCK();
- return 0;
-}
-static NOINLINE mstate FindMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int *RESTRICT lastUsed, size_t size) THROWSPEC
-{ /* Gets called when thread's last used mspace is in use. The strategy
- is to run through the list of all available mspaces looking for an
- unlocked one and if we fail, we create a new one so long as we don't
- exceed p->threads.
- The returned mspace is locked. When a different mspace is chosen,
- *lastUsed is updated and the choice is remembered in tc->mymspace, or in
- the pool's TLS slot as -(n+1) when tc is null. */
- int n, end;
- for(n=end=*lastUsed+1; p->m[n]; end=++n)
- { /* first try the mspaces after the last used one */
- if(TRY_LOCK(&p->m[n]->mutex)) goto found;
- }
- for(n=0; n<*lastUsed && p->m[n]; n++)
- { /* then the ones before it */
- if(TRY_LOCK(&p->m[n]->mutex)) goto found;
- }
- if(end<p->threads)
- {
- mstate temp;
-#if USE_ALLOCATOR==0
- temp=(mstate) mspacecounter++;
-#elif USE_ALLOCATOR==1
- if(!(temp=(mstate) create_mspace(size, 1)))
- goto badexit;
-#endif
- /* Now we're ready to modify the lists, we lock */
- ACQUIRE_LOCK(&p->mutex);
- while(p->m[end] && end<p->threads)
- end++; /* skip slots filled since end was computed without the lock */
- if(end>=p->threads)
- { /* Drat, must destroy it now */
- RELEASE_LOCK(&p->mutex);
-#if USE_ALLOCATOR==1
- destroy_mspace((mstate) temp);
-#endif
- goto badexit;
- }
- /* We really want to make sure this goes into memory now but we
- have to be careful of breaking aliasing rules, so write it twice */
- *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp;
- ACQUIRE_LOCK(&p->m[end]->mutex);
- /*printf("Created mspace idx %d\n", end);*/
- RELEASE_LOCK(&p->mutex);
- n=end;
- goto found;
- }
- /* Let it lock on the last one it used */
-badexit:
- ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
- return p->m[*lastUsed];
-found:
- *lastUsed=n;
- if(tc)
- tc->mymspace=n;
- else
- {
- if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
- }
- return p->m[n];
-}
-
-typedef struct PoolList_t
-{ /* Growable registry of the pools created by nedcreatepool. The block is
- allocated larger than sizeof(PoolList), so list[] extends past its
- declared bound. */
- size_t size; /* Size of list */
- size_t length; /* Actual entries in list */
-#ifdef DEBUG
- nedpool *list[1]; /* Force testing of list expansion */
-#else
- nedpool *list[16];
-#endif
-} PoolList;
-static MLOCK_T poollistlock; /* initialised by nedcreatepool when the list is first allocated */
-static PoolList *poollist; /* null until the first pool is created and again after the last one is destroyed */
-NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
-{ /* Creates and initialises a new pool and appends it to the global pool
-     list, allocating or growing the list as needed. capacity and threads
-     are passed through to InitPool. Returns the new pool, or 0 when any
-     allocation or InitPool fails. */
-    nedpool *ret=0;
-    if(!poollist)
-    {
-        PoolList *newpoollist=0;
-        /* one extra slot beyond the declared array keeps a null entry after the last pool */
-        if(!(newpoollist=(PoolList *) nedpcalloc(0, 1, sizeof(PoolList)+sizeof(nedpool *)))) return 0;
-        INITIAL_LOCK(&poollistlock);
-        ACQUIRE_LOCK(&poollistlock);
-        poollist=newpoollist;
-        poollist->size=sizeof(poollist->list)/sizeof(nedpool *);
-    }
-    else
-        ACQUIRE_LOCK(&poollistlock);
-    if(poollist->length==poollist->size)
-    { /* list is full: grow it */
-        PoolList *newpoollist=0;
-        size_t newsize=0, usedbytes=0;
-        newsize=sizeof(PoolList)+(poollist->size+1)*sizeof(nedpool *);
-        if(!(newpoollist=(PoolList *) nedprealloc(0, poollist, newsize))) goto badexit;
-        poollist=newpoollist;
-        /* Zero the new tail. The byte count is measured from the start of
-           the allocation so the memset stops at its end; measuring from
-           list[0] overran it by the size of the header fields. */
-        usedbytes=(size_t)((char *)&poollist->list[poollist->size]-(char *)poollist);
-        memset(&poollist->list[poollist->size], 0, newsize-usedbytes);
-        poollist->size=((newsize-((char *)&poollist->list[0]-(char *)poollist))/sizeof(nedpool *))-1;
-        assert(poollist->size>poollist->length);
-    }
-    if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) goto badexit;
-    if(!InitPool(ret, capacity, threads))
-    {
-        nedpfree(0, ret);
-        ret=0; /* never hand the freed pool back to the caller */
-        goto badexit;
-    }
-    poollist->list[poollist->length++]=ret;
-badexit:
-    RELEASE_LOCK(&poollistlock);
-    return ret;
-}
-void neddestroypool(nedpool *p) THROWSPEC
-{ /* Destroys pool p: frees its thread caches and mspaces, releases its TLS
- slot, frees the pool structure and removes it from the global pool
- list. The list itself is freed when its last entry goes. */
- unsigned int n;
- ACQUIRE_LOCK(&p->mutex);
- DestroyCaches(p);
- for(n=0; p->m[n]; n++)
- {
-#if USE_ALLOCATOR==1
- destroy_mspace(p->m[n]);
-#endif
- p->m[n]=0;
- }
- RELEASE_LOCK(&p->mutex);
- if(TLSFREE(p->mycache)) abort();
- nedpfree(0, p); /* from here on p is only compared by value, never dereferenced */
- ACQUIRE_LOCK(&poollistlock);
- assert(poollist);
- for(n=0; n<poollist->length && poollist->list[n]!=p; n++);
- assert(n!=poollist->length);
- memmove(&poollist->list[n], &poollist->list[n+1], (size_t)&poollist->list[poollist->length]-(size_t)&poollist->list[n]); /* shifts entries n+1..length down one slot; list[length] is presumably the null slot kept after the last entry */
- if(!--poollist->length)
- {
- assert(!poollist->list[0]);
- nedpfree(0, poollist);
- poollist=0;
- }
- RELEASE_LOCK(&poollistlock);
-}
-void neddestroysyspool() THROWSPEC
-{ /* Tears down the system pool: frees its thread caches and mspaces, then
- fills both tables with a 0xdeadbeef pattern and releases the TLS slot. */
- nedpool *p=&syspool;
- int n;
- ACQUIRE_LOCK(&p->mutex);
- DestroyCaches(p);
- for(n=0; p->m[n]; n++)
- {
-#if USE_ALLOCATOR==1
- destroy_mspace(p->m[n]);
-#endif
- p->m[n]=0;
- }
- /* Render syspool unusable */
- for(n=0; n<THREADCACHEMAXCACHES; n++)
- p->caches[n]=(threadcache *)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
- for(n=0; n<MAXTHREADSINPOOL+1; n++)
- p->m[n]=(mstate)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
- if(TLSFREE(p->mycache)) abort();
- RELEASE_LOCK(&p->mutex); /* NOTE(review): syspool.threads is not reset here, so InitPool will treat the pool as still initialised - confirm intended */
-}
-nedpool **nedpoollist() THROWSPEC
-{ /* Returns a nedmalloc'ed copy of the first length+1 entries of the global
-     pool list, taken under the list lock. Returns 0 when no list exists or
-     the copy cannot be allocated. */
-    nedpool **snapshot;
-    size_t bytes;
-    if(!poollist) return 0;
-    ACQUIRE_LOCK(&poollistlock);
-    bytes=(poollist->length+1)*sizeof(nedpool *);
-    snapshot=(nedpool **) nedmalloc(bytes);
-    if(snapshot) memcpy(snapshot, poollist->list, bytes);
-    RELEASE_LOCK(&poollistlock);
-    return snapshot;
-}
-
-void nedpsetvalue(nedpool *p, void *v) THROWSPEC
-{ /* Stores v as the user value of pool p. A null p selects the system pool,
-     which is initialised first if it has not been used yet. */
-    nedpool *target=p;
-    if(!target)
-    {
-        target=&syspool;
-        if(!syspool.threads) InitPool(&syspool, 0, -1);
-    }
-    target->uservalue=v;
-}
-void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
-{ /* Looks up the pool owning block mem through its mspace's extp pointer.
-     Returns that pool's user value and, when p is non-null, stores the
-     pool in *p. Returns 0 when no mspace or no owning pool is found. */
-    mstate owner=nedblkmstate(mem);
-    if(owner && owner->extp)
-    {
-        nedpool *pool=(nedpool *) owner->extp;
-        if(p) *p=pool;
-        return pool->uservalue;
-    }
-    return 0;
-}
-
-void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC
-{ /* Empties the calling thread's cache for pool p (the system pool when p
-     is null). When disable is non-zero the cache is also freed and the
-     thread is switched to using an mspace directly. Does nothing for a
-     thread whose cache is already disabled. Aborts if the TLS value
-     cannot be stored. */
-    int mycache;
-    if(!p)
-    {
-        p=&syspool;
-        if(!syspool.threads) InitPool(&syspool, 0, -1);
-    }
-    mycache=(int)(size_t) TLSGET(p->mycache);
-    if(!mycache)
-    { /* Set to mspace 0 */
-        if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort();
-    }
-    else if(mycache>0)
-    { /* Set to last used mspace */
-        threadcache *tc=p->caches[mycache-1];
-#if defined(DEBUG)
-        printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
-            100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
-#endif
-        /* A disabled thread stores -(mspace index+1), the encoding written
-           by FindMSpace and decoded by GetThreadCache_cold2. Storing the
-           bare negated index turned mspace 0 into 0 ("unset") and every
-           other index into the one below it. */
-        if(disable && TLSSET(p->mycache, (void *)(size_t)(-(tc->mymspace+1)))) abort();
-        tc->frees++;
-        RemoveCacheEntries(p, tc, 0);
-        assert(!tc->freeInCache);
-        if(disable)
-        {
-            tc->mymspace=-1;
-            tc->threadid=0;
-            CallFree(0, p->caches[mycache-1], 0);
-            p->caches[mycache-1]=0;
-        }
-    }
-}
-void neddisablethreadcache(nedpool *p) THROWSPEC
-{ /* Shorthand for nedtrimthreadcache with its disable flag set. */
-    const int disable=1;
-    nedtrimthreadcache(p, disable);
-}
-
-#define GETMSPACE(m,p,tc,ms,s,action) \
- do \
- { \
- mstate m = GetMSpace((p),(tc),(ms),(s)); \
- action; \
- if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); } \
- } while (0) /* binds m to the mspace returned by GetMSpace, runs action, then unlocks m when USE_ALLOCATOR==1 */
-
-static FORCEINLINE mstate GetMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, size_t size) THROWSPEC
-{ /* Returns a locked and ready for use mspace. The thread's preferred
- mspace p->m[mymspace] is tried first; if its lock is busy FindMSpace
- picks another one. With USE_ALLOCATOR!=1 no lock is taken. */
- mstate m=p->m[mymspace];
- assert(m);
-#if USE_ALLOCATOR==1
- if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size); /* mymspace is a by-value parameter, so FindMSpace's update stays local */
- /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
-#endif
- return m;
-}
-static NOINLINE void GetThreadCache_cold1(nedpool *RESTRICT *RESTRICT p) THROWSPEC
-{ /* Out-of-line path for a null pool: initialises the system pool if it
-     has no threads value yet and points *p at it. */
-    if(!syspool.threads) InitPool(&syspool, 0, -1);
-    *p=&syspool;
-}
-static NOINLINE void GetThreadCache_cold2(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, int mycache) THROWSPEC
-{ /* Out-of-line path for a thread without a live cache. A non-zero mycache
-     means caching is disabled and encodes the mspace as -(index+1). A zero
-     mycache means no cache exists yet, so one is allocated; if that fails
-     the thread is marked disabled on mspace 0. */
-    threadcache *fresh;
-    if(mycache)
-    { /* Cache disabled, but we do have an assigned thread pool */
-        *tc=0;
-        *mymspace=-mycache-1;
-        return;
-    }
-    /* Need to allocate a new cache */
-    fresh=AllocCache(*p);
-    *tc=fresh;
-    if(fresh)
-    {
-        *mymspace=fresh->mymspace;
-        return;
-    }
-    /* Disable */
-    if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort();
-    *mymspace=0;
-}
-static FORCEINLINE void GetThreadCache(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, size_t *RESTRICT size) THROWSPEC
-{ /* Resolves the calling thread's pool, thread cache and mspace index.
- A null pool is replaced by the system pool. The cache output is set to 0
- when caching is disabled for this thread. When size is non-null the
- value it points at is raised to at least sizeof(threadcacheblk). */
- int mycache;
- if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk); /* a freed block must be able to hold the cache header */
- if(!*p)
- GetThreadCache_cold1(p);
- mycache=(int)(size_t) TLSGET((*p)->mycache);
- if(mycache>0)
- { /* Already have a cache */
- *tc=(*p)->caches[mycache-1];
- *mymspace=(*tc)->mymspace;
- }
- else GetThreadCache_cold2(p, tc, mymspace, mycache);
- assert(*mymspace>=0);
- assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid);
-#ifdef FULLSANITYCHECKS
- if(*tc)
- {
- if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
- {
- abort();
- }
- }
-#endif
-}
-
-NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
-{ /* Allocates size bytes from pool p (the system pool when p is null).
- Requests up to THREADCACHEMAX are first tried against the thread cache;
- anything it cannot serve goes to an mspace via CallMalloc. */
- void *ret=0;
- threadcache *tc;
- int mymspace;
- GetThreadCache(&p, &tc, &mymspace, &size);
-#if THREADCACHEMAX
- if(tc && size<=THREADCACHEMAX)
- { /* Use the thread cache */
- ret=threadcache_malloc(p, tc, &size); /* size comes back rounded up to its bin size even on a miss */
- }
-#endif
- if(!ret)
- { /* Use this thread's mspace */
- GETMSPACE(m, p, tc, mymspace, size,
- ret=CallMalloc(m, size, 0));
- }
- return ret;
-}
-NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
-{ /* Allocates zero-filled storage for no elements of size bytes each from
-     pool p (the system pool when p is null). Returns 0 when no*size does
-     not fit in a size_t or when the allocation fails. */
-    size_t rsize;
-    void *ret=0;
-    threadcache *tc;
-    int mymspace;
-    /* Refuse requests whose byte count would wrap; a wrapped product would
-       yield a block smaller than the caller asked for. */
-    if(size && no>(~(size_t)0)/size) return 0;
-    rsize=size*no;
-    GetThreadCache(&p, &tc, &mymspace, &rsize);
-#if THREADCACHEMAX
-    if(tc && rsize<=THREADCACHEMAX)
-    { /* Use the thread cache */
-        if((ret=threadcache_malloc(p, tc, &rsize)))
-            memset(ret, 0, rsize);
-    }
-#endif
-    if(!ret)
-    { /* Use this thread's mspace */
-        GETMSPACE(m, p, tc, mymspace, rsize,
-            ret=CallCalloc(m, rsize, 0));
-    }
-    return ret;
-}
-NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
-{ /* Resizes mem to size bytes. A null mem behaves like nedpmalloc. A block
- whose size cannot be determined aborts the process. A shrink by less
- than the slack below returns mem unchanged. Otherwise a replacement is
- taken from the thread cache when possible, else CallRealloc is used. */
- void *ret=0;
- threadcache *tc;
- int mymspace, isforeign=1;
- size_t memsize;
- if(!mem) return nedpmalloc(p, size);
- memsize=nedblksize(&isforeign, mem);
- assert(memsize);
- if(!memsize)
- {
- fprintf(stderr, "nedmalloc: nedprealloc() called with a block not created by nedmalloc!\n");
- abort();
- }
- else if(size<=memsize && memsize-size<
-#ifdef DEBUG
- 32
-#else
- 1024
-#endif
- ) /* If realloc size is within 1Kb smaller than existing, noop it */
- return mem;
- GetThreadCache(&p, &tc, &mymspace, &size);
-#if THREADCACHEMAX
- if(tc && size && size<=THREADCACHEMAX)
- { /* Use the thread cache */
- if((ret=threadcache_malloc(p, tc, &size)))
- {
- memcpy(ret, mem, memsize<size ? memsize : size);
- if(memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
- threadcache_free(p, tc, mymspace, mem, memsize); /* old block is small enough to be cached rather than freed */
- else
- CallFree(0, mem, isforeign);
- }
- }
-#endif
- if(!ret)
- { /* Reallocs always happen in the mspace they happened in, so skip
- locking the preferred mspace for this thread */
- ret=CallRealloc(p->m[mymspace], mem, isforeign, memsize, size);
- }
- return ret;
-}
-void nedpfree(nedpool *p, void *mem) THROWSPEC
-{ /* Releases mem. A null mem is ignored (with a warning in DEBUG builds). A
- block whose size cannot be determined aborts the process. Blocks within
- the thread cache size range are parked in the calling thread's cache;
- everything else goes to CallFree.
- Frees always happen in the mspace they happened in, so skip
- locking the preferred mspace for this thread */
- threadcache *tc;
- int mymspace, isforeign=1;
- size_t memsize;
- if(!mem)
- { /* If you tried this on FreeBSD you'd be sorry! */
-#ifdef DEBUG
- fprintf(stderr, "nedmalloc: WARNING nedpfree() called with zero. This is not portable behaviour!\n");
-#endif
- return;
- }
- memsize=nedblksize(&isforeign, mem);
- assert(memsize);
- if(!memsize)
- {
- fprintf(stderr, "nedmalloc: nedpfree() called with a block not created by nedmalloc!\n");
- abort();
- }
- GetThreadCache(&p, &tc, &mymspace, 0);
-#if THREADCACHEMAX
- if(mem && tc && memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
- threadcache_free(p, tc, mymspace, mem, memsize);
- else
-#endif
- CallFree(0, mem, isforeign);
-}
-NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
-{ /* Allocates bytes with the requested alignment from pool p (the system
-     pool when p is null). The thread cache is bypassed; the request always
-     goes to an mspace through CallMalloc. */
-    threadcache *cache;
-    int mspaceidx;
-    void *block;
-    GetThreadCache(&p, &cache, &mspaceidx, &bytes);
-    /* Use this thread's mspace */
-    GETMSPACE(m, p, cache, mspaceidx, bytes,
-        block=CallMalloc(m, bytes, alignment));
-    return block;
-}
-struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC
-{ /* Sums the mallinfo figures of every mspace in pool p (the system pool
-     when p is null). All fields stay zero when USE_ALLOCATOR is not 1 or
-     NO_MALLINFO is set. */
-    struct nedmallinfo total={0};
-    mstate *cursor;
-    if(!p)
-    {
-        p=&syspool;
-        if(!syspool.threads) InitPool(&syspool, 0, -1);
-    }
-    for(cursor=p->m; *cursor; cursor++)
-    {
-#if USE_ALLOCATOR==1 && !NO_MALLINFO
-        struct mallinfo part=mspace_mallinfo(*cursor);
-        total.arena+=part.arena;
-        total.ordblks+=part.ordblks;
-        total.hblkhd+=part.hblkhd;
-        total.usmblks+=part.usmblks;
-        total.uordblks+=part.uordblks;
-        total.fordblks+=part.fordblks;
-        total.keepcost+=part.keepcost;
-#endif
-    }
-    return total;
-}
-int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
-{ /* Forwards parno/value to mspace_mallopt when USE_ALLOCATOR is 1 and
-     returns its result; otherwise returns 0. p is not used. */
-    int outcome=0;
-#if USE_ALLOCATOR==1
-    outcome=mspace_mallopt(parno, value);
-#endif
-    return outcome;
-}
-NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC
-{ /* Reports mparams.granularity and mparams.magic through the non-null
-     output pointers and returns the address of the system pool. When
-     USE_ALLOCATOR is not 1, both outputs are 0 and the return value is 0. */
-    size_t gran=0, mag=0;
-    void *pool=0;
-#if USE_ALLOCATOR==1
-    gran=mparams.granularity;
-    mag=mparams.magic;
-    pool=(void *) &syspool;
-#endif
-    if(granularity) *granularity=gran;
-    if(magic) *magic=mag;
-    return pool;
-}
-int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
-{ /* Calls mspace_trim with pad on every mspace of pool p (the system pool
-     when p is null) and returns the sum of the results; 0 when
-     USE_ALLOCATOR is not 1. */
-    int released=0;
-    mstate *cursor;
-    if(!p)
-    {
-        p=&syspool;
-        if(!syspool.threads) InitPool(&syspool, 0, -1);
-    }
-    for(cursor=p->m; *cursor; cursor++)
-    {
-#if USE_ALLOCATOR==1
-        released+=mspace_trim(*cursor, pad);
-#endif
-    }
-    return released;
-}
-void nedpmalloc_stats(nedpool *p) THROWSPEC
-{ /* Calls mspace_malloc_stats on every mspace of pool p (the system pool
-     when p is null); does nothing per mspace when USE_ALLOCATOR is not 1. */
-    mstate *cursor;
-    if(!p)
-    {
-        p=&syspool;
-        if(!syspool.threads) InitPool(&syspool, 0, -1);
-    }
-    for(cursor=p->m; *cursor; cursor++)
-    {
-#if USE_ALLOCATOR==1
-        mspace_malloc_stats(*cursor);
-#endif
-    }
-}
-size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
-{ /* Returns the sum of mspace_footprint over every mspace of pool p (the
-     system pool when p is null); 0 when USE_ALLOCATOR is not 1. */
-    size_t total=0;
-    mstate *cursor;
-    if(!p)
-    {
-        p=&syspool;
-        if(!syspool.threads) InitPool(&syspool, 0, -1);
-    }
-    for(cursor=p->m; *cursor; cursor++)
-    {
-#if USE_ALLOCATOR==1
-        total+=mspace_footprint(*cursor);
-#endif
-    }
-    return total;
-}
-NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
-{ /* Pool-aware wrapper around mspace_independent_calloc, run on a locked
- mspace of pool p (the system pool when p is null). With USE_ALLOCATOR==0
- the call goes to unsupported_operation instead. */
- void **ret;
- threadcache *tc;
- int mymspace;
- GetThreadCache(&p, &tc, &mymspace, &elemsize); /* also raises elemsize to at least sizeof(threadcacheblk) */
-#if USE_ALLOCATOR==0
- GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
- ret=unsupported_operation("independent_calloc"));
-#elif USE_ALLOCATOR==1
- GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
- ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
-#endif
- return ret;
-}
-NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
-{ /* Pool-aware wrapper around mspace_independent_comalloc, run on a locked
- mspace of pool p (the system pool when p is null). Each requested size
- is first raised to at least sizeof(threadcacheblk) in a stack copy of
- sizes. With USE_ALLOCATOR==0 the call goes to unsupported_operation. */
- void **ret;
- threadcache *tc;
- int mymspace;
- size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t)); /* NOTE(review): stack use grows with elems and the product is unchecked - confirm callers bound elems */
- if(!adjustedsizes) return 0;
- for(i=0; i<elems; i++)
- adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
- GetThreadCache(&p, &tc, &mymspace, 0);
-#if USE_ALLOCATOR==0
- GETMSPACE(m, p, tc, mymspace, 0,
- ret=unsupported_operation("independent_comalloc"));
-#elif USE_ALLOCATOR==1
- GETMSPACE(m, p, tc, mymspace, 0,
- ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
-#endif
- return ret;
-}
-
-#if defined(__cplusplus)
-}
-#endif
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-#endif
+#ifdef NEDMALLOC_ENABLED
+/* Alternative malloc implementation for multiple threads without
+lock contention based on dlmalloc. (C) 2005-2009 Niall Douglas
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+#ifdef _MSC_VER
+/* Enable full aliasing on MSVC */
+/*#pragma optimize("a", on)*/
+#pragma warning(push)
+#pragma warning(disable:4100) /* unreferenced formal parameter */
+#pragma warning(disable:4127) /* conditional expression is constant */
+#pragma warning(disable:4706) /* assignment within conditional expression */
+#endif
+
+/*#define ENABLE_TOLERANT_NEDMALLOC 1*/
+/*#define ENABLE_FAST_HEAP_DETECTION 1*/
+/*#define NEDMALLOC_DEBUG 1*/
+
+/*#define FULLSANITYCHECKS*/
+/* If link time code generation is on, don't force or prevent inlining */
+#if defined(_MSC_VER) && defined(NEDMALLOC_DLL_EXPORTS)
+#define FORCEINLINE
+#define NOINLINE
+#endif
+
+
+#include "nedmalloc.h"
+#ifdef WIN32
+ #include <malloc.h>
+ #include <stddef.h>
+#endif
+#if USE_ALLOCATOR==1
+ #define MSPACES 1
+ #define ONLY_MSPACES 1
+#endif
+#define USE_DL_PREFIX 1
+#ifndef USE_LOCKS
+ #define USE_LOCKS 1
+#endif
+#define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */
+#ifndef NEDMALLOC_DEBUG
+ #if defined(DEBUG) || defined(_DEBUG)
+ #define NEDMALLOC_DEBUG 1
+ #else
+ #define NEDMALLOC_DEBUG 0
+ #endif
+#endif
+/* We need to consistently define DEBUG=0|1, _DEBUG and NDEBUG for dlmalloc */
+#undef DEBUG
+#undef _DEBUG
+#if NEDMALLOC_DEBUG
+ #define _DEBUG
+ #define DEBUG 1
+#else
+ #define DEBUG 0
+#endif
+#ifdef NDEBUG /* Disable assert checking on release builds */
+ #undef DEBUG
+ #undef _DEBUG
+#endif
+/* The default of 64Kb means we spend too much time kernel-side */
+#ifndef DEFAULT_GRANULARITY
+#define DEFAULT_GRANULARITY (1*1024*1024)
+#if DEBUG
+#define DEFAULT_GRANULARITY_ALIGNED
+#endif
+#endif
+/*#define USE_SPIN_LOCKS 0*/
+
+
+#include "malloc.c.h"
+#ifdef NDEBUG /* Disable assert checking on release builds */
+ #undef DEBUG
+#elif !NEDMALLOC_DEBUG
+ #ifdef __GNUC__
+ #warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.
+ #elif defined(_MSC_VER)
+ #pragma message(__FILE__ ": WARNING: DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.")
+ #endif
+#endif
+
+/* The maximum number of threads which can use a pool concurrently */
+#ifndef MAXTHREADSINPOOL
+#define MAXTHREADSINPOOL 16
+#endif
+/* The maximum number of threadcaches which can be allocated */
+#ifndef THREADCACHEMAXCACHES
+#define THREADCACHEMAXCACHES 256
+#endif
+/* The maximum size to be allocated from the thread cache */
+#ifndef THREADCACHEMAX
+#define THREADCACHEMAX 8192
+#endif
+#if 0
+/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
+#define THREADCACHEMAXBINS ((13-4)*2)
+#else
+/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
+#define THREADCACHEMAXBINS (13-4)
+#endif
+/* Point at which the free space in a thread cache is garbage collected */
+#ifndef THREADCACHEMAXFREESPACE
+#define THREADCACHEMAXFREESPACE (512*1024)
+#endif
+
+
+/* Thread local storage abstraction. TLSALLOC, TLSFREE and TLSSET all evaluate
+   to non-zero on FAILURE on both platforms; TLSGET yields the stored value. */
+#ifdef WIN32
+ #define TLSVAR DWORD
+ #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
+ #define TLSFREE(k) (!TlsFree(k))
+ #define TLSGET(k) TlsGetValue(k)
+ #define TLSSET(k, a) (!TlsSetValue(k, a))
+ #ifdef DEBUG
+/* Debug wrapper around TlsGetValue() which asserts that the call succeeded.
+   GetLastError() returns a Win32 error code, so it is compared against
+   ERROR_SUCCESS rather than the HRESULT constant S_OK. */
+static LPVOID ChkedTlsGetValue(DWORD idx)
+{
+    LPVOID ret=TlsGetValue(idx);
+    assert(ERROR_SUCCESS==GetLastError());
+    return ret;
+}
+ #undef TLSGET
+ #define TLSGET(k) ChkedTlsGetValue(k)
+ #endif
+#else
+ #define TLSVAR pthread_key_t
+ #define TLSALLOC(k) pthread_key_create(k, 0)
+ #define TLSFREE(k) pthread_key_delete(k)
+ #define TLSGET(k) pthread_getspecific(k)
+ #define TLSSET(k, a) pthread_setspecific(k, a)
+#endif
+
+#if defined(__cplusplus)
+#if !defined(NO_NED_NAMESPACE)
+namespace nedalloc {
+#else
+extern "C" {
+#endif
+#endif
+
+#if USE_ALLOCATOR==0
+/* Reports on stderr that opname is unavailable in this build configuration, then aborts */
+static void *unsupported_operation(const char *opname) THROWSPEC
+{
+    const char *const what=opname;
+    (void) fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", what);
+    abort();
+    return (void *) 0; /* Not reached: only here to satisfy the return type */
+}
+static size_t mspacecounter=(size_t) 0xdeadbeef;
+#endif
+#ifndef ENABLE_FAST_HEAP_DETECTION
+/* Lowest mspace least_addr and largest (page rounded) chunk size seen so far.
+   Updated by the Call*alloc helpers and read by nedblkmstate() to reject
+   pointers which cannot be nedmalloc blocks. */
+static void *RESTRICT leastusedaddress;
+static size_t largestusedblock;
+#endif
+
+/* Allocates size bytes, aligned to alignment if that is non-zero, from the
+   system allocator (USE_ALLOCATOR==0) or from the given mspace (USE_ALLOCATOR==1).
+   With USE_MAGIC_HEADERS the request is enlarged by alignment plus three size_t
+   words and the returned pointer is preceded by "NEDMALOC" marker word(s), the
+   owning mspace and the recorded size. Returns 0 if the allocation fails. */
+static FORCEINLINE void *CallMalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
+{
+ void *RESTRICT ret=0;
+ size_t _alignment=alignment;
+#if USE_MAGIC_HEADERS
+ size_t *_ret=0;
+ size+=alignment+3*sizeof(size_t);
+ /* Alignment is applied by hand below, so ask the allocator for an unaligned block */
+ _alignment=0;
+#endif
+#if USE_ALLOCATOR==0
+ ret=_alignment ?
+#ifdef _MSC_VER
+ /* This is the MSVCRT equivalent */
+ _aligned_malloc(size, _alignment)
+#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
+ /* This is the glibc/ptmalloc2/dlmalloc/BSD libc equivalent. */
+ memalign(_alignment, size)
+#else
+#error Cannot aligned allocate with the memory allocator of an unknown system!
+#endif
+ : malloc(size);
+#elif USE_ALLOCATOR==1
+ ret=_alignment ? mspace_memalign((mstate) mspace, _alignment, size) : mspace_malloc((mstate) mspace, size);
+#ifndef ENABLE_FAST_HEAP_DETECTION
+ if(ret)
+ {
+ /* Track the bounds which nedblkmstate() uses for its sanity checks */
+ size_t truesize=chunksize(mem2chunk(ret));
+ if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
+ if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
+ }
+#endif
+#endif
+ if(!ret) return 0;
+#if USE_MAGIC_HEADERS
+ _ret=(size_t *) ret;
+ ret=(void *)(_ret+3);
+ if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
+ /* Fill everything up to two words before the user pointer with the marker */
+ for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *)"NEDMALOC";
+ _ret[0]=(size_t) mspace;
+ _ret[1]=size-3*sizeof(size_t);
+#endif
+ return ret;
+}
+
+/* Zero-initialised counterpart of CallMalloc(): allocates size bytes via calloc()
+   (USE_ALLOCATOR==0) or mspace_calloc() (USE_ALLOCATOR==1). alignment is only
+   applied when USE_MAGIC_HEADERS is enabled, where the block is over-allocated
+   and the pointer aligned by hand. Returns 0 if the allocation fails. */
+static FORCEINLINE void *CallCalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
+{
+ void *RESTRICT ret=0;
+#if USE_MAGIC_HEADERS
+ size_t *_ret=0;
+ size+=alignment+3*sizeof(size_t);
+#endif
+#if USE_ALLOCATOR==0
+ ret=calloc(1, size);
+#elif USE_ALLOCATOR==1
+ ret=mspace_calloc((mstate) mspace, 1, size);
+#ifndef ENABLE_FAST_HEAP_DETECTION
+ if(ret)
+ {
+ /* Track the bounds which nedblkmstate() uses for its sanity checks */
+ size_t truesize=chunksize(mem2chunk(ret));
+ if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
+ if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
+ }
+#endif
+#endif
+ if(!ret) return 0;
+#if USE_MAGIC_HEADERS
+ _ret=(size_t *) ret;
+ ret=(void *)(_ret+3);
+ if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
+ /* Fill everything up to two words before the user pointer with the marker */
+ for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
+ _ret[0]=(size_t) mspace;
+ _ret[1]=size-3*sizeof(size_t);
+#endif
+ return ret;
+}
+
+/* Resizes mem to newsize bytes.
+   If isforeign is set the block came from the system allocator: a fresh block
+   is obtained via CallMalloc(), min(oldsize, newsize) bytes are copied across
+   and the old block is released with free().
+   Otherwise the block is resized in place via realloc()/mspace_realloc(). With
+   USE_MAGIC_HEADERS the header marker is scrambled to "nedmaloc" before the
+   resize and rewritten on the new block afterwards.
+   Returns the resized block, or 0 on failure in which case mem remains valid. */
+static FORCEINLINE void *CallRealloc(void *RESTRICT mspace, void *RESTRICT mem, int isforeign, size_t oldsize, size_t newsize) THROWSPEC
+{
+    void *RESTRICT ret=0;
+#if USE_MAGIC_HEADERS
+    mstate oldmspace=0;
+    size_t *_ret=0, *_mem=(size_t *) mem-3;
+#endif
+    if(isforeign)
+    { /* Transfer */
+#if USE_MAGIC_HEADERS
+        assert(_mem[0]!=*(size_t *) "NEDMALOC");
+#endif
+        if((ret=CallMalloc(mspace, newsize, 0)))
+        {
+#if defined(DEBUG)
+            printf("*** nedmalloc frees system allocated block %p\n", mem);
+#endif
+            memcpy(ret, mem, oldsize<newsize ? oldsize : newsize);
+            free(mem);
+        }
+        return ret;
+    }
+#if USE_MAGIC_HEADERS
+    assert(_mem[0]==*(size_t *) "NEDMALOC");
+    newsize+=3*sizeof(size_t);
+    oldmspace=(mstate) _mem[1];
+    assert(oldsize>=_mem[2]);
+    /* Scramble the marker word(s) and step back to the true start of the block */
+    for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
+    mem=(void *)(++_mem);
+#endif
+#if USE_ALLOCATOR==0
+    ret=realloc(mem, newsize);
+#elif USE_ALLOCATOR==1
+    ret=mspace_realloc((mstate) mspace, mem, newsize);
+#ifndef ENABLE_FAST_HEAP_DETECTION
+    if(ret)
+    {
+        size_t truesize=chunksize(mem2chunk(ret));
+        if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
+    }
+#endif
+#endif
+    if(!ret)
+    { /* Put it back the way it was */
+#if USE_MAGIC_HEADERS
+        /* The marker words were scrambled to "nedmaloc" above (not zeroed), so
+           that is the pattern which must be matched to restore them. */
+        for(; *_mem==*(size_t *) "nedmaloc"; *_mem++=*(size_t *) "NEDMALOC");
+#endif
+        return 0;
+    }
+#if USE_MAGIC_HEADERS
+    _ret=(size_t *) ret;
+    ret=(void *)(_ret+3);
+    for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
+    _ret[0]=(size_t) mspace;
+    _ret[1]=newsize-3*sizeof(size_t);
+#endif
+    return ret;
+}
+
+/* Releases mem. Foreign blocks (isforeign non-zero) go straight to free();
+   otherwise the block is released with free() (USE_ALLOCATOR==0) or
+   mspace_free() (USE_ALLOCATOR==1). With USE_MAGIC_HEADERS the marker word(s)
+   are scrambled to "nedmaloc" first and mem is moved back to the true start
+   of the underlying allocation. */
+static FORCEINLINE void CallFree(void *RESTRICT mspace, void *RESTRICT mem, int isforeign) THROWSPEC
+{
+#if USE_MAGIC_HEADERS
+ mstate oldmspace=0;
+ size_t *_mem=(size_t *) mem-3, oldsize=0;
+#endif
+ if(isforeign)
+ {
+#if USE_MAGIC_HEADERS
+ assert(_mem[0]!=*(size_t *) "NEDMALOC");
+#endif
+#if defined(DEBUG)
+ printf("*** nedmalloc frees system allocated block %p\n", mem);
+#endif
+ free(mem);
+ return;
+ }
+#if USE_MAGIC_HEADERS
+ assert(_mem[0]==*(size_t *) "NEDMALOC");
+ /* oldmspace and oldsize are read from the header but not used further here */
+ oldmspace=(mstate) _mem[1];
+ oldsize=_mem[2];
+ for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
+ mem=(void *)(++_mem);
+#endif
+#if USE_ALLOCATOR==0
+ free(mem);
+#elif USE_ALLOCATOR==1
+ mspace_free((mstate) mspace, mem);
+#endif
+}
+
+/* Returns the mstate which owns mem, or 0 if mem is null or does not look like
+   a block allocated by nedmalloc. Which detection strategy is compiled in
+   depends on USE_MAGIC_HEADERS, USE_ALLOCATOR and ENABLE_FAST_HEAP_DETECTION:
+   - magic headers: checks for the "NEDMALOC" marker three words before mem;
+   - USE_ALLOCATOR==0: always returns 0;
+   - fast heap detection: platform specific inspection of the preceding header;
+   - otherwise: a series of plausibility checks on the dlmalloc chunk header
+     using leastusedaddress and largestusedblock. */
+static NEDMALLOCNOALIASATTR mstate nedblkmstate(void *RESTRICT mem) THROWSPEC
+{
+ if(mem)
+ {
+#if USE_MAGIC_HEADERS
+ size_t *_mem=(size_t *) mem-3;
+ if(_mem[0]==*(size_t *) "NEDMALOC")
+ {
+ return (mstate) _mem[1];
+ }
+ else return 0;
+#else
+#if USE_ALLOCATOR==0
+ /* Fail everything */
+ return 0;
+#elif USE_ALLOCATOR==1
+#ifdef ENABLE_FAST_HEAP_DETECTION
+#ifdef WIN32
+ /* On Windows for RELEASE both x86 and x64 the NT heap precedes each block with an eight byte header
+ which looks like:
+ normal: 4 bytes of size, 4 bytes of [char < 64, char < 64, char < 64 bit 0 always set, char random ]
+ mmaped: 4 bytes of size 4 bytes of [zero, zero, 0xb, zero ]
+
+ On Windows for DEBUG both x86 and x64 the preceding four bytes is always 0xfdfdfdfd (no man's land).
+ */
+#pragma pack(push, 1)
+ struct _HEAP_ENTRY
+ {
+ USHORT Size;
+ USHORT PreviousSize;
+ UCHAR Cookie; /* SegmentIndex */
+ UCHAR Flags; /* always bit 0 (HEAP_ENTRY_BUSY). bit 1=(HEAP_ENTRY_EXTRA_PRESENT), bit 2=normal block (HEAP_ENTRY_FILL_PATTERN), bit 3=mmap block (HEAP_ENTRY_VIRTUAL_ALLOC). Bit 4 (HEAP_ENTRY_LAST_ENTRY) could be set */
+ UCHAR UnusedBytes;
+ UCHAR SmallTagIndex; /* fastbin index. Always one of 0x02, 0x03, 0x04 < 0x80 */
+ } *RESTRICT he=((struct _HEAP_ENTRY *) mem)-1;
+#pragma pack(pop)
+ unsigned int header=((unsigned int *)mem)[-1], mask1=0x8080E100, result1, mask2=0xFFFFFF06, result2;
+ result1=header & mask1; /* Positive testing for NT heap */
+ result2=header & mask2; /* Positive testing for dlmalloc */
+ if(result1==0x00000100 && result2!=0x00000102)
+ { /* This is likely a NT heap block */
+ return 0;
+ }
+#endif
+#ifdef __linux__
+ /* On Linux glibc uses ptmalloc2 (really dlmalloc) just as we do, but prev_foot contains rubbish
+ when the preceding block is allocated because ptmalloc2 finds the local mstate by rounding the ptr
+ down to the nearest megabyte. It's like dlmalloc with FOOTERS disabled. */
+ mchunkptr p=mem2chunk(mem);
+ mstate fm=get_mstate_for(p);
+ /* If it's a ptmalloc2 block, fm is likely to be some crazy value */
+ if(!is_aligned(fm)) return 0;
+ if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
+ if(ok_magic(fm))
+ return fm;
+ else
+ return 0;
+ /* Empty if so that the else below has something to attach to */
+ if(1) { }
+#endif
+ else
+ {
+ mchunkptr p=mem2chunk(mem);
+ mstate fm=get_mstate_for(p);
+ assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */
+ if(ok_magic(fm))
+ return fm;
+ }
+#else
+//#ifdef WIN32
+// __try
+//#endif
+ {
+ /* We try to return zero here if it isn't one of our own blocks, however
+ the current block annotation scheme used by dlmalloc makes it impossible
+ to be absolutely sure of avoiding a segfault.
+
+ mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block;
+ mchunkptr->head = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS
+ FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently
+ in use unless mmap), bit 2 is UNUSED and currently is always zero.
+ */
+ register void *RESTRICT leastusedaddress_=leastusedaddress; /* Cache these to avoid register reloading */
+ register size_t largestusedblock_=largestusedblock;
+ if(!is_aligned(mem)) return 0; /* Would fail very rarely as all allocators return aligned blocks */
+ if(mem<leastusedaddress_) return 0; /* Simple but effective */
+ {
+ mchunkptr p=mem2chunk(mem);
+ mstate fm=0;
+ int ismmapped=is_mmapped(p);
+ if((!ismmapped && !is_inuse(p)) || (p->head & FLAG4_BIT)) return 0;
+ /* Reduced uncertainty by 0.5^2 = 25.0% */
+ /* size should never exceed largestusedblock */
+ if(chunksize(p)>largestusedblock_) return 0;
+ /* Reduced uncertainty by a minimum of 0.5^3 = 12.5%, maximum 0.5^16 = 0.0015% */
+ /* Having sanity checked prev_foot and head, check next block */
+ if(!ismmapped && (!next_pinuse(p) || (next_chunk(p)->head & FLAG4_BIT))) return 0;
+ /* Reduced uncertainty by 0.5^5 = 3.13% or 0.5^18 = 0.00038% */
+ #if 0
+ /* If previous block is free, check that its next block pointer equals us */
+ if(!ismmapped && !pinuse(p))
+ if(next_chunk(prev_chunk(p))!=p) return 0;
+ /* We could start comparing prev_foot's for similarity but it starts getting slow. */
+ #endif
+ fm = get_mstate_for(p);
+ if(!is_aligned(fm) || (void *)fm<leastusedaddress_) return 0;
+ if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
+ assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */
+ if(ok_magic(fm))
+ return fm;
+ }
+ }
+//#ifdef WIN32
+// __except(1) { }
+//#endif
+#endif
+#endif
+#endif
+ }
+ return 0;
+}
+/* Returns the usable size of the block at mem, or 0 if mem is null or the size
+   cannot be determined in this configuration.
+   If isforeign is non-null, *isforeign is set to 1 up front and reset to 0 once
+   the block has been recognised as a nedmalloc block. Blocks which are not
+   recognised are sized via the system allocator only when ENABLE_TOLERANT_NEDMALLOC
+   is defined or USE_ALLOCATOR==0. */
+NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC
+{
+ if(mem)
+ {
+ if(isforeign) *isforeign=1;
+#if USE_MAGIC_HEADERS
+ {
+ size_t *_mem=(size_t *) mem-3;
+ if(_mem[0]==*(size_t *) "NEDMALOC")
+ {
+ mstate mspace=(mstate) _mem[1];
+ size_t size=_mem[2];
+ if(isforeign) *isforeign=0;
+ return size;
+ }
+ }
+#elif USE_ALLOCATOR==1
+ if(nedblkmstate(mem))
+ {
+ mchunkptr p=mem2chunk(mem);
+ if(isforeign) *isforeign=0;
+ return chunksize(p)-overhead_for(p);
+ }
+#ifdef DEBUG
+ else
+ {
+ int a=1; /* Set breakpoints here if needed */
+ }
+#endif
+#endif
+#if defined(ENABLE_TOLERANT_NEDMALLOC) || USE_ALLOCATOR==0
+#ifdef _MSC_VER
+ /* This is the MSVCRT equivalent */
+ return _msize(mem);
+#elif defined(__linux__)
+ /* This is the glibc/ptmalloc2/dlmalloc equivalent. */
+ return malloc_usable_size(mem);
+#elif defined(__FreeBSD__) || defined(__APPLE__)
+ /* This is the BSD libc equivalent. */
+ return malloc_size(mem);
+#else
+#error Cannot tolerate the memory allocator of an unknown system!
+#endif
+#endif
+ }
+ return 0;
+}
+
+/* Convenience wrappers: each forwards to its nedp* counterpart passing a null
+   pool pointer (nedpsetvalue() and GetThreadCache() substitute the system pool
+   for a null pool). */
+NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC { nedpsetvalue((nedpool *) 0, v); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc((nedpool *) 0, size); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc((nedpool *) 0, no, size); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc((nedpool *) 0, mem, size); }
+NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC { nedpfree((nedpool *) 0, mem); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign((nedpool *) 0, alignment, bytes); }
+NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo((nedpool *) 0); }
+NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt((nedpool *) 0, parno, value); }
+NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim((nedpool *) 0, pad); }
+void nedmalloc_stats() THROWSPEC { nedpmalloc_stats((nedpool *) 0); }
+NEDMALLOCNOALIASATTR size_t nedmalloc_footprint() THROWSPEC { return nedpmalloc_footprint((nedpool *) 0); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks); }
+
+/* Header written into a freed block while it sits in a thread cache bin.
+   Blocks in a bin form a doubly linked list via next/prev. */
+struct threadcacheblk_t;
+typedef struct threadcacheblk_t threadcacheblk;
+struct threadcacheblk_t
+{ /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
+#ifdef FULLSANITYCHECKS
+ unsigned int magic;
+#endif
+ unsigned int lastUsed, size; /* lastUsed is the value of threadcache::frees when the block was cached */
+ threadcacheblk *next, *prev;
+};
+/* Per-thread cache of recently freed small blocks */
+typedef struct threadcache_t
+{
+#ifdef FULLSANITYCHECKS
+ unsigned int magic1;
+#endif
+ int mymspace; /* Last mspace entry this thread used */
+ long threadid;
+ unsigned int mallocs, frees, successes;
+ size_t freeInCache; /* How much free space is stored in this cache */
+ threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2]; /* Per bin: [idx*2] is the list head, [idx*2+1] the list tail */
+#ifdef FULLSANITYCHECKS
+ unsigned int magic2;
+#endif
+} threadcache;
+/* A memory pool: a set of mspaces plus the thread caches attached to it */
+struct nedpool_t
+{
+ MLOCK_T mutex;
+ void *uservalue;
+ int threads; /* Max entries in m to use */
+ threadcache *caches[THREADCACHEMAXCACHES];
+ TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
+ mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */
+};
+/* The system pool, used whenever a null pool pointer is supplied */
+static nedpool syspool;
+
+/* Returns the position of the highest set bit of (_size>>4), i.e. the thread
+   cache bin index for a block of _size bytes (16..31 -> 0, 32..63 -> 1, ...).
+   _size>>4 must be non-zero: __builtin_clz() and _BitScanReverse() do not
+   produce a defined result for a zero argument.
+   NOTE(review): GetThreadCache() raises sizes to at least sizeof(threadcacheblk);
+   confirm every caller guarantees _size>=16. */
+static FORCEINLINE NEDMALLOCNOALIASATTR unsigned int size2binidx(size_t _size) THROWSPEC
+{ /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */
+ unsigned int topbit, size=(unsigned int)(_size>>4);
+ /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */
+
+#if defined(__GNUC__)
+ topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+ {
+ unsigned long bsrTopBit;
+
+ _BitScanReverse(&bsrTopBit, size);
+
+ topbit = bsrTopBit;
+ }
+#else
+#if 0
+ union {
+ unsigned asInt[2];
+ double asDouble;
+ };
+ int n;
+
+ asDouble = (double)size + 0.5;
+ topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
+#else
+ {
+ /* Portable fallback: smear the top bit downwards, invert, then count the
+ remaining set bits which gives the number of leading zeros */
+ unsigned int x=size;
+ x = x | (x >> 1);
+ x = x | (x >> 2);
+ x = x | (x >> 4);
+ x = x | (x >> 8);
+ x = x | (x >>16);
+ x = ~x;
+ x = x - ((x >> 1) & 0x55555555);
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x + (x >> 4)) & 0x0F0F0F0F;
+ x = x + (x << 8);
+ x = x + (x << 16);
+ topbit=31 - (x >> 24);
+ }
+#endif
+#endif
+ return topbit;
+}
+
+
+#ifdef FULLSANITYCHECKS
+/* Asserts the consistency of one thread cache bin, where ptr[0] is the list
+   head and ptr[1] the list tail: both set or both clear, both large enough to
+   hold a threadcacheblk, both carrying the "NEDN" magic, and correctly
+   terminated at either end. */
+static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
+{
+    assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
+    if(ptr[0] && ptr[1])
+    {
+        /* nedblksize() takes (isforeign, mem); the isforeign result is not needed here */
+        assert(nedblksize(0, ptr[0])>=sizeof(threadcacheblk));
+        assert(nedblksize(0, ptr[1])>=sizeof(threadcacheblk));
+        assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
+        assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
+        assert(!ptr[0]->prev);
+        assert(!ptr[1]->next);
+        if(ptr[0]==ptr[1])
+        {
+            assert(!ptr[0]->next);
+            assert(!ptr[1]->prev);
+        }
+    }
+}
+/* Runs tcsanitycheck() over every bin of tc and then walks each bin's list
+   asserting the magic and the next/prev linkage of every cached block. */
+static void tcfullsanitycheck(threadcache *tc) THROWSPEC
+{
+    int binno=0;
+    while(binno<=THREADCACHEMAXBINS)
+    {
+        threadcacheblk **bin=&tc->bins[binno*2];
+        threadcacheblk *ob=0, *b;
+        tcsanitycheck(bin);
+        b=bin[0];
+        while(b)
+        {
+            assert(*(unsigned int *) "NEDN"==b->magic);
+            assert(!ob || ob->next==b);
+            assert(!ob || b->prev==ob);
+            ob=b;
+            b=b->next;
+        }
+        binno++;
+    }
+}
+#endif
+
+/* Frees cached blocks of tc which are at least age frees old, where a block's
+   age is tc->frees minus the value recorded in lastUsed when it was cached.
+   Each bin is drained from its tail (oldest) end and tc->freeInCache is reduced
+   by the recorded size of every block released. Does nothing if the cache
+   holds no free space. */
+static NOINLINE void RemoveCacheEntries(nedpool *RESTRICT p, threadcache *RESTRICT tc, unsigned int age) THROWSPEC
+{
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+ if(tc->freeInCache)
+ {
+ threadcacheblk **tcbptr=tc->bins;
+ int n;
+ for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
+ {
+ threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */
+ /*tcsanitycheck(tcbptr);*/
+ for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
+ {
+ threadcacheblk *f=*tcb;
+ size_t blksize=f->size; /*nedblksize(f);*/
+ assert(blksize<=nedblksize(0, f));
+ assert(blksize);
+#ifdef FULLSANITYCHECKS
+ assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
+#endif
+ /* Unlink the tail; if the list is now empty clear the head too */
+ *tcb=(*tcb)->prev;
+ if(*tcb)
+ (*tcb)->next=0;
+ else
+ *tcbptr=0;
+ tc->freeInCache-=blksize;
+ assert((long) tc->freeInCache>=0);
+ CallFree(0, f, 0);
+ /*tcsanitycheck(tcbptr);*/
+ }
+ }
+ }
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+}
+/* Flushes and frees every thread cache attached to pool p, leaving all entries
+   of p->caches set to 0. */
+static void DestroyCaches(nedpool *RESTRICT p) THROWSPEC
+{
+    int n;
+    /* p->caches is an array member, so there is nothing to test for null here */
+    for(n=0; n<THREADCACHEMAXCACHES; n++)
+    {
+        threadcache *tc=p->caches[n];
+        if(!tc) continue;
+        tc->frees++;
+        /* An age of zero matches every cached block, so this empties the cache */
+        RemoveCacheEntries(p, tc, 0);
+        assert(!tc->freeInCache);
+        tc->mymspace=-1;
+        tc->threadid=0;
+        CallFree(0, tc, 0);
+        p->caches[n]=0;
+    }
+}
+
+/* Allocates a thread cache for the calling thread in the first free slot of
+   p->caches, assigns it an initial mspace derived from the thread id and
+   records slot+1 in the pool's TLS entry.
+   Returns the new cache, or 0 if all THREADCACHEMAXCACHES slots are taken or
+   the allocation fails. Aborts if the TLS value cannot be set. */
+static NOINLINE threadcache *AllocCache(nedpool *RESTRICT p) THROWSPEC
+{
+    threadcache *tc=0;
+    int n, end;
+    ACQUIRE_LOCK(&p->mutex);
+    for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
+    if(THREADCACHEMAXCACHES==n)
+    { /* List exhausted, so disable for this thread */
+        RELEASE_LOCK(&p->mutex);
+        return 0;
+    }
+    tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], sizeof(threadcache), 0);
+    if(!tc)
+    {
+        RELEASE_LOCK(&p->mutex);
+        return 0;
+    }
+#ifdef FULLSANITYCHECKS
+    tc->magic1=*(unsigned int *)"NEDMALC1";
+    tc->magic2=*(unsigned int *)"NEDMALC2";
+#endif
+    tc->threadid=(long)(size_t)CURRENT_THREAD;
+    /* Count the mspaces currently in the pool */
+    for(end=0; p->m[end]; end++);
+    /* Spread threads over the mspaces. The modulo is done unsigned because
+       threadid is a long: abs() takes an int, so it could truncate the value or
+       return a negative result, which would give a negative mspace index. */
+    tc->mymspace=(int)((unsigned long) tc->threadid % (unsigned long) end);
+    RELEASE_LOCK(&p->mutex);
+    if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
+    return tc;
+}
+
+/* Tries to satisfy an allocation of *_size bytes from thread cache tc.
+   The request is rounded up to its bin size (a power of two, 16<<idx) and the
+   rounded size is written back to *_size whether or not a block was found.
+   Looks at the head of the matching bin and, failing that, at the head of the
+   next larger bin. Returns the unlinked block, or 0 if the cache has nothing
+   suitable. Updates the mallocs/successes counters and freeInCache. */
+static void *threadcache_malloc(nedpool *RESTRICT p, threadcache *RESTRICT tc, size_t *RESTRICT _size) THROWSPEC
+{
+ void *RESTRICT ret=0;
+ size_t size=*_size, blksize=0;
+ unsigned int bestsize;
+ unsigned int idx=size2binidx(size);
+ threadcacheblk *RESTRICT blk, **RESTRICT binsptr;
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+ /* Calculate best fit bin size */
+ bestsize=1<<(idx+4);
+#if 0
+ /* Finer grained bin fit */
+ idx<<=1;
+ if(size>bestsize)
+ {
+ idx++;
+ bestsize+=bestsize>>1;
+ }
+ if(size>bestsize)
+ {
+ idx++;
+ bestsize=1<<(4+(idx>>1));
+ }
+#else
+ if(size>bestsize)
+ {
+ idx++;
+ bestsize<<=1;
+ }
+#endif
+ assert(bestsize>=size);
+ if(size<bestsize) size=bestsize;
+ assert(size<=THREADCACHEMAX);
+ assert(idx<=THREADCACHEMAXBINS);
+ binsptr=&tc->bins[idx*2];
+ /* Try to match close, but move up a bin if necessary */
+ blk=*binsptr;
+ if(!blk || blk->size<size)
+ { /* Bump it up a bin */
+ if(idx<THREADCACHEMAXBINS)
+ {
+ idx++;
+ binsptr+=2;
+ blk=*binsptr;
+ }
+ }
+ if(blk)
+ {
+ blksize=blk->size; /*nedblksize(blk);*/
+ assert(nedblksize(0, blk)>=blksize);
+ assert(blksize>=size);
+ /* Pop blk from the head of the bin; clear the tail too if the bin is now empty */
+ if(blk->next)
+ blk->next->prev=0;
+ *binsptr=blk->next;
+ if(!*binsptr)
+ binsptr[1]=0;
+#ifdef FULLSANITYCHECKS
+ blk->magic=0;
+#endif
+ assert(binsptr[0]!=blk && binsptr[1]!=blk);
+ assert(nedblksize(0, blk)>=sizeof(threadcacheblk) && nedblksize(0, blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
+ /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) _size);*/
+ ret=(void *) blk;
+ }
+ ++tc->mallocs;
+ if(ret)
+ {
+ assert(blksize>=size);
+ ++tc->successes;
+ tc->freeInCache-=blksize;
+ assert((long) tc->freeInCache>=0);
+ }
+#if defined(DEBUG) && 0
+ if(!(tc->mallocs & 0xfff))
+ {
+ printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
+ (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
+ }
+#endif
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+ *_size=size;
+ return ret;
+}
+/* Trims thread cache tc while it holds THREADCACHEMAXFREESPACE bytes or more:
+   entries are evicted with a progressively halved age threshold until the
+   cache drops below the limit or the threshold reaches zero. */
+static NOINLINE void ReleaseFreeInCache(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace) THROWSPEC
+{
+    unsigned int age;
+    /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
+    for(age=THREADCACHEMAXFREESPACE/8192; age; age>>=1)
+    {
+        if(tc->freeInCache<THREADCACHEMAXFREESPACE) break;
+        RemoveCacheEntries(p, tc, age);
+        /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
+    }
+    /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
+}
+/* Places the block mem of size bytes at the head of the matching bin of thread
+   cache tc instead of returning it to its mspace. The recorded size is rounded
+   down to the bin size (16<<idx). Aborts if mem is already the head of its bin,
+   which indicates a double free. Once the cache holds THREADCACHEMAXFREESPACE
+   bytes or more, older entries are released via ReleaseFreeInCache(). */
+static void threadcache_free(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, void *RESTRICT mem, size_t size) THROWSPEC
+{
+ unsigned int bestsize;
+ unsigned int idx=size2binidx(size);
+ threadcacheblk **RESTRICT binsptr, *RESTRICT tck=(threadcacheblk *) mem;
+ assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
+#ifdef DEBUG
+ /* Make sure this is a valid memory block */
+ assert(nedblksize(0, mem));
+#endif
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+ /* Calculate best fit bin size */
+ bestsize=1<<(idx+4);
+#if 0
+ /* Finer grained bin fit */
+ idx<<=1;
+ if(size>bestsize)
+ {
+ unsigned int biggerbestsize=bestsize+bestsize<<1;
+ if(size>=biggerbestsize)
+ {
+ idx++;
+ bestsize=biggerbestsize;
+ }
+ }
+#endif
+ if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */
+ size=bestsize;
+ binsptr=&tc->bins[idx*2];
+ assert(idx<=THREADCACHEMAXBINS);
+ /* Only the head of the bin is compared, so this catches an immediate double free only */
+ if(tck==*binsptr)
+ {
+ fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", tck);
+ abort();
+ }
+#ifdef FULLSANITYCHECKS
+ tck->magic=*(unsigned int *) "NEDN";
+#endif
+ tck->lastUsed=++tc->frees;
+ tck->size=(unsigned int) size;
+ /* Push onto the head of the list; if the list was empty tck is also the tail */
+ tck->next=*binsptr;
+ tck->prev=0;
+ if(tck->next)
+ tck->next->prev=tck;
+ else
+ binsptr[1]=tck;
+ assert(!*binsptr || (*binsptr)->size==tck->size);
+ *binsptr=tck;
+ assert(tck==tc->bins[idx*2]);
+ assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
+ /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
+ tc->freeInCache+=size;
+#ifdef FULLSANITYCHECKS
+ tcfullsanitycheck(tc);
+#endif
+#if 1
+ if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
+ ReleaseFreeInCache(p, tc, mymspace);
+#endif
+}
+
+
+
+
+/* Initialises pool p under the global malloc lock: sets up its mutex, TLS slot
+   and first mspace (of the given capacity) and clamps the number of usable
+   mspaces to 1..MAXTHREADSINPOOL. Does nothing if the pool is already
+   initialised (p->threads non-zero).
+   Returns 1 on success and 0 on failure; a failure while initialising the
+   system pool (threads<0) aborts instead. */
+static NOINLINE int InitPool(nedpool *RESTRICT p, size_t capacity, int threads) THROWSPEC
+{ /* threads is -1 for system pool */
+    int havetls=0; /* Set once TLSALLOC has succeeded, so the error path knows whether to free it */
+    ensure_initialization();
+    ACQUIRE_MALLOC_GLOBAL_LOCK();
+    if(p->threads) goto done;
+    if(INITIAL_LOCK(&p->mutex)) goto err;
+    if(TLSALLOC(&p->mycache)) goto err;
+    havetls=1;
+#if USE_ALLOCATOR==0
+    p->m[0]=(mstate) mspacecounter++;
+#elif USE_ALLOCATOR==1
+    if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
+    p->m[0]->extp=p;
+#endif
+    p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
+done:
+    RELEASE_MALLOC_GLOBAL_LOCK();
+    return 1;
+err:
+    if(threads<0)
+        abort(); /* If you can't allocate for system pool, we're screwed */
+    DestroyCaches(p);
+    if(p->m[0])
+    {
+#if USE_ALLOCATOR==1
+        destroy_mspace(p->m[0]);
+#endif
+        p->m[0]=0;
+    }
+    /* The value of p->mycache says nothing about whether a slot is held: zero can
+       be a valid slot, and a failed TLSALLOC on Win32 leaves TLS_OUT_OF_INDEXES
+       behind which must not be passed to TLSFREE. */
+    if(havetls)
+    {
+        if(TLSFREE(p->mycache)) abort();
+    }
+    p->mycache=0;
+    RELEASE_MALLOC_GLOBAL_LOCK();
+    return 0;
+}
+/* Returns a locked mspace of pool p for the calling thread. On success via the
+   search or creation paths, *lastUsed is updated and the choice is remembered
+   in tc->mymspace, or in the pool's TLS entry as -(n+1) when there is no
+   thread cache. If nothing else is available it blocks on the mspace at
+   *lastUsed. size is the initial capacity for any newly created mspace. */
+static NOINLINE mstate FindMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int *RESTRICT lastUsed, size_t size) THROWSPEC
+{ /* Gets called when thread's last used mspace is in use. The strategy
+ is to run through the list of all available mspaces looking for an
+ unlocked one and if we fail, we create a new one so long as we don't
+ exceed p->threads */
+ int n, end;
+ /* Scan the mspaces after the last used one; end finishes as the number of mspaces */
+ for(n=end=*lastUsed+1; p->m[n]; end=++n)
+ {
+ if(TRY_LOCK(&p->m[n]->mutex)) goto found;
+ }
+ /* Then the mspaces before the last used one */
+ for(n=0; n<*lastUsed && p->m[n]; n++)
+ {
+ if(TRY_LOCK(&p->m[n]->mutex)) goto found;
+ }
+ if(end<p->threads)
+ {
+ mstate temp;
+#if USE_ALLOCATOR==0
+ temp=(mstate) mspacecounter++;
+#elif USE_ALLOCATOR==1
+ if(!(temp=(mstate) create_mspace(size, 1)))
+ goto badexit;
+#endif
+ /* Now we're ready to modify the lists, we lock */
+ ACQUIRE_LOCK(&p->mutex);
+ /* Another thread may have added mspaces in the meantime, so find the first free slot again */
+ while(p->m[end] && end<p->threads)
+ end++;
+ if(end>=p->threads)
+ { /* Drat, must destroy it now */
+ RELEASE_LOCK(&p->mutex);
+#if USE_ALLOCATOR==1
+ destroy_mspace((mstate) temp);
+#endif
+ goto badexit;
+ }
+ /* We really want to make sure this goes into memory now but we
+ have to be careful of breaking aliasing rules, so write it twice */
+ *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp;
+ ACQUIRE_LOCK(&p->m[end]->mutex);
+ /*printf("Created mspace idx %d\n", end);*/
+ RELEASE_LOCK(&p->mutex);
+ n=end;
+ goto found;
+ }
+ /* Let it lock on the last one it used */
+badexit:
+ ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
+ return p->m[*lastUsed];
+found:
+ *lastUsed=n;
+ if(tc)
+ tc->mymspace=n;
+ else
+ {
+ if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
+ }
+ return p->m[n];
+}
+
+/* Registry of the pools created by nedcreatepool(). The structure is allocated
+   with extra trailing space so that list[] can hold more entries than its
+   declared length; size is the number of usable entries. */
+typedef struct PoolList_t
+{
+ size_t size; /* Size of list */
+ size_t length; /* Actual entries in list */
+#ifdef DEBUG
+ nedpool *list[1]; /* Force testing of list expansion */
+#else
+ nedpool *list[16];
+#endif
+} PoolList;
+/* Guards poollist; initialised by nedcreatepool() when the list is first allocated */
+static MLOCK_T poollistlock;
+/* The registry itself; 0 while no pools exist */
+static PoolList *poollist;
+/* Creates a new pool whose first mspace has the given capacity and which may
+   use up to threads mspaces (see InitPool() for how threads is clamped), and
+   registers it in the pool list, creating or growing the list as required.
+   Returns the new pool, or 0 if any allocation or the initialisation fails. */
+NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
+{
+    nedpool *ret=0;
+    if(!poollist)
+    {
+        PoolList *newpoollist=0;
+        /* One extra slot is allocated beyond list[] so the list always ends with a null entry */
+        if(!(newpoollist=(PoolList *) nedpcalloc(0, 1, sizeof(PoolList)+sizeof(nedpool *)))) return 0;
+        INITIAL_LOCK(&poollistlock);
+        ACQUIRE_LOCK(&poollistlock);
+        poollist=newpoollist;
+        poollist->size=sizeof(poollist->list)/sizeof(nedpool *);
+    }
+    else
+        ACQUIRE_LOCK(&poollistlock);
+    if(poollist->length==poollist->size)
+    { /* List is full, so grow it */
+        PoolList *newpoollist=0;
+        size_t newsize=0;
+        newsize=sizeof(PoolList)+(poollist->size+1)*sizeof(nedpool *);
+        if(!(newpoollist=(PoolList *) nedprealloc(0, poollist, newsize))) goto badexit;
+        poollist=newpoollist;
+        /* Zero from the first new entry to the end of the allocation. The length
+           must be measured from the start of the structure, not from list[0],
+           otherwise the memset runs past the end of the block by the size of
+           the fields preceding list[]. */
+        memset(&poollist->list[poollist->size], 0, newsize-((size_t)&poollist->list[poollist->size]-(size_t)poollist));
+        poollist->size=((newsize-((char *)&poollist->list[0]-(char *)poollist))/sizeof(nedpool *))-1;
+        assert(poollist->size>poollist->length);
+    }
+    if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) goto badexit;
+    if(!InitPool(ret, capacity, threads))
+    {
+        nedpfree(0, ret);
+        ret=0; /* Never hand the freed pool back to the caller */
+        goto badexit;
+    }
+    poollist->list[poollist->length++]=ret;
+badexit:
+    RELEASE_LOCK(&poollistlock);
+    return ret;
+}
+/* Destroys pool p: flushes and frees its thread caches, destroys all of its
+   mspaces, releases its TLS slot, frees the pool structure and removes it from
+   the pool list. The pool list itself is freed once it becomes empty.
+   Aborts if the TLS slot cannot be released. */
+void neddestroypool(nedpool *p) THROWSPEC
+{
+ unsigned int n;
+ ACQUIRE_LOCK(&p->mutex);
+ DestroyCaches(p);
+ for(n=0; p->m[n]; n++)
+ {
+#if USE_ALLOCATOR==1
+ destroy_mspace(p->m[n]);
+#endif
+ p->m[n]=0;
+ }
+ RELEASE_LOCK(&p->mutex);
+ if(TLSFREE(p->mycache)) abort();
+ nedpfree(0, p);
+ /* From here on p is only compared against the list entries, never dereferenced */
+ ACQUIRE_LOCK(&poollistlock);
+ assert(poollist);
+ for(n=0; n<poollist->length && poollist->list[n]!=p; n++);
+ assert(n!=poollist->length);
+ /* Close the gap; this also pulls the entry following the last used one down into the list */
+ memmove(&poollist->list[n], &poollist->list[n+1], (size_t)&poollist->list[poollist->length]-(size_t)&poollist->list[n]);
+ if(!--poollist->length)
+ {
+ assert(!poollist->list[0]);
+ nedpfree(0, poollist);
+ poollist=0;
+ }
+ RELEASE_LOCK(&poollistlock);
+}
+/* Tears down the system pool: frees its thread caches, destroys its mspaces,
+   overwrites the cache and mspace tables with a 0xdeadbeef pattern so that the
+   pool is unusable afterwards, and releases its TLS slot (aborting if that
+   fails). */
+void neddestroysyspool() THROWSPEC
+{
+    nedpool *p=&syspool;
+    const size_t poison=(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
+    int n=0;
+    ACQUIRE_LOCK(&p->mutex);
+    DestroyCaches(p);
+    while(p->m[n])
+    {
+#if USE_ALLOCATOR==1
+        destroy_mspace(p->m[n]);
+#endif
+        p->m[n]=0;
+        n++;
+    }
+    /* Render syspool unusable */
+    for(n=0; n<THREADCACHEMAXCACHES; n++)
+        p->caches[n]=(threadcache *) poison;
+    for(n=0; n<=MAXTHREADSINPOOL; n++)
+        p->m[n]=(mstate) poison;
+    if(TLSFREE(p->mycache)) abort();
+    RELEASE_LOCK(&p->mutex);
+}
+/* Returns a copy, allocated with nedmalloc(), of the current pool list
+   including the entry following the last pool. Returns 0 if no pool list
+   exists or the allocation fails. */
+nedpool **nedpoollist() THROWSPEC
+{
+    nedpool **ret=0;
+    if(!poollist) return ret;
+    ACQUIRE_LOCK(&poollistlock);
+    {
+        size_t bytes=(poollist->length+1)*sizeof(nedpool *);
+        ret=(nedpool **) nedmalloc(bytes);
+        if(ret) memcpy(ret, poollist->list, bytes);
+    }
+    RELEASE_LOCK(&poollistlock);
+    return ret;
+}
+
+/* Stores the user value v in pool p. A null p selects the system pool, which
+   is initialised first if it has not been already. */
+void nedpsetvalue(nedpool *p, void *v) THROWSPEC
+{
+    if(!p)
+    {
+        if(!syspool.threads) InitPool(&syspool, 0, -1);
+        p=&syspool;
+    }
+    p->uservalue=v;
+}
+/* Returns the user value of the pool which owns the block mem and, if p is
+   non-null, stores that pool in *p. Returns 0 (leaving *p untouched) when the
+   owning mspace cannot be determined or has no pool attached. */
+void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
+{
+    mstate fm=nedblkmstate(mem);
+    if(fm && fm->extp)
+    {
+        nedpool *np=(nedpool *) fm->extp;
+        if(p) *p=np;
+        return np->uservalue;
+    }
+    return 0;
+}
+
+/* Releases every block held in the calling thread's cache for pool p (a null p
+   selects the system pool). If disable is non-zero the cache itself is freed as
+   well and the thread's TLS entry is switched to "use an mspace directly", so
+   no new cache is allocated for this thread. Aborts if the TLS value cannot be
+   set. */
+void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC
+{
+    int mycache;
+    if(!p)
+    {
+        p=&syspool;
+        if(!syspool.threads) InitPool(&syspool, 0, -1);
+    }
+    mycache=(int)(size_t) TLSGET(p->mycache);
+    if(!mycache)
+    { /* Set to mspace 0 */
+        if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort();
+    }
+    else if(mycache>0)
+    { /* Set to last used mspace */
+        threadcache *tc=p->caches[mycache-1];
+#if defined(DEBUG)
+        printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
+            100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
+#endif
+        /* "Use mspace n directly" is stored as -(n+1) and decoded as -mycache-1.
+           Storing plain -n would turn mspace 0 into "unset" (so a fresh cache
+           would be allocated) and select the wrong mspace for any other n. */
+        if(disable && TLSSET(p->mycache, (void *)(size_t)(-(tc->mymspace+1)))) abort();
+        tc->frees++;
+        RemoveCacheEntries(p, tc, 0);
+        assert(!tc->freeInCache);
+        if(disable)
+        {
+            tc->mymspace=-1;
+            tc->threadid=0;
+            CallFree(0, p->caches[mycache-1], 0);
+            p->caches[mycache-1]=0;
+        }
+    }
+}
+/* Flushes and disables the calling thread's cache for pool p: shorthand for
+   nedtrimthreadcache() with disable set. */
+void neddisablethreadcache(nedpool *p) THROWSPEC
+{
+    nedtrimthreadcache(p, /*disable=*/1);
+}
+
+/* Declares m as the mspace returned by GetMSpace(p, tc, ms, s), executes
+   action with m in scope and then, when USE_ALLOCATOR==1, releases m's mutex. */
+#define GETMSPACE(m,p,tc,ms,s,action) \
+ do \
+ { \
+ mstate m = GetMSpace((p),(tc),(ms),(s)); \
+ action; \
+ if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); } \
+ } while (0)
+
+static FORCEINLINE mstate GetMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, size_t size) THROWSPEC
+{ /* Returns a locked and ready for use mspace */
+ mstate m=p->m[mymspace]; /* Start with the caller's preferred mspace */
+ assert(m);
+#if USE_ALLOCATOR==1
+ if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size); /* Preferred mspace is busy: FindMSpace() picks (and presumably locks) another - confirm in FindMSpace */
+ /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
+#endif
+ return m;
+}
+static NOINLINE void GetThreadCache_cold1(nedpool *RESTRICT *RESTRICT p) THROWSPEC
+{ /* Out-of-line path of GetThreadCache() for a null pool: substitute syspool */
+ *p=&syspool;
+ if(!syspool.threads) InitPool(&syspool, 0, -1); /* syspool.threads==0 is used as the "not yet initialised" test */
+}
+static NOINLINE void GetThreadCache_cold2(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, int mycache) THROWSPEC
+{ /* Out-of-line path of GetThreadCache() taken when the TLS slot does not hold a positive cache index */
+ if(!mycache)
+ { /* Need to allocate a new cache */
+ *tc=AllocCache(*p);
+ if(!*tc)
+ { /* Disable */
+ if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort();
+ *mymspace=0; /* Carry on without a cache, using mspace 0 */
+ }
+ else
+ *mymspace=(*tc)->mymspace;
+ }
+ else
+ { /* Cache disabled, but we do have an assigned thread pool */
+ *tc=0;
+ *mymspace=-mycache-1; /* Decodes the mspace index from the negative TLS value; -1 maps to mspace 0 */
+ }
+}
+static FORCEINLINE void GetThreadCache(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, size_t *RESTRICT size) THROWSPEC
+{ /* Resolves the pool (syspool if *p is null), the calling thread's cache (or 0) and its mspace index */
+ int mycache;
+ if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk); /* Raises the request to the minimum block size; size may be null */
+ if(!*p)
+ GetThreadCache_cold1(p);
+ mycache=(int)(size_t) TLSGET((*p)->mycache); /* >0 is index+1 into (*p)->caches; everything else goes to the cold path */
+ if(mycache>0)
+ { /* Already have a cache */
+ *tc=(*p)->caches[mycache-1];
+ *mymspace=(*tc)->mymspace;
+ }
+ else GetThreadCache_cold2(p, tc, mymspace, mycache);
+ assert(*mymspace>=0);
+ assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid); /* A cache must only be used by the thread recorded in it */
+#ifdef FULLSANITYCHECKS
+ if(*tc)
+ {
+ if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2) /* Compares the leading sizeof(unsigned int) bytes of each tag with the cache's magic fields */
+ {
+ abort();
+ }
+ }
+#endif
+}
+
+NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
+{ /* Allocates size bytes from pool p (syspool if null): thread cache first for requests up to THREADCACHEMAX, then the thread's mspace */
+ void *ret=0;
+ threadcache *tc;
+ int mymspace;
+ GetThreadCache(&p, &tc, &mymspace, &size); /* May replace a null p and raise size to sizeof(threadcacheblk) */
+#if THREADCACHEMAX
+ if(tc && size<=THREADCACHEMAX)
+ { /* Use the thread cache */
+ ret=threadcache_malloc(p, tc, &size);
+ }
+#endif
+ if(!ret)
+ { /* Use this thread's mspace */
+ GETMSPACE(m, p, tc, mymspace, size,
+ ret=CallMalloc(m, size, 0));
+ }
+ return ret;
+}
+NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
+{ /* Allocates zero-filled storage for no elements of size bytes from pool p (syspool if null); returns 0 on failure or when no*size overflows size_t */
+ size_t rsize=size*no;
+ void *ret=0;
+ threadcache *tc;
+ int mymspace;
+ if(size && no>((size_t)-1)/size) return 0; /* no*size wrapped: fail like calloc() rather than hand back an undersized block */
+ GetThreadCache(&p, &tc, &mymspace, &rsize); /* May replace a null p and raise rsize to sizeof(threadcacheblk) */
+#if THREADCACHEMAX
+ if(tc && rsize<=THREADCACHEMAX)
+ { /* Use the thread cache */
+ if((ret=threadcache_malloc(p, tc, &rsize)))
+ memset(ret, 0, rsize); /* Zero the block ourselves on the cache path */
+ }
+#endif
+ if(!ret)
+ { /* Use this thread's mspace */
+ GETMSPACE(m, p, tc, mymspace, rsize, ret=CallCalloc(m, rsize, 0));
+ }
+ return ret;
+}
+NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
+{ /* Resizes mem to size bytes; a null mem behaves like nedpmalloc() and a block nedblksize() reports as size 0 aborts */
+ void *ret=0;
+ threadcache *tc;
+ int mymspace, isforeign=1;
+ size_t memsize;
+ if(!mem) return nedpmalloc(p, size);
+ memsize=nedblksize(&isforeign, mem); /* Size of the existing block; 0 is treated as "not a nedmalloc block" */
+ assert(memsize);
+ if(!memsize)
+ {
+ fprintf(stderr, "nedmalloc: nedprealloc() called with a block not created by nedmalloc!\n");
+ abort();
+ }
+ else if(size<=memsize && memsize-size<
+#ifdef DEBUG
+ 32
+#else
+ 1024
+#endif
+ ) /* If realloc size is within 1Kb (32 bytes in DEBUG builds) smaller than existing, noop it */
+ return mem;
+ GetThreadCache(&p, &tc, &mymspace, &size);
+#if THREADCACHEMAX
+ if(tc && size && size<=THREADCACHEMAX)
+ { /* Use the thread cache */
+ if((ret=threadcache_malloc(p, tc, &size)))
+ {
+ memcpy(ret, mem, memsize<size ? memsize : size); /* Copy no more than the smaller of the two blocks */
+ if(memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD)) /* Old block is within the cacheable size range: hand it to the thread cache */
+ threadcache_free(p, tc, mymspace, mem, memsize);
+ else
+ CallFree(0, mem, isforeign);
+ }
+ }
+#endif
+ if(!ret)
+ { /* Reallocs always happen in the mspace they happened in, so skip
+ locking the preferred mspace for this thread */
+ ret=CallRealloc(p->m[mymspace], mem, isforeign, memsize, size);
+ }
+ return ret;
+}
+void nedpfree(nedpool *p, void *mem) THROWSPEC
+{ /* Frees mem; a null mem is ignored. Frees always happen in the mspace they happened in, so skip
+ locking the preferred mspace for this thread */
+ threadcache *tc;
+ int mymspace, isforeign=1;
+ size_t memsize;
+ if(!mem)
+ { /* If you tried this on FreeBSD you'd be sorry! */
+#ifdef DEBUG
+ fprintf(stderr, "nedmalloc: WARNING nedpfree() called with zero. This is not portable behaviour!\n");
+#endif
+ return;
+ }
+ memsize=nedblksize(&isforeign, mem); /* Size of the block; 0 is treated as "not a nedmalloc block" */
+ assert(memsize);
+ if(!memsize)
+ {
+ fprintf(stderr, "nedmalloc: nedpfree() called with a block not created by nedmalloc!\n");
+ abort();
+ }
+ GetThreadCache(&p, &tc, &mymspace, 0); /* No request size to adjust, hence the null size pointer */
+#if THREADCACHEMAX
+ if(mem && tc && memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD)) /* Block is within the cacheable size range: park it in the thread cache */
+ threadcache_free(p, tc, mymspace, mem, memsize);
+ else
+#endif
+ CallFree(0, mem, isforeign);
+}
+NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
+{ /* Allocates bytes with the requested alignment directly from the thread's mspace; the thread cache is not consulted */
+ void *ret;
+ threadcache *tc;
+ int mymspace;
+ GetThreadCache(&p, &tc, &mymspace, &bytes); /* May replace a null p and raise bytes to sizeof(threadcacheblk) */
+ { /* Use this thread's mspace */
+ GETMSPACE(m, p, tc, mymspace, bytes,
+ ret=CallMalloc(m, bytes, alignment));
+ }
+ return ret;
+}
+struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC
+{ /* Sums the mspace_mallinfo() figures of every mspace in pool p (syspool if null); stays all-zero unless USE_ALLOCATOR==1 && !NO_MALLINFO */
+ int n;
+ struct nedmallinfo ret={0};
+ if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+ for(n=0; p->m[n]; n++) /* p->m is walked up to its first null entry */
+ {
+#if USE_ALLOCATOR==1 && !NO_MALLINFO
+ struct mallinfo t=mspace_mallinfo(p->m[n]);
+ ret.arena+=t.arena;
+ ret.ordblks+=t.ordblks;
+ ret.hblkhd+=t.hblkhd;
+ ret.usmblks+=t.usmblks;
+ ret.uordblks+=t.uordblks;
+ ret.fordblks+=t.fordblks;
+ ret.keepcost+=t.keepcost;
+#endif
+ }
+ return ret;
+}
+int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
+{ /* Forwards to mspace_mallopt() when USE_ALLOCATOR==1, otherwise returns 0; p is not used */
+#if USE_ALLOCATOR==1
+ return mspace_mallopt(parno, value);
+#else
+ return 0;
+#endif
+}
+NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC
+{ /* Reports mparams.granularity and mparams.magic through the optional out-pointers and returns the address of syspool */
+#if USE_ALLOCATOR==1
+ if(granularity) *granularity=mparams.granularity;
+ if(magic) *magic=mparams.magic;
+ return (void *) &syspool;
+#else /* Other backends: report zeros and a null pointer */
+ if(granularity) *granularity=0;
+ if(magic) *magic=0;
+ return 0;
+#endif
+}
+int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
+{ /* Calls mspace_trim() with pad on every mspace of pool p (syspool if null) and returns the sum of the results; 0 unless USE_ALLOCATOR==1 */
+ int n, ret=0;
+ if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+ for(n=0; p->m[n]; n++) /* p->m is walked up to its first null entry */
+ {
+#if USE_ALLOCATOR==1
+ ret+=mspace_trim(p->m[n], pad);
+#endif
+ }
+ return ret;
+}
+void nedpmalloc_stats(nedpool *p) THROWSPEC
+{ /* Calls mspace_malloc_stats() on every mspace of pool p (syspool if null); does nothing unless USE_ALLOCATOR==1 */
+ int n;
+ if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+ for(n=0; p->m[n]; n++) /* p->m is walked up to its first null entry */
+ {
+#if USE_ALLOCATOR==1
+ mspace_malloc_stats(p->m[n]);
+#endif
+ }
+}
+size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
+{ /* Returns the sum of mspace_footprint() over every mspace of pool p (syspool if null); 0 unless USE_ALLOCATOR==1 */
+ size_t ret=0;
+ int n;
+ if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+ for(n=0; p->m[n]; n++) /* p->m is walked up to its first null entry */
+ {
+#if USE_ALLOCATOR==1
+ ret+=mspace_footprint(p->m[n]);
+#endif
+ }
+ return ret;
+}
+NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
+{ /* Forwards to mspace_independent_calloc() on an mspace obtained through GETMSPACE; reports an unsupported operation when USE_ALLOCATOR==0 */
+ void **ret; /* NOTE(review): stays unassigned if USE_ALLOCATOR is neither 0 nor 1 */
+ threadcache *tc;
+ int mymspace;
+ GetThreadCache(&p, &tc, &mymspace, &elemsize); /* May replace a null p and raise elemsize to sizeof(threadcacheblk) */
+#if USE_ALLOCATOR==0
+ GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
+ ret=unsupported_operation("independent_calloc"));
+#elif USE_ALLOCATOR==1
+ GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
+ ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
+#endif
+ return ret;
+}
+NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
+{ /* Forwards to mspace_independent_comalloc() with every requested size raised to at least sizeof(threadcacheblk); reports an unsupported operation when USE_ALLOCATOR==0 */
+ void **ret; /* NOTE(review): stays unassigned if USE_ALLOCATOR is neither 0 nor 1 */
+ threadcache *tc;
+ int mymspace;
+ size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t)); /* Scratch copy of sizes from alloca(); NOTE(review): elems is not bounded here */
+ if(!adjustedsizes) return 0;
+ for(i=0; i<elems; i++)
+ adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i]; /* The caller's sizes array is left unmodified */
+ GetThreadCache(&p, &tc, &mymspace, 0); /* Sizes were already adjusted above, hence the null size pointer */
+#if USE_ALLOCATOR==0
+ GETMSPACE(m, p, tc, mymspace, 0,
+ ret=unsupported_operation("independent_comalloc"));
+#elif USE_ALLOCATOR==1
+ GETMSPACE(m, p, tc, mymspace, 0,
+ ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
+#endif
+ return ret;
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif
diff --git a/drivers/nedmalloc/nedmalloc.h b/drivers/nedmalloc/nedmalloc.h
index b9add1683a..7ec65849fc 100644
--- a/drivers/nedmalloc/nedmalloc.h
+++ b/drivers/nedmalloc/nedmalloc.h
@@ -1,302 +1,302 @@
-#ifdef NEDMALLOC_ENABLED
-
-/* nedalloc, an alternative malloc implementation for multiple threads without
-lock contention based on dlmalloc v2.8.3. (C) 2005-2009 Niall Douglas
-
-Boost Software License - Version 1.0 - August 17th, 2003
-
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-#ifndef NEDMALLOC_H
-#define NEDMALLOC_H
-
-#include "typedefs.h"
-#define MALLOC_ALIGNMENT DEFAULT_ALIGNMENT
-
-#ifdef PSP_ENABLED
-#define USE_LOCKS 0
-#define HAVE_MMAP 0
-#endif
-
-/* See malloc.c.h for what each function does.
-
-REPLACE_SYSTEM_ALLOCATOR on POSIX causes nedalloc's functions to be called
-malloc, free etc. instead of nedmalloc, nedfree etc. You may or may not want
-this. On Windows it causes nedmalloc to patch all loaded DLLs and binaries
-to replace usage of the system allocator.
-
-NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc
-namespace when in C++ (uses the global namespace instead).
-
-NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or
-__attribute__ ((visibility("default"))) or whatever you like. It defaults
-to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building
-nedmalloc.dll.
-
-USE_LOCKS can be 2 if you want to define your own MLOCK_T, INITIAL_LOCK,
-ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER.
-
-NEDMALLOC_DEBUG can be defined to cause DEBUG to be set differently for nedmalloc
-than for the rest of the build. Remember to set NDEBUG to disable all assertion
-checking too.
-
-USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t)
-to each block. nedpfree() and nedprealloc() can then automagically know when
-to free a system allocated block. Enabling this typically adds 20-50% to
-application memory usage.
-
-ENABLE_TOLERANT_NEDMALLOC is automatically turned on if REPLACE_SYSTEM_ALLOCATOR
-is set or the Windows DLL is being built. This causes nedmalloc to detect when a
-system allocator block is passed to it and to handle it appropriately. Note that
-without USE_MAGIC_HEADERS there is a very tiny chance that nedmalloc will segfault
-on non-Windows builds (it uses Win32 SEH to trap segfaults on Windows and there
-is no comparable system on POSIX).
-
-USE_ALLOCATOR can be one of these settings (it defaults to 1):
- 0: System allocator (nedmalloc now simply acts as a threadcache).
- WARNING: Intended for DEBUG USE ONLY - not all functions work correctly.
- 1: dlmalloc
-
-ENABLE_LARGE_PAGES enables support for requesting memory from the system in large
-(typically >=2Mb) pages if the host OS supports this. These occupy just a single
-TLB entry and can significantly improve performance in large working set applications.
-
-ENABLE_FAST_HEAP_DETECTION enables special logic to detect blocks allocated
-by the system heap. This avoids 1.5%-2% overhead when checking for non-nedmalloc
-blocks, but it assumes that the NT and glibc heaps function in a very specific
-fashion which may not hold true across OS upgrades.
-*/
-
-#include <stddef.h> /* for size_t */
-
-#ifndef NEDMALLOCEXTSPEC
- #ifdef NEDMALLOC_DLL_EXPORTS
- #ifdef WIN32
- #define NEDMALLOCEXTSPEC extern __declspec(dllexport)
- #elif defined(__GNUC__)
- #define NEDMALLOCEXTSPEC extern __attribute__ ((visibility("default")))
- #endif
- #ifndef ENABLE_TOLERANT_NEDMALLOC
- #define ENABLE_TOLERANT_NEDMALLOC 1
- #endif
- #else
- #define NEDMALLOCEXTSPEC extern
- #endif
-#endif
-
-#if __STDC_VERSION__ >= 199901L /* C99 or better */
- #define RESTRICT restrict
-#else
- #if defined(_MSC_VER) && _MSC_VER>=1400
- #define RESTRICT __restrict
- #endif
- #ifdef __GNUC__
- #define RESTRICT __restrict
- #endif
-#endif
-#ifndef RESTRICT
- #define RESTRICT
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER>=1400
- #define NEDMALLOCPTRATTR __declspec(restrict)
- #define NEDMALLOCNOALIASATTR __declspec(noalias)
-#endif
-#ifdef __GNUC__
- #define NEDMALLOCPTRATTR __attribute__ ((malloc))
-#endif
-#ifndef NEDMALLOCPTRATTR
- #define NEDMALLOCPTRATTR
-#endif
-#ifndef NEDMALLOCNOALIASATTR
- #define NEDMALLOCNOALIASATTR
-#endif
-
-#ifndef USE_MAGIC_HEADERS
- #define USE_MAGIC_HEADERS 0
-#endif
-
-#ifndef USE_ALLOCATOR
- #define USE_ALLOCATOR 1 /* dlmalloc */
-#endif
-
-#if !USE_ALLOCATOR && !USE_MAGIC_HEADERS
-#error If you are using the system allocator then you MUST use magic headers
-#endif
-
-#ifdef REPLACE_SYSTEM_ALLOCATOR
- #if USE_ALLOCATOR==0
- #error Cannot combine using the system allocator with replacing the system allocator
- #endif
- #ifndef ENABLE_TOLERANT_NEDMALLOC
- #define ENABLE_TOLERANT_NEDMALLOC 1
- #endif
- #ifndef WIN32 /* We have a dedicated patcher for Windows */
- #define nedmalloc malloc
- #define nedcalloc calloc
- #define nedrealloc realloc
- #define nedfree free
- #define nedmemalign memalign
- #define nedmallinfo mallinfo
- #define nedmallopt mallopt
- #define nedmalloc_trim malloc_trim
- #define nedmalloc_stats malloc_stats
- #define nedmalloc_footprint malloc_footprint
- #define nedindependent_calloc independent_calloc
- #define nedindependent_comalloc independent_comalloc
- #ifdef _MSC_VER
- #define nedblksize _msize
- #endif
- #endif
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-struct nedmallinfo {
- size_t arena; /* non-mmapped space allocated from system */
- size_t ordblks; /* number of free chunks */
- size_t smblks; /* always 0 */
- size_t hblks; /* always 0 */
- size_t hblkhd; /* space in mmapped regions */
- size_t usmblks; /* maximum total allocated space */
- size_t fsmblks; /* always 0 */
- size_t uordblks; /* total allocated space */
- size_t fordblks; /* total free space */
- size_t keepcost; /* releasable (via malloc_trim) space */
-};
-#if defined(__cplusplus)
-}
-#endif
-
-#if defined(__cplusplus)
- #if !defined(NO_NED_NAMESPACE)
-namespace nedalloc {
- #else
-extern "C" {
- #endif
- #define THROWSPEC throw()
-#else
- #define THROWSPEC
-#endif
-
-/* These are the global functions */
-
-/* Gets the usable size of an allocated block. Note this will always be bigger than what was
-asked for due to rounding etc. Optionally returns 1 in isforeign if the block came from the
-system allocator - note that there is a small (>0.01%) but real chance of segfault on non-Windows
-systems when passing non-nedmalloc blocks if you don't use USE_MAGIC_HEADERS.
-*/
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC;
-
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC;
-
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC;
-NEDMALLOCEXTSPEC void nedmalloc_stats(void) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmalloc_footprint(void) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC;
-
-/* Destroys the system memory pool used by the functions above.
-Useful for when you have nedmalloc in a DLL you're about to unload.
-If you call ANY nedmalloc functions after calling this you will
-get a fatal exception!
-*/
-NEDMALLOCEXTSPEC void neddestroysyspool() THROWSPEC;
-
-/* These are the pool functions */
-struct nedpool_t;
-typedef struct nedpool_t nedpool;
-
-/* Creates a memory pool for use with the nedp* functions below.
-Capacity is how much to allocate immediately (if you know you'll be allocating a lot
-of memory very soon) which you can leave at zero. Threads specifies how many threads
-will *normally* be accessing the pool concurrently. Setting this to zero means it
-extends on demand, but be careful of this as it can rapidly consume system resources
-where bursts of concurrent threads use a pool at once.
-*/
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC;
-
-/* Destroys a memory pool previously created by nedcreatepool().
-*/
-NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC;
-
-/* Returns a zero terminated snapshot of threadpools existing at the time of call. Call
-nedfree() on the returned list when you are done. Returns zero if there is only the
-system pool in existence.
-*/
-NEDMALLOCEXTSPEC nedpool **nedpoollist() THROWSPEC;
-
-/* Sets a value to be associated with a pool. You can retrieve this value by passing
-any memory block allocated from that pool.
-*/
-NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC;
-
-/* Gets a previously set value using nedpsetvalue() or zero if memory is unknown.
-Optionally can also retrieve pool. You can detect an unknown block by the return
-being zero and *p being unmodifed.
-*/
-NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC;
-
-/* Trims the thread cache for the calling thread, returning any existing cache
-data to the central pool. Remember to ALWAYS call with zero if you used the
-system pool. Setting disable to non-zero replicates neddisablethreadcache().
-*/
-NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC;
-
-/* Disables the thread cache for the calling thread, returning any existing cache
-data to the central pool. Remember to ALWAYS call with zero if you used the
-system pool.
-*/
-NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC;
-
-
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC void nedpfree(nedpool *p, void *mem) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC;
-NEDMALLOCEXTSPEC struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC;
-NEDMALLOCEXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC;
-NEDMALLOCEXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC;
-NEDMALLOCEXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC;
-NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC;
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
-
-#endif
+#ifdef NEDMALLOC_ENABLED
+
+/* nedalloc, an alternative malloc implementation for multiple threads without
+lock contention based on dlmalloc v2.8.3. (C) 2005-2009 Niall Douglas
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef NEDMALLOC_H
+#define NEDMALLOC_H
+
+#include "typedefs.h"
+#define MALLOC_ALIGNMENT DEFAULT_ALIGNMENT
+
+#ifdef PSP_ENABLED
+#define USE_LOCKS 0
+#define HAVE_MMAP 0
+#endif
+
+/* See malloc.c.h for what each function does.
+
+REPLACE_SYSTEM_ALLOCATOR on POSIX causes nedalloc's functions to be called
+malloc, free etc. instead of nedmalloc, nedfree etc. You may or may not want
+this. On Windows it causes nedmalloc to patch all loaded DLLs and binaries
+to replace usage of the system allocator.
+
+NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc
+namespace when in C++ (uses the global namespace instead).
+
+NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or
+__attribute__ ((visibility("default"))) or whatever you like. It defaults
+to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building
+nedmalloc.dll.
+
+USE_LOCKS can be 2 if you want to define your own MLOCK_T, INITIAL_LOCK,
+ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER.
+
+NEDMALLOC_DEBUG can be defined to cause DEBUG to be set differently for nedmalloc
+than for the rest of the build. Remember to set NDEBUG to disable all assertion
+checking too.
+
+USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t)
+to each block. nedpfree() and nedprealloc() can then automagically know when
+to free a system allocated block. Enabling this typically adds 20-50% to
+application memory usage.
+
+ENABLE_TOLERANT_NEDMALLOC is automatically turned on if REPLACE_SYSTEM_ALLOCATOR
+is set or the Windows DLL is being built. This causes nedmalloc to detect when a
+system allocator block is passed to it and to handle it appropriately. Note that
+without USE_MAGIC_HEADERS there is a very tiny chance that nedmalloc will segfault
+on non-Windows builds (it uses Win32 SEH to trap segfaults on Windows and there
+is no comparable system on POSIX).
+
+USE_ALLOCATOR can be one of these settings (it defaults to 1):
+ 0: System allocator (nedmalloc now simply acts as a threadcache).
+ WARNING: Intended for DEBUG USE ONLY - not all functions work correctly.
+ 1: dlmalloc
+
+ENABLE_LARGE_PAGES enables support for requesting memory from the system in large
+(typically >=2Mb) pages if the host OS supports this. These occupy just a single
+TLB entry and can significantly improve performance in large working set applications.
+
+ENABLE_FAST_HEAP_DETECTION enables special logic to detect blocks allocated
+by the system heap. This avoids 1.5%-2% overhead when checking for non-nedmalloc
+blocks, but it assumes that the NT and glibc heaps function in a very specific
+fashion which may not hold true across OS upgrades.
+*/
+
+#include <stddef.h> /* for size_t */
+
+#ifndef NEDMALLOCEXTSPEC
+ #ifdef NEDMALLOC_DLL_EXPORTS
+ #ifdef WIN32
+ #define NEDMALLOCEXTSPEC extern __declspec(dllexport)
+ #elif defined(__GNUC__)
+ #define NEDMALLOCEXTSPEC extern __attribute__ ((visibility("default")))
+ #endif
+ #ifndef ENABLE_TOLERANT_NEDMALLOC
+ #define ENABLE_TOLERANT_NEDMALLOC 1
+ #endif
+ #else
+ #define NEDMALLOCEXTSPEC extern
+ #endif
+#endif
+
+#if __STDC_VERSION__ >= 199901L /* C99 or better */
+ #define RESTRICT restrict
+#else
+ #if defined(_MSC_VER) && _MSC_VER>=1400
+ #define RESTRICT __restrict
+ #endif
+ #ifdef __GNUC__
+ #define RESTRICT __restrict
+ #endif
+#endif
+#ifndef RESTRICT
+ #define RESTRICT
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER>=1400
+ #define NEDMALLOCPTRATTR __declspec(restrict)
+ #define NEDMALLOCNOALIASATTR __declspec(noalias)
+#endif
+#ifdef __GNUC__
+ #define NEDMALLOCPTRATTR __attribute__ ((malloc))
+#endif
+#ifndef NEDMALLOCPTRATTR
+ #define NEDMALLOCPTRATTR
+#endif
+#ifndef NEDMALLOCNOALIASATTR
+ #define NEDMALLOCNOALIASATTR
+#endif
+
+#ifndef USE_MAGIC_HEADERS
+ #define USE_MAGIC_HEADERS 0
+#endif
+
+#ifndef USE_ALLOCATOR
+ #define USE_ALLOCATOR 1 /* dlmalloc */
+#endif
+
+#if !USE_ALLOCATOR && !USE_MAGIC_HEADERS
+#error If you are using the system allocator then you MUST use magic headers
+#endif
+
+#ifdef REPLACE_SYSTEM_ALLOCATOR
+ #if USE_ALLOCATOR==0
+ #error Cannot combine using the system allocator with replacing the system allocator
+ #endif
+ #ifndef ENABLE_TOLERANT_NEDMALLOC
+ #define ENABLE_TOLERANT_NEDMALLOC 1
+ #endif
+ #ifndef WIN32 /* We have a dedicated patcher for Windows */
+ #define nedmalloc malloc
+ #define nedcalloc calloc
+ #define nedrealloc realloc
+ #define nedfree free
+ #define nedmemalign memalign
+ #define nedmallinfo mallinfo
+ #define nedmallopt mallopt
+ #define nedmalloc_trim malloc_trim
+ #define nedmalloc_stats malloc_stats
+ #define nedmalloc_footprint malloc_footprint
+ #define nedindependent_calloc independent_calloc
+ #define nedindependent_comalloc independent_comalloc
+ #ifdef _MSC_VER
+ #define nedblksize _msize
+ #endif
+ #endif
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+struct nedmallinfo { /* Heap statistics record returned by nedmallinfo() and nedpmallinfo() */
+ size_t arena; /* non-mmapped space allocated from system */
+ size_t ordblks; /* number of free chunks */
+ size_t smblks; /* always 0 */
+ size_t hblks; /* always 0 */
+ size_t hblkhd; /* space in mmapped regions */
+ size_t usmblks; /* maximum total allocated space */
+ size_t fsmblks; /* always 0 */
+ size_t uordblks; /* total allocated space */
+ size_t fordblks; /* total free space */
+ size_t keepcost; /* releasable (via malloc_trim) space */
+};
+#if defined(__cplusplus)
+}
+#endif
+
+#if defined(__cplusplus)
+ #if !defined(NO_NED_NAMESPACE)
+namespace nedalloc {
+ #else
+extern "C" {
+ #endif
+ #define THROWSPEC throw()
+#else
+ #define THROWSPEC
+#endif
+
+/* These are the global functions */
+
+/* Gets the usable size of an allocated block. Note this will always be bigger than what was
+asked for due to rounding etc. Optionally returns 1 in isforeign if the block came from the
+system allocator - note that there is a small (>0.01%) but real chance of segfault on non-Windows
+systems when passing non-nedmalloc blocks if you don't use USE_MAGIC_HEADERS.
+*/
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC;
+
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC;
+
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC;
+NEDMALLOCEXTSPEC void nedmalloc_stats(void) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmalloc_footprint(void) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC;
+
+/* Destroys the system memory pool used by the functions above.
+Useful for when you have nedmalloc in a DLL you're about to unload.
+If you call ANY nedmalloc functions after calling this you will
+get a fatal exception!
+*/
+NEDMALLOCEXTSPEC void neddestroysyspool() THROWSPEC;
+
+/* These are the pool functions */
+struct nedpool_t;
+typedef struct nedpool_t nedpool;
+
+/* Creates a memory pool for use with the nedp* functions below.
+Capacity is how much to allocate immediately (if you know you'll be allocating a lot
+of memory very soon) which you can leave at zero. Threads specifies how many threads
+will *normally* be accessing the pool concurrently. Setting this to zero means it
+extends on demand, but be careful of this as it can rapidly consume system resources
+where bursts of concurrent threads use a pool at once.
+*/
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC;
+
+/* Destroys a memory pool previously created by nedcreatepool().
+*/
+NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC;
+
+/* Returns a zero terminated snapshot of threadpools existing at the time of call. Call
+nedfree() on the returned list when you are done. Returns zero if there is only the
+system pool in existence.
+*/
+NEDMALLOCEXTSPEC nedpool **nedpoollist() THROWSPEC;
+
+/* Sets a value to be associated with a pool. You can retrieve this value by passing
+any memory block allocated from that pool.
+*/
+NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC;
+
+/* Gets a previously set value using nedpsetvalue() or zero if memory is unknown.
+Optionally can also retrieve pool. You can detect an unknown block by the return
+being zero and *p being unmodified.
+*/
+NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC;
+
+/* Trims the thread cache for the calling thread, returning any existing cache
+data to the central pool. Remember to ALWAYS call with zero if you used the
+system pool. Setting disable to non-zero replicates neddisablethreadcache().
+*/
+NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC;
+
+/* Disables the thread cache for the calling thread, returning any existing cache
+data to the central pool. Remember to ALWAYS call with zero if you used the
+system pool.
+*/
+NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC;
+
+
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC void nedpfree(nedpool *p, void *mem) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC;
+NEDMALLOCEXTSPEC struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC;
+NEDMALLOCEXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC;
+NEDMALLOCEXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC;
+NEDMALLOCEXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC;
+NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
+
+#endif
diff --git a/drivers/openssl/register_openssl.cpp b/drivers/openssl/register_openssl.cpp
index a4a60813b6..ed2150bef5 100644
--- a/drivers/openssl/register_openssl.cpp
+++ b/drivers/openssl/register_openssl.cpp
@@ -1,19 +1,19 @@
-#include "register_openssl.h"
-
-#include "stream_peer_openssl.h"
-#ifdef OPENSSL_ENABLED
-
-void register_openssl() {
-
- ObjectTypeDB::register_type<StreamPeerOpenSSL>();
- StreamPeerOpenSSL::initialize_ssl();
-
-}
-
-void unregister_openssl() {
-
- StreamPeerOpenSSL::finalize_ssl();
-
-}
-#endif
-
+#include "register_openssl.h"
+
+#include "stream_peer_openssl.h"
+#ifdef OPENSSL_ENABLED
+// Registers the StreamPeerOpenSSL type with ObjectTypeDB, then runs its static SSL initialisation.
+void register_openssl() {
+
+ ObjectTypeDB::register_type<StreamPeerOpenSSL>();
+ StreamPeerOpenSSL::initialize_ssl();
+
+}
+// Counterpart of register_openssl(): runs StreamPeerOpenSSL's static SSL finalisation (the type itself is not unregistered here).
+void unregister_openssl() {
+
+ StreamPeerOpenSSL::finalize_ssl();
+
+}
+#endif
+
diff --git a/drivers/openssl/register_openssl.h b/drivers/openssl/register_openssl.h
index e1c554ca4a..e547a2b750 100644
--- a/drivers/openssl/register_openssl.h
+++ b/drivers/openssl/register_openssl.h
@@ -1,11 +1,11 @@
-#ifndef REGISTER_OPENSSL_H
-#define REGISTER_OPENSSL_H
-
-#ifdef OPENSSL_ENABLED
-
-void register_openssl();
-void unregister_openssl();
-
-#endif
-
-#endif // REGISTER_OPENSSL_H
+#ifndef REGISTER_OPENSSL_H
+#define REGISTER_OPENSSL_H
+
+#ifdef OPENSSL_ENABLED
+
+void register_openssl();
+void unregister_openssl();
+
+#endif
+
+#endif // REGISTER_OPENSSL_H
diff --git a/drivers/opus/SCsub b/drivers/opus/SCsub
new file mode 100644
index 0000000000..a2bebf62b3
--- /dev/null
+++ b/drivers/opus/SCsub
@@ -0,0 +1,200 @@
+Import('env')
+# Sources of this driver outside the bundled library list below; opus_sources_silk is filled in further down.
+opus_sources = [
+ "opus/audio_stream_opus.cpp",
+]
+
+opus_sources_silk=[]
+# Bundled library sources: celt/, the shared silk/ files and the top-level files under opus/. Entries commented out with '#' (demo/compare programs, going by their names) are not built.
+opus_sources_lib = [
+ "opus/celt/bands.c",
+ "opus/celt/celt_lpc.c",
+ "opus/celt/entenc.c",
+ "opus/celt/mdct.c",
+ "opus/celt/quant_bands.c",
+ "opus/celt/celt.c",
+ "opus/celt/cwrs.c",
+ "opus/celt/kiss_fft.c",
+ "opus/celt/modes.c",
+ "opus/celt/rate.c",
+ "opus/celt/celt_decoder.c",
+ "opus/celt/entcode.c",
+ "opus/celt/laplace.c",
+ #"opus/celt/opus_custom_demo.c",
+ "opus/celt/vq.c",
+ "opus/celt/celt_encoder.c",
+ "opus/celt/entdec.c",
+ "opus/celt/mathops.c",
+ "opus/celt/pitch.c",
+ "opus/silk/A2NLSF.c",
+ "opus/silk/decoder_set_fs.c",
+ "opus/silk/NLSF_stabilize.c",
+ "opus/silk/sigm_Q15.c",
+ "opus/silk/ana_filt_bank_1.c",
+ "opus/silk/enc_API.c",
+ "opus/silk/NLSF_unpack.c",
+ "opus/silk/sort.c",
+ "opus/silk/biquad_alt.c",
+ "opus/silk/encode_indices.c",
+ "opus/silk/NLSF_VQ.c",
+ "opus/silk/stereo_decode_pred.c",
+ "opus/silk/bwexpander_32.c",
+ "opus/silk/encode_pulses.c",
+ "opus/silk/NLSF_VQ_weights_laroia.c",
+ "opus/silk/stereo_encode_pred.c",
+ "opus/silk/bwexpander.c",
+ "opus/silk/gain_quant.c",
+ "opus/silk/NSQ.c",
+ "opus/silk/stereo_find_predictor.c",
+ "opus/silk/check_control_input.c",
+ "opus/silk/HP_variable_cutoff.c",
+ "opus/silk/NSQ_del_dec.c",
+ "opus/silk/stereo_LR_to_MS.c",
+ "opus/silk/CNG.c",
+ "opus/silk/init_decoder.c",
+ "opus/silk/pitch_est_tables.c",
+ "opus/silk/stereo_MS_to_LR.c",
+ "opus/silk/code_signs.c",
+ "opus/silk/init_encoder.c",
+ "opus/silk/PLC.c",
+ "opus/silk/stereo_quant_pred.c",
+ "opus/silk/control_audio_bandwidth.c",
+ "opus/silk/inner_prod_aligned.c",
+ "opus/silk/process_NLSFs.c",
+ "opus/silk/sum_sqr_shift.c",
+ "opus/silk/control_codec.c",
+ "opus/silk/interpolate.c",
+ "opus/silk/quant_LTP_gains.c",
+ "opus/silk/table_LSF_cos.c",
+ "opus/silk/control_SNR.c",
+ "opus/silk/lin2log.c",
+ "opus/silk/resampler.c",
+ "opus/silk/tables_gain.c",
+ "opus/silk/debug.c",
+ "opus/silk/log2lin.c",
+ "opus/silk/resampler_down2_3.c",
+ "opus/silk/tables_LTP.c",
+ "opus/silk/dec_API.c",
+ "opus/silk/LPC_analysis_filter.c",
+ "opus/silk/resampler_down2.c",
+ "opus/silk/tables_NLSF_CB_NB_MB.c",
+ "opus/silk/decode_core.c",
+ "opus/silk/LPC_inv_pred_gain.c",
+ "opus/silk/resampler_private_AR2.c",
+ "opus/silk/tables_NLSF_CB_WB.c",
+ "opus/silk/decode_frame.c",
+ "opus/silk/LP_variable_cutoff.c",
+ "opus/silk/resampler_private_down_FIR.c",
+ "opus/silk/tables_other.c",
+ "opus/silk/decode_indices.c",
+ "opus/silk/NLSF2A.c",
+ "opus/silk/resampler_private_IIR_FIR.c",
+ "opus/silk/tables_pitch_lag.c",
+ "opus/silk/decode_parameters.c",
+ "opus/silk/NLSF_decode.c",
+ "opus/silk/resampler_private_up2_HQ.c",
+ "opus/silk/tables_pulses_per_block.c",
+ "opus/silk/decode_pitch.c",
+ "opus/silk/NLSF_del_dec_quant.c",
+ "opus/silk/resampler_rom.c",
+ "opus/silk/VAD.c",
+ "opus/silk/decode_pulses.c",
+ "opus/silk/NLSF_encode.c",
+ "opus/silk/shell_coder.c",
+ "opus/silk/VQ_WMat_EC.c",
+ "opus/analysis.c",
+ "opus/internal.c",
+ "opus/opus.c",
+ #"opus/opus_demo.c",
+ "opus/opus_multistream.c",
+ "opus/repacketizer.c",
+ "opus/wincerts.c",
+ "opus/http.c",
+ "opus/mlp.c",
+ #"opus/opus_compare.c",
+ "opus/opus_encoder.c",
+ "opus/opus_multistream_decoder.c",
+ #"opus/repacketizer_demo.c",
+ "opus/info.c",
+ "opus/mlp_data.c",
+ "opus/opus_decoder.c",
+ "opus/opusfile.c",
+ "opus/opus_multistream_encoder.c",
+ "opus/stream.c"
+]
+# Choose the SILK arithmetic variant: silk/fixed (plus -DOPUS_FIXED_POINT) when opus_fixed_point is "yes", silk/float otherwise.
+if("opus_fixed_point" in env and env.opus_fixed_point=="yes"): # NOTE(review): membership test and attribute access may not look at the same storage in SCons - confirm this branch is reachable
+ env.Append(CPPPATH=["#drivers/opus/silk/fixed"], CFLAGS=["-DOPUS_FIXED_POINT"])
+ opus_sources_silk = [
+ "opus/silk/fixed/apply_sine_window_FIX.c",
+ "opus/silk/fixed/k2a_FIX.c",
+ "opus/silk/fixed/residual_energy16_FIX.c",
+ "opus/silk/fixed/autocorr_FIX.c",
+ "opus/silk/fixed/k2a_Q16_FIX.c",
+ "opus/silk/fixed/residual_energy_FIX.c",
+ "opus/silk/fixed/burg_modified_FIX.c",
+ "opus/silk/fixed/LTP_analysis_filter_FIX.c",
+ "opus/silk/fixed/schur64_FIX.c",
+ "opus/silk/fixed/corrMatrix_FIX.c",
+ "opus/silk/fixed/LTP_scale_ctrl_FIX.c",
+ "opus/silk/fixed/schur_FIX.c",
+ "opus/silk/fixed/encode_frame_FIX.c",
+ "opus/silk/fixed/noise_shape_analysis_FIX.c",
+ "opus/silk/fixed/solve_LS_FIX.c",
+ "opus/silk/fixed/find_LPC_FIX.c",
+ "opus/silk/fixed/pitch_analysis_core_FIX.c",
+ "opus/silk/fixed/vector_ops_FIX.c",
+ "opus/silk/fixed/find_LTP_FIX.c",
+ "opus/silk/fixed/prefilter_FIX.c",
+ "opus/silk/fixed/warped_autocorrelation_FIX.c",
+ "opus/silk/fixed/find_pitch_lags_FIX.c",
+ "opus/silk/fixed/process_gains_FIX.c",
+ "opus/silk/fixed/find_pred_coefs_FIX.c",
+ "opus/silk/fixed/regularize_correlations_FIX.c"
+ ]
+else:
+ env.Append(CPPPATH=["#drivers/opus/silk/float"])
+ opus_sources_silk = [
+ "opus/silk/float/apply_sine_window_FLP.c",
+ "opus/silk/float/inner_product_FLP.c",
+ "opus/silk/float/regularize_correlations_FLP.c",
+ "opus/silk/float/autocorrelation_FLP.c",
+ "opus/silk/float/k2a_FLP.c",
+ "opus/silk/float/residual_energy_FLP.c",
+ "opus/silk/float/burg_modified_FLP.c",
+ "opus/silk/float/levinsondurbin_FLP.c",
+ "opus/silk/float/scale_copy_vector_FLP.c",
+ "opus/silk/float/bwexpander_FLP.c",
+ "opus/silk/float/LPC_analysis_filter_FLP.c",
+ "opus/silk/float/scale_vector_FLP.c",
+ "opus/silk/float/corrMatrix_FLP.c",
+ "opus/silk/float/LPC_inv_pred_gain_FLP.c",
+ "opus/silk/float/schur_FLP.c",
+ "opus/silk/float/encode_frame_FLP.c",
+ "opus/silk/float/LTP_analysis_filter_FLP.c",
+ "opus/silk/float/solve_LS_FLP.c",
+ "opus/silk/float/energy_FLP.c",
+ "opus/silk/float/LTP_scale_ctrl_FLP.c",
+ "opus/silk/float/sort_FLP.c",
+ "opus/silk/float/find_LPC_FLP.c",
+ "opus/silk/float/noise_shape_analysis_FLP.c",
+ "opus/silk/float/warped_autocorrelation_FLP.c",
+ "opus/silk/float/find_LTP_FLP.c",
+ "opus/silk/float/pitch_analysis_core_FLP.c",
+ "opus/silk/float/wrappers_FLP.c",
+ "opus/silk/float/find_pitch_lags_FLP.c",
+ "opus/silk/float/prefilter_FLP.c",
+ "opus/silk/float/find_pred_coefs_FLP.c",
+ "opus/silk/float/process_gains_FLP.c"
+ ]
+# Merge the selected SILK variant into the library list, then hand both the
+# library files and this driver's own sources to the shared drivers_sources list.
+opus_sources_lib+=opus_sources_silk
+env.drivers_sources+=opus_sources_lib
+env.drivers_sources+=opus_sources
+# NOTE(review): silk/float is put on CPPPATH below unconditionally, i.e. also after the fixed-point branch above ran - confirm this is intended.
+env.Append(CPPPATH=["#drivers/opus"])
+env.Append(CPPPATH=["#drivers/opus/celt","#drivers/opus/silk","#drivers/opus/silk/float"])
+env.Append(CFLAGS=["-DOPUS_HAVE_CONFIG_H"])
+# Publish the modified environment under the name 'env' again.
+Export('env')
diff --git a/drivers/opus/analysis.c b/drivers/opus/analysis.c
new file mode 100644
index 0000000000..47e8668b8e
--- /dev/null
+++ b/drivers/opus/analysis.c
@@ -0,0 +1,645 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "kiss_fft.h"
+#include "celt.h"
+#include "opus_modes.h"
+#include "arch.h"
+#include "quant_bands.h"
+#include <stdio.h>
+#include "analysis.h"
+#include "mlp.h"
+#include "stack_alloc.h"
+
+extern const MLP net;
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+static const float dct_table[128] = {
+ 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
+ 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
+ 0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
+ -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
+ 0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
+ -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
+ 0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
+ 0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
+ 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
+ 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
+ 0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
+ -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
+ 0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
+ -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
+ 0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
+ 0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
+};
+
+static const float analysis_window[240] = {
+ 0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
+ 0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
+ 0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
+ 0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
+ 0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
+ 0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
+ 0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
+ 0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
+ 0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
+ 0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
+ 0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
+ 0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
+ 0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
+ 0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
+ 0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
+ 0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
+ 0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
+ 0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
+ 0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
+ 0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
+ 0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
+ 0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
+ 0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
+ 0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
+ 0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
+ 0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
+ 0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
+ 0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
+ 0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
+ 0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
+};
+
+static const int tbands[NB_TBANDS+1] = {
+ 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
+};
+
+static const int extra_bands[NB_TOT_BANDS+1] = {
+ 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
+};
+
+/*static const float tweight[NB_TBANDS+1] = {
+ .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
+};*/
+
+#define NB_TONAL_SKIP_BANDS 9
+
+#define cA 0.43157974f
+#define cB 0.67848403f
+#define cC 0.08595542f
+#define cE ((float)M_PI/2)
+/* Rational approximation of atan2(y, x), built from the cA/cB/cC coefficients
+   and the quarter-turn constant cE. Very small inputs are scaled up first so
+   that their squares do not underflow; a zero denominator falls back to the
+   pure quadrant offset. */
+static OPUS_INLINE float fast_atan2f(float y, float x) {
+   float xx, yy;
+   float denom;
+   float y_offset;
+
+   /* Rescale tiny inputs: the ratio is unchanged but the squares stay representable */
+   if (ABS16(x)+ABS16(y)<1e-9f)
+   {
+      x*=1e12f;
+      y*=1e12f;
+   }
+   xx = x*x;
+   yy = y*y;
+   /* +/- pi/2 term selected by the sign of y; it appears in every result */
+   y_offset = (y<0 ? -cE : cE);
+
+   if (xx<yy)
+   {
+      /* |y| dominates */
+      denom = (yy + cB*xx) * (yy + cC*xx);
+      if (denom==0)
+         return y_offset;
+      return -x*y*(yy + cA*xx) / denom + y_offset;
+   }
+
+   /* |x| dominates (or the comparison above was false) */
+   denom = (xx + cB*yy) * (xx + cC*yy);
+   if (denom==0)
+      return y_offset - (x*y<0 ? -cE : cE);
+   return x*y*(xx + cA*yy) / denom + y_offset - (x*y<0 ? -cE : cE);
+}
+
+/* Copies one stored analysis record into *info_out and advances the read
+   cursor of the tonal->info[] ring by len samples (one subframe per 120
+   samples, four subframes per record). The music probability written to
+   info_out is then recomputed from the pmusic/pspeech tables, taking the
+   currently available look-ahead into account. */
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
+{
+   int slot;
+   int lookahead;
+   int music_end;
+   int k;
+   float prob;
+
+   slot = tonal->read_pos;
+   /* Distance from the read cursor to the write cursor, modulo the ring size */
+   lookahead = tonal->write_pos-tonal->read_pos;
+   if (lookahead<0)
+      lookahead += DETECT_SIZE;
+
+   /* For inputs longer than 480 samples use the following record, if one exists */
+   if (len > 480 && slot != tonal->write_pos)
+   {
+      slot++;
+      if (slot==DETECT_SIZE)
+         slot=0;
+   }
+   /* Never hand out the record at the write cursor: step back, wrapping around */
+   if (slot == tonal->write_pos)
+      slot--;
+   if (slot<0)
+      slot = DETECT_SIZE-1;
+   OPUS_COPY(info_out, &tonal->info[slot], 1);
+
+   /* Advance the read cursor */
+   tonal->read_subframe += len/120;
+   for (;tonal->read_subframe>=4;tonal->read_subframe-=4)
+      tonal->read_pos++;
+   if (tonal->read_pos>=DETECT_SIZE)
+      tonal->read_pos-=DETECT_SIZE;
+
+   /* Compensate for the delay in the features themselves.
+      FIXME: Need a better estimate than the 10 that was just made up */
+   lookahead = IMAX(lookahead-10, 0);
+
+   /* Sum the probability of the transition patterns that involve music at
+      time (DETECT_SIZE-lookahead-1): pmusic below that boundary, pspeech
+      from the boundary onwards */
+   music_end = DETECT_SIZE-lookahead;
+   prob = 0;
+   for (k=0;k<DETECT_SIZE;k++)
+   {
+      if (k<music_end)
+         prob += tonal->pmusic[k];
+      else
+         prob += tonal->pspeech[k];
+   }
+   prob = prob*tonal->music_confidence + (1-prob)*tonal->speech_confidence;
+   /*printf("%f %f %f\n", prob, info_out->music_prob, info_out->tonality);*/
+
+   info_out->music_prob = prob;
+}
+/* Feeds new PCM into the tonality/music analysis and, once enough samples are
+   buffered, produces one analysis record.
+   The input is passed to the downmix callback together with a pointer into
+   tonal->inmem (presumably the callback writes the downmixed samples there).
+   While fewer than ANALYSIS_BUF_SIZE samples are buffered the function only
+   updates mem_fill and returns. Otherwise it claims the next tonal->info[]
+   slot (advancing write_pos), runs a 480-point FFT on the windowed buffer and
+   fills the slot with tonality, tonality slope, activity, noisiness, bandwidth
+   and music probability, updating the running state in *tonal on the way.
+   tonal      analysis state, read and updated
+   info_out   if non-NULL, receives a copy of the record that was just filled
+   celt_mode  supplies the FFT state (mdct.kfft[0])
+   x, len, offset, c1, c2, C   forwarded to the downmix callback
+   lsb_depth  sets the PCM quantization noise floor used by the bandwidth detection
+   downmix    callback used to bring the input into tonal->inmem */
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
+{
+ int i, b;
+ const kiss_fft_state *kfft;
+ VARDECL(kiss_fft_cpx, in);
+ VARDECL(kiss_fft_cpx, out);
+ int N = 480, N2=240;
+ float * OPUS_RESTRICT A = tonal->angle;
+ float * OPUS_RESTRICT dA = tonal->d_angle;
+ float * OPUS_RESTRICT d2A = tonal->d2_angle;
+ VARDECL(float, tonality);
+ VARDECL(float, noisiness);
+ float band_tonality[NB_TBANDS];
+ float logE[NB_TBANDS];
+ float BFCC[8];
+ float features[25];
+ float frame_tonality;
+ float max_frame_tonality;
+ /*float tw_sum=0;*/
+ float frame_noisiness;
+ const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI);
+ float slope=0;
+ float frame_stationarity;
+ float relativeE;
+ float frame_probs[2];
+ float alpha, alphaE, alphaE2;
+ float frame_loudness;
+ float bandwidth_mask;
+ int bandwidth=0;
+ float maxE = 0;
+ float noise_floor;
+ int remaining;
+ AnalysisInfo *info;
+ SAVE_STACK;
+
+ tonal->last_transition++;
+ alpha = 1.f/IMIN(20, 1+tonal->count); /* smoothing factors: 1/(count+1) at first, then capped at 1/20, 1/50 and 1/1000 */
+ alphaE = 1.f/IMIN(50, 1+tonal->count);
+ alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
+
+ if (tonal->count<4)
+ tonal->music_prob = .5;
+ kfft = celt_mode->mdct.kfft[0];
+ if (tonal->count==0)
+ tonal->mem_fill = 240; /* before the first analysed frame, buffering starts at index 240 */
+ downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
+ if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
+ {
+ tonal->mem_fill += len;
+ /* Don't have enough to update the analysis */
+ RESTORE_STACK;
+ return;
+ }
+ info = &tonal->info[tonal->write_pos++];
+ if (tonal->write_pos>=DETECT_SIZE)
+ tonal->write_pos-=DETECT_SIZE;
+
+ ALLOC(in, 480, kiss_fft_cpx);
+ ALLOC(out, 480, kiss_fft_cpx);
+ ALLOC(tonality, 240, float);
+ ALLOC(noisiness, 240, float);
+ for (i=0;i<N2;i++) /* real part takes inmem[0..479], imaginary part takes inmem[240..719], both under the same symmetric window */
+ {
+ float w = analysis_window[i];
+ in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
+ in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
+ in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
+ in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
+ }
+ OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
+ remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
+ downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
+ tonal->mem_fill = 240 + remaining;
+ opus_fft(kfft, in, out);
+
+ for (i=1;i<N2;i++) /* per bin: phase in cycles and its wrapped second difference, which drives tonality[] and noisiness[] */
+ {
+ float X1r, X2r, X1i, X2i;
+ float angle, d_angle, d2_angle;
+ float angle2, d_angle2, d2_angle2;
+ float mod1, mod2, avg_mod;
+ X1r = (float)out[i].r+out[N-i].r;
+ X1i = (float)out[i].i-out[N-i].i;
+ X2r = (float)out[i].i+out[N-i].i;
+ X2i = (float)out[N-i].r-out[i].r;
+
+ angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
+ d_angle = angle - A[i];
+ d2_angle = d_angle - dA[i];
+
+ angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
+ d_angle2 = angle2 - angle;
+ d2_angle2 = d_angle2 - d_angle;
+
+ mod1 = d2_angle - (float)floor(.5+d2_angle);
+ noisiness[i] = ABS16(mod1);
+ mod1 *= mod1;
+ mod1 *= mod1;
+
+ mod2 = d2_angle2 - (float)floor(.5+d2_angle2);
+ noisiness[i] += ABS16(mod2);
+ mod2 *= mod2;
+ mod2 *= mod2;
+
+ avg_mod = .25f*(d2A[i]+2.f*mod1+mod2);
+ tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f;
+
+ A[i] = angle2;
+ dA[i] = d_angle2;
+ d2A[i] = mod2;
+ }
+
+ frame_tonality = 0;
+ max_frame_tonality = 0;
+ /*tw_sum = 0;*/
+ info->activity = 0;
+ frame_noisiness = 0;
+ frame_stationarity = 0;
+ if (!tonal->count) /* first analysed frame: reset the per-band log-energy trackers */
+ {
+ for (b=0;b<NB_TBANDS;b++)
+ {
+ tonal->lowE[b] = 1e10;
+ tonal->highE[b] = -1e10;
+ }
+ }
+ relativeE = 0;
+ frame_loudness = 0;
+ for (b=0;b<NB_TBANDS;b++)
+ {
+ float E=0, tE=0, nE=0;
+ float L1, L2;
+ float stationarity;
+ for (i=tbands[b];i<tbands[b+1];i++)
+ {
+ float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+ + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+#ifdef OPUS_FIXED_POINT
+ /* FIXME: It's probably best to change the BFCC filter initial state instead */
+ binE *= 5.55e-17f;
+#endif
+ E += binE;
+ tE += binE*tonality[i];
+ nE += binE*2.f*(.5f-noisiness[i]);
+ }
+ tonal->E[tonal->E_count][b] = E;
+ frame_noisiness += nE/(1e-15f+E);
+
+ frame_loudness += (float)sqrt(E+1e-10f);
+ logE[b] = (float)log(E+1e-10f);
+ tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
+ tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
+ if (tonal->highE[b] < tonal->lowE[b]+1.f)
+ {
+ tonal->highE[b]+=.5f;
+ tonal->lowE[b]-=.5f;
+ }
+ relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);
+
+ L1=L2=0;
+ for (i=0;i<NB_FRAMES;i++)
+ {
+ L1 += (float)sqrt(tonal->E[i][b]);
+ L2 += tonal->E[i][b];
+ }
+
+ stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
+ stationarity *= stationarity;
+ stationarity *= stationarity;
+ frame_stationarity += stationarity;
+ /*band_tonality[b] = tE/(1e-15+E)*/;
+ band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
+#if 0
+ if (b>=NB_TONAL_SKIP_BANDS)
+ {
+ frame_tonality += tweight[b]*band_tonality[b];
+ tw_sum += tweight[b];
+ }
+#else
+ frame_tonality += band_tonality[b];
+ if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
+ frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
+#endif
+ max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality);
+ slope += band_tonality[b]*(b-8);
+ /*printf("%f %f ", band_tonality[b], stationarity);*/
+ tonal->prev_band_tonality[b] = band_tonality[b];
+ }
+
+ bandwidth_mask = 0;
+ bandwidth = 0;
+ maxE = 0;
+ noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+#ifdef OPUS_FIXED_POINT
+ noise_floor *= 1<<(15+SIG_SHIFT);
+#endif
+ noise_floor *= noise_floor;
+ for (b=0;b<NB_TOT_BANDS;b++)
+ {
+ float E=0;
+ int band_start, band_end;
+ /* Keep a margin of 300 Hz for aliasing */
+ band_start = extra_bands[b];
+ band_end = extra_bands[b+1];
+ for (i=band_start;i<band_end;i++)
+ {
+ float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+ + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+ E += binE;
+ }
+ maxE = MAX32(maxE, E);
+ tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
+ E = MAX32(E, tonal->meanE[b]);
+ /* Use a simple follower with 13 dB/Bark slope for spreading function */
+ bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
+ /* Consider the band "active" only if all these conditions are met:
+ 1) less than 10 dB below the simple follower
+ 2) less than 90 dB below the peak band (maximal masking possible considering
+ both the ATH and the loudness-dependent slope of the spreading function)
+ 3) above the PCM quantization noise floor
+ */
+ if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start))
+ bandwidth = b;
+ }
+ if (tonal->count<=2) /* too little history: force the highest band index */
+ bandwidth = 20;
+ frame_loudness = 20*(float)log10(frame_loudness);
+ tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);
+ tonal->lowECount *= (1-alphaE);
+ if (frame_loudness < tonal->Etracker-30)
+ tonal->lowECount += alphaE;
+
+ for (i=0;i<8;i++)
+ {
+ float sum=0;
+ for (b=0;b<16;b++)
+ sum += dct_table[i*16+b]*logE[b];
+ BFCC[i] = sum;
+ }
+
+ frame_stationarity /= NB_TBANDS;
+ relativeE /= NB_TBANDS;
+ if (tonal->count<10)
+ relativeE = .5;
+ frame_noisiness /= NB_TBANDS;
+#if 1
+ info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
+#else
+ info->activity = .5*(1+frame_noisiness-frame_stationarity);
+#endif
+ frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
+ frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
+ tonal->prev_tonality = frame_tonality;
+
+ slope /= 8*8;
+ info->tonality_slope = slope;
+
+ tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
+ tonal->count++;
+ info->tonality = frame_tonality;
+
+ for (i=0;i<4;i++)
+ features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i];
+
+ for (i=0;i<4;i++)
+ tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
+
+ for (i=0;i<4;i++)
+ features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]);
+ for (i=0;i<3;i++)
+ features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
+
+ if (tonal->count > 5)
+ {
+ for (i=0;i<9;i++)
+ tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
+ }
+
+ for (i=0;i<8;i++)
+ {
+ tonal->mem[i+24] = tonal->mem[i+16];
+ tonal->mem[i+16] = tonal->mem[i+8];
+ tonal->mem[i+8] = tonal->mem[i];
+ tonal->mem[i] = BFCC[i];
+ }
+ for (i=0;i<9;i++)
+ features[11+i] = (float)sqrt(tonal->std[i]);
+ features[20] = info->tonality;
+ features[21] = info->activity;
+ features[22] = frame_stationarity;
+ features[23] = info->tonality_slope;
+ features[24] = tonal->lowECount;
+
+#ifndef DISABLE_FLOAT_API
+ mlp_process(&net, features, frame_probs);
+ frame_probs[0] = .5f*(frame_probs[0]+1);
+ /* Curve fitting between the MLP probability and the actual probability */
+ frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
+ /* Probability of active audio (as opposed to silence) */
+ frame_probs[1] = .5f*frame_probs[1]+.5f;
+ /* Consider that silence has a 50-50 probability. */
+ frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
+
+ /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
+ {
+ /* Probability of state transition */
+ float tau;
+ /* Represents independence of the MLP probabilities, where
+ beta=1 means fully independent. */
+ float beta;
+ /* Denormalized probability of speech (p0) and music (p1) after update */
+ float p0, p1;
+ /* Probabilities for "all speech" and "all music" */
+ float s0, m0;
+ /* Probability sum for renormalisation */
+ float psum;
+ /* Instantaneous probability of speech and music, with beta pre-applied. */
+ float speech0;
+ float music0;
+
+ /* One transition every 3 minutes of active audio */
+ tau = .00005f*frame_probs[1];
+ beta = .05f;
+ if (1) {
+ /* Adapt beta based on how "unexpected" the new prob is */
+ float p, q;
+ p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
+ q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
+ beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
+ }
+ /* p0 and p1 are the probabilities of speech and music at this frame
+ using only information from previous frame and applying the
+ state transition model */
+ p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau;
+ p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
+ /* We apply the current probability with exponent beta to work around
+ the fact that the probability estimates aren't independent. */
+ p0 *= (float)pow(1-frame_probs[0], beta);
+ p1 *= (float)pow(frame_probs[0], beta);
+ /* Normalise the probabilities to get the Markov probability of music. */
+ tonal->music_prob = p1/(p0+p1);
+ info->music_prob = tonal->music_prob;
+
+ /* This chunk of code deals with delayed decision. */
+ psum=1e-20f;
+ /* Instantaneous probability of speech and music, with beta pre-applied. */
+ speech0 = (float)pow(1-frame_probs[0], beta);
+ music0 = (float)pow(frame_probs[0], beta);
+ if (tonal->count==1)
+ {
+ tonal->pspeech[0]=.5;
+ tonal->pmusic [0]=.5;
+ }
+ /* Updated probability of having only speech (s0) or only music (m0),
+ before considering the new observation. */
+ s0 = tonal->pspeech[0] + tonal->pspeech[1];
+ m0 = tonal->pmusic [0] + tonal->pmusic [1];
+ /* Updates s0 and m0 with instantaneous probability. */
+ tonal->pspeech[0] = s0*(1-tau)*speech0;
+ tonal->pmusic [0] = m0*(1-tau)*music0;
+ /* Propagate the transition probabilities */
+ for (i=1;i<DETECT_SIZE-1;i++)
+ {
+ tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
+ tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
+ }
+ /* Probability that the latest frame is speech, when all the previous ones were music. */
+ tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
+ /* Probability that the latest frame is music, when all the previous ones were speech. */
+ tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
+
+ /* Renormalise probabilities to 1 */
+ for (i=0;i<DETECT_SIZE;i++)
+ psum += tonal->pspeech[i] + tonal->pmusic[i];
+ psum = 1.f/psum;
+ for (i=0;i<DETECT_SIZE;i++)
+ {
+ tonal->pspeech[i] *= psum;
+ tonal->pmusic [i] *= psum;
+ }
+ psum = tonal->pmusic[0];
+ for (i=1;i<DETECT_SIZE;i++)
+ psum += tonal->pspeech[i];
+
+ /* Estimate our confidence in the speech/music decisions */
+ if (frame_probs[1]>.75)
+ {
+ if (tonal->music_prob>.9)
+ {
+ float adapt;
+ adapt = 1.f/(++tonal->music_confidence_count);
+ tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
+ tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
+ }
+ if (tonal->music_prob<.1)
+ {
+ float adapt;
+ adapt = 1.f/(++tonal->speech_confidence_count);
+ tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
+ tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
+ }
+ } else {
+ if (tonal->music_confidence_count==0)
+ tonal->music_confidence = .9f;
+ if (tonal->speech_confidence_count==0)
+ tonal->speech_confidence = .1f;
+ }
+ }
+ if (tonal->last_music != (tonal->music_prob>.5f))
+ tonal->last_transition=0;
+ tonal->last_music = tonal->music_prob>.5f;
+#else
+ info->music_prob = 0;
+#endif
+ /*for (i=0;i<25;i++)
+ printf("%f ", features[i]);
+ printf("\n");*/
+
+ info->bandwidth = bandwidth;
+ /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
+ info->noisiness = frame_noisiness;
+ info->valid = 1;
+ if (info_out!=NULL)
+ OPUS_COPY(info_out, info, 1);
+ RESTORE_STACK;
+}
+
+/* Front end of the analysis: when analysis_pcm is non-NULL, pushes the not yet
+   analysed part of the input through tonality_analysis() in chunks of at most
+   480 samples and records how far the analysis now runs ahead of the frame.
+   In every case *analysis_info is then filled by tonality_get_info() for a
+   frame of frame_size samples. */
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+{
+   if (analysis_pcm != NULL)
+   {
+      int offset;
+      int pcm_len;
+
+      /* Avoid overflow/wrap-around of the analysis buffer */
+      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
+
+      /* Resume where the previous call stopped */
+      offset = analysis->analysis_offset;
+      pcm_len = analysis_frame_size - offset;
+      /* At least one chunk is always processed, even if pcm_len is not positive */
+      for (;;)
+      {
+         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         offset += 480;
+         pcm_len -= 480;
+         if (pcm_len<=0)
+            break;
+      }
+
+      /* Remember how many samples beyond this frame have already been analysed */
+      analysis->analysis_offset = analysis_frame_size;
+      analysis->analysis_offset -= frame_size;
+   }
+
+   /* Mark the output invalid first, then let tonality_get_info() fill it */
+   analysis_info->valid = 0;
+   tonality_get_info(analysis, analysis_info, frame_size);
+}
diff --git a/drivers/opus/analysis.h b/drivers/opus/analysis.h
new file mode 100644
index 0000000000..be0388faa3
--- /dev/null
+++ b/drivers/opus/analysis.h
@@ -0,0 +1,90 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ANALYSIS_H
+#define ANALYSIS_H
+
+#include "celt.h"
+#include "opus_private.h"
+
+/* Sizes of the arrays held in TonalityAnalysisState (declared below). */
+/* First dimension of E[][]. */
+#define NB_FRAMES 8
+/* Length of prev_band_tonality, lowE, highE and the second dimension of E[][]. */
+#define NB_TBANDS 18
+/* Length of meanE. */
+#define NB_TOT_BANDS 21
+#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
+
+/* Length of pspeech, pmusic and the info[] history. */
+#define DETECT_SIZE 200
+
+/* State shared by tonality_analysis(), tonality_get_info() and run_analysis();
+   every one of them takes a pointer to this structure. */
+typedef struct {
+ float angle[240];
+ float d_angle[240];
+ float d2_angle[240];
+ opus_val32 inmem[ANALYSIS_BUF_SIZE];
+ int mem_fill; /* number of usable samples in the buffer */
+ float prev_band_tonality[NB_TBANDS];
+ float prev_tonality;
+ float E[NB_FRAMES][NB_TBANDS];
+ float lowE[NB_TBANDS];
+ float highE[NB_TBANDS];
+ float meanE[NB_TOT_BANDS];
+ float mem[32];
+ float cmean[8];
+ float std[9];
+ float music_prob;
+ float Etracker;
+ float lowECount;
+ int E_count;
+ /* Set by tonality_analysis() to (music_prob > .5f). */
+ int last_music;
+ /* Reset to 0 by tonality_analysis() whenever last_music changes. */
+ int last_transition;
+ int count;
+ float subframe_mem[3];
+ /* Offset into the PCM at which run_analysis() resumes; run_analysis() stores
+ (analysis_frame_size - frame_size) here after each call that had PCM. */
+ int analysis_offset;
+ /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
+ pspeech[0] is the probability that all frames in the window are speech. */
+ float pspeech[DETECT_SIZE];
+ /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
+ pmusic[0] is the probability that all frames in the window are music. */
+ float pmusic[DETECT_SIZE];
+ /* Running confidence values; tonality_analysis() seeds them with .1f (speech)
+ and .9f (music) while the matching *_confidence_count is still 0. */
+ float speech_confidence;
+ float music_confidence;
+ int speech_confidence_count;
+ int music_confidence_count;
+ int write_pos;
+ int read_pos;
+ int read_subframe;
+ AnalysisInfo info[DETECT_SIZE];
+} TonalityAnalysisState;
+
+/* Analyses len samples of x starting at offset and updates tonal; run_analysis()
+ calls it with info == NULL and len <= 480. */
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
+ const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);
+
+/* Writes the analysis result for a frame of len samples into info_out. */
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
+
+/* Runs tonality_analysis() over analysis_pcm in slices of up to 480 samples
+ (skipped when analysis_pcm is NULL), clears analysis_info->valid and then
+ calls tonality_get_info() for frame_size samples. */
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+ int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+ int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
+
+#endif
diff --git a/drivers/opus/audio_stream_opus.cpp b/drivers/opus/audio_stream_opus.cpp
new file mode 100644
index 0000000000..eb9c81e152
--- /dev/null
+++ b/drivers/opus/audio_stream_opus.cpp
@@ -0,0 +1,376 @@
+/*************************************************************************/
+/* audio_stream_opus.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* http://www.godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Author: George Marques <george@gmarqu.es> */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+#include "audio_stream_opus.h"
+
+const float AudioStreamPlaybackOpus::osrate=48000.0f;
+
+// opusfile read callback: copies up to _nbytes bytes from the FileAccess
+// passed as _stream into _ptr and returns the number of bytes read
+// (0 once the end of the file has been reached).
+int AudioStreamPlaybackOpus::_op_read_func(void *_stream, unsigned char *_ptr, int _nbytes) {
+	FileAccess *source=(FileAccess*)_stream;
+
+	if (source->eof_reached()) {
+		return 0;
+	}
+
+	return source->get_buffer((uint8_t*)_ptr, _nbytes);
+}
+
+// opusfile seek callback: repositions the FileAccess passed as _stream.
+// _whence is one of SEEK_SET / SEEK_CUR / SEEK_END.
+// Returns 0 on success and -1 on failure (end of file reached after the
+// seek, unknown _whence, or seeking not available on this platform).
+int AudioStreamPlaybackOpus::_op_seek_func(void *_stream, opus_int64 _offset, int _whence){
+
+#ifdef SEEK_SET
+	FileAccess *fa=(FileAccess*)_stream;
+
+	switch (_whence) {
+		case SEEK_SET: {
+			fa->seek(_offset);
+		} break;
+		case SEEK_CUR: {
+			fa->seek(fa->get_pos()+_offset);
+		} break;
+		case SEEK_END: {
+			fa->seek_end(_offset);
+		} break;
+		default: {
+			ERR_PRINT("BUG, wtf was whence set to?\n");
+			// nothing was moved, so do not report the seek as successful
+			return -1;
+		}
+	}
+	int ret=fa->eof_reached()?-1:0;
+	return ret;
+#else
+	return -1; // no seeking
+#endif
+}
+
+// opusfile close callback: closes the FileAccess passed as _stream when it
+// is non-NULL and still open. The object itself is not deleted here.
+// Always returns 0.
+int AudioStreamPlaybackOpus::_op_close_func(void *_stream) {
+	FileAccess *source=(FileAccess*)_stream;
+	if (source && source->is_open()) {
+		source->close();
+	}
+	return 0;
+}
+
+// opusfile tell callback: reports the current position of the FileAccess
+// passed as _stream.
+opus_int64 AudioStreamPlaybackOpus::_op_tell_func(void *_stream) {
+	FileAccess *source=(FileAccess*)_stream;
+	const opus_int64 position=(opus_int64)source->get_pos();
+	return position;
+}
+
+// Releases the decoder and the underlying file of a loaded stream and marks
+// the playback as stopped. Does nothing when no stream is loaded.
+void AudioStreamPlaybackOpus::_clear_stream() {
+	if (stream_loaded) {
+		// free the decoder first, then delete the file object it was reading from
+		op_free(opus_file);
+		_close_file();
+
+		stream_loaded=false;
+		stream_channels=1;
+		playing=false;
+	}
+}
+
+// Deletes the FileAccess object (if any) and resets the pointer to NULL.
+void AudioStreamPlaybackOpus::_close_file() {
+	if (!f)
+		return;
+
+	memdelete(f);
+	f=NULL;
+}
+
+// Opens the file set through set_file() and creates the opusfile decoder
+// for it. Any previously loaded stream is released first.
+// Returns OK on success; ERR_UNCONFIGURED when set_file() has not validated
+// a file, ERR_INVALID_DATA for an empty path, the FileAccess error when the
+// file cannot be opened, or an error mapped from the opusfile failure code.
+Error AudioStreamPlaybackOpus::_load_stream() {
+
+	ERR_FAIL_COND_V(!stream_valid,ERR_UNCONFIGURED);
+
+	_clear_stream();
+	if (file=="")
+		return ERR_INVALID_DATA;
+
+	Error err;
+	f=FileAccess::open(file,FileAccess::READ,&err);
+
+	if (err) {
+		ERR_FAIL_COND_V( err, err );
+	}
+
+	int _err = 0;
+
+	opus_file = op_open_callbacks(f,&_op_callbacks,NULL,0,&_err);
+
+	// On failure opusfile does not take ownership of the source, so the
+	// file has to be deleted here.
+	switch (_err) {
+		case OP_EREAD: { // - Can't read the file.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CANT_READ );
+		} break;
+		case OP_EVERSION: // - Unrecognized version number.
+		case OP_ENOTFORMAT: // - Stream is not Opus data.
+		case OP_EIMPL : { // - Stream used non-implemented feature.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_UNRECOGNIZED );
+		} break;
+		case OP_EBADLINK: // - Failed to find old data after seeking.
+		case OP_EBADTIMESTAMP: // - Timestamp failed the validity checks.
+		case OP_EBADHEADER: { // - Invalid or missing Opus bitstream header.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CORRUPT );
+		} break;
+		case OP_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_BUG );
+		} break;
+	}
+
+	// Any other failure code (or a NULL decoder) must not be treated as a
+	// loaded stream, otherwise later calls would use a NULL opus_file.
+	if (_err!=0 || !opus_file) {
+		memdelete(f); f=NULL;
+		opus_file=NULL;
+		ERR_FAIL_V( ERR_FILE_CANT_OPEN );
+	}
+
+	repeats=0;
+	stream_loaded=true;
+
+
+	return OK;
+}
+
+// Creates an idle playback with no file assigned and registers the static
+// FileAccess-based I/O callbacks used by opusfile.
+AudioStreamPlaybackOpus::AudioStreamPlaybackOpus() {
+	loops=false;
+	playing=false;
+	f = NULL;
+	// opus_file and frames_mixed used to be left uninitialized; get_pos()
+	// reads frames_mixed even before a file has been set.
+	opus_file = NULL;
+	frames_mixed=0;
+	stream_loaded=false;
+	stream_valid=false;
+	repeats=0;
+	paused=true;
+	stream_channels=0;
+	current_section=0;
+	length=0;
+	loop_restart_time=0;
+	pre_skip=0;
+
+	_op_callbacks.read = _op_read_func;
+	_op_callbacks.seek = _op_seek_func;
+	_op_callbacks.tell = _op_tell_func;
+	_op_callbacks.close = _op_close_func;
+}
+
+// Assigns the file to play and probes it: the file is opened once to read
+// the channel count, the pre-skip and the total length, then closed again.
+// The stream is only marked valid when the probe succeeds.
+// Returns OK on success, the FileAccess error when the file cannot be
+// opened, or an error mapped from the opusfile failure code.
+Error AudioStreamPlaybackOpus::set_file(const String &p_file) {
+	// Release a stream that is still loaded; otherwise f and opus_file
+	// would be overwritten below and later freed twice by _clear_stream().
+	_clear_stream();
+
+	file=p_file;
+	stream_valid=false;
+	Error err;
+	f=FileAccess::open(file,FileAccess::READ,&err);
+
+	if (err) {
+		ERR_FAIL_COND_V( err, err );
+	}
+
+	int _err = 0;
+
+	opus_file = op_open_callbacks(f,&_op_callbacks,NULL,0,&_err);
+
+	// On failure opusfile does not take ownership of the source, so the
+	// file has to be deleted here.
+	switch (_err) {
+		case OP_EREAD: { // - Can't read the file.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CANT_READ );
+		} break;
+		case OP_EVERSION: // - Unrecognized version number.
+		case OP_ENOTFORMAT: // - Stream is not Opus data.
+		case OP_EIMPL : { // - Stream used non-implemented feature.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_UNRECOGNIZED );
+		} break;
+		case OP_EBADLINK: // - Failed to find old data after seeking.
+		case OP_EBADTIMESTAMP: // - Timestamp failed the validity checks.
+		case OP_EBADHEADER: { // - Invalid or missing Opus bitstream header.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CORRUPT );
+		} break;
+		case OP_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_BUG );
+		} break;
+	}
+
+	// Any other failure code (or a NULL decoder) must stop here: op_head()
+	// below would otherwise be called on a NULL opus_file.
+	if (_err!=0 || !opus_file) {
+		memdelete(f); f=NULL;
+		opus_file=NULL;
+		ERR_FAIL_V( ERR_FILE_CANT_OPEN );
+	}
+
+	const OpusHead *oinfo = op_head(opus_file,-1);
+
+	stream_channels=oinfo->channel_count;
+	pre_skip=oinfo->pre_skip;
+	frames_mixed=pre_skip;
+	ogg_int64_t len = op_pcm_total(opus_file,-1);
+	if(len < 0) {
+		length = 0;
+	} else {
+		length=(len/osrate);
+	}
+
+	// The probe is done; play() reopens the file through _load_stream().
+	op_free(opus_file);
+	opus_file=NULL;
+	memdelete(f);
+	f=NULL;
+	stream_valid=true;
+
+
+	return OK;
+}
+
+// Starts playback, restarting first if already playing. When p_from is
+// greater than 0 the stream seeks to that time (in seconds) after loading.
+// Nothing happens if the stream cannot be loaded.
+void AudioStreamPlaybackOpus::play(float p_from) {
+	if (playing) {
+		stop();
+	}
+
+	const Error load_result=_load_stream();
+	if (load_result==OK) {
+		frames_mixed=pre_skip;
+		playing=true;
+		if (p_from>0)
+			seek_pos(p_from);
+	}
+}
+
+// Stops playback: releases the loaded stream (if any) and clears the
+// playing flag, which is reset even when no stream was loaded.
+void AudioStreamPlaybackOpus::stop() {
+ _clear_stream();
+ playing=false;
+}
+
+// Seeks to p_time (seconds) in the playing stream. Ignored while not
+// playing; if the decoder rejects the position an error is printed and the
+// current position is left untouched.
+void AudioStreamPlaybackOpus::seek_pos(float p_time) {
+	if (playing) {
+		const ogg_int64_t target=(ogg_int64_t)(p_time * osrate);
+		if (op_pcm_seek(opus_file,target)!=0) {
+			ERR_PRINT("Seek time over stream size.");
+			return;
+		}
+		frames_mixed=osrate*p_time;
+	}
+}
+
+// Decodes up to p_frames frames of interleaved 16-bit samples into p_bufer.
+// Decoding stops when fewer than MIN_MIX frames remain to be filled, on a
+// read error, or at the end of a non-looping stream. At the end of a
+// looping stream the file is reopened and decoding resumes from
+// loop_restart_time (or from the start).
+// Returns the number of frames actually written.
+int AudioStreamPlaybackOpus::mix(int16_t* p_bufer,int p_frames) {
+	if (!playing)
+		return 0;
+
+	int total=p_frames;
+
+	while (true) {
+
+		int todo = p_frames;
+
+		if (todo==0 || todo<MIN_MIX) {
+			break;
+		}
+
+		int ret=op_read(opus_file,(opus_int16*)p_bufer,todo*stream_channels,&current_section);
+		if (ret<0) {
+			playing = false;
+			ERR_EXPLAIN("Error reading Opus File: "+file);
+			ERR_BREAK(ret<0);
+		} else if (ret==0) { // end of song, reload?
+			op_free(opus_file);
+			// opus_file is dangling until it is reassigned below
+			opus_file=NULL;
+
+			_close_file();
+
+			f=FileAccess::open(file,FileAccess::READ);
+			if (!f) {
+				// the I/O callbacks dereference the source, never hand them NULL
+				playing=false;
+				break;
+			}
+
+			int errv = 0;
+			opus_file = op_open_callbacks(f,&_op_callbacks,NULL,0,&errv);
+			if (errv!=0) {
+				playing=false;
+				break; // :(
+			}
+
+			if (!has_loop()) {
+				playing=false;
+				repeats=1;
+				break;
+			}
+
+			if (loop_restart_time) {
+				bool ok = op_pcm_seek(opus_file, (loop_restart_time*osrate)+pre_skip)==0;
+				if (!ok) {
+					playing=false;
+					ERR_PRINT("loop restart time rejected");
+					// playback has been stopped, do not keep decoding
+					break;
+				}
+
+				frames_mixed=(loop_restart_time*osrate)+pre_skip;
+			} else {
+				frames_mixed=pre_skip;
+			}
+			repeats++;
+			continue;
+
+		}
+
+		// the channel count may differ between links of a chained stream
+		stream_channels=op_head(opus_file,current_section)->channel_count;
+
+		frames_mixed+=ret;
+
+		p_bufer+=ret*stream_channels;
+		p_frames-=ret;
+
+	}
+
+	return total-p_frames;
+}
+
+// Returns the stream length in seconds as measured by set_file(). If no
+// stream is loaded yet it is loaded first; 0 is returned when that fails.
+float AudioStreamPlaybackOpus::get_length() const {
+	if (stream_loaded)
+		return length;
+
+	// loading mutates the object, hence the const_cast
+	AudioStreamPlaybackOpus *self=const_cast<AudioStreamPlaybackOpus*>(this);
+	if (self->_load_stream() != OK) {
+		return 0;
+	}
+	return length;
+}
+
+// Returns the playback position in seconds, derived from the number of
+// frames mixed so far. Negative counters are reported as 0.
+float AudioStreamPlaybackOpus::get_pos() const {
+
+	// keep the full 64-bit counter; narrowing it to 32 bits would wrap on
+	// very long streams
+	int64_t frames = frames_mixed;
+	if (frames < 0)
+		frames=0;
+	return double(frames) / osrate;
+}
+
+// Returns MIN_MIX; mix() stops filling once fewer than MIN_MIX frames are requested.
+int AudioStreamPlaybackOpus::get_minimum_buffer_size() const {
+ return MIN_MIX;
+}
+
+// Releases the decoder and the file of a stream that is still loaded.
+AudioStreamPlaybackOpus::~AudioStreamPlaybackOpus() {
+ _clear_stream();
+}
+
+// Creates an AudioStreamOpus resource that points at p_path. The file is
+// not opened here, so *r_error (when given) is always set to OK.
+RES ResourceFormatLoaderAudioStreamOpus::load(const String &p_path, const String& p_original_path, Error *r_error) {
+	if (r_error) {
+		*r_error=OK;
+	}
+
+	AudioStreamOpus *stream = memnew(AudioStreamOpus);
+	stream->set_file(p_path);
+
+	Ref<AudioStreamOpus> stream_ref(stream);
+	return stream_ref;
+}
+
+// Appends the single extension handled by this loader ("opus") to p_extensions.
+void ResourceFormatLoaderAudioStreamOpus::get_recognized_extensions(List<String> *p_extensions) const {
+
+ p_extensions->push_back("opus");
+}
+// Returns "AudioStreamOpus" for paths whose extension is "opus" (compared
+// case-insensitively) and an empty string for everything else.
+String ResourceFormatLoaderAudioStreamOpus::get_resource_type(const String &p_path) const {
+
+	const bool is_opus = (p_path.extension().to_lower()=="opus");
+	if (!is_opus) {
+		return "";
+	}
+	return "AudioStreamOpus";
+}
+
+// True for the resource types this loader can produce: "AudioStream" and
+// "AudioStreamOpus".
+bool ResourceFormatLoaderAudioStreamOpus::handles_type(const String& p_type) const {
+	if (p_type=="AudioStream") {
+		return true;
+	}
+	return p_type=="AudioStreamOpus";
+}
diff --git a/drivers/opus/audio_stream_opus.h b/drivers/opus/audio_stream_opus.h
new file mode 100644
index 0000000000..2f173cc270
--- /dev/null
+++ b/drivers/opus/audio_stream_opus.h
@@ -0,0 +1,141 @@
+/*************************************************************************/
+/* audio_stream_opus.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* http://www.godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Author: George Marques <george@gmarqu.es> */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
+#ifndef AUDIO_STREAM_OPUS_H
+#define AUDIO_STREAM_OPUS_H
+
+#include "scene/resources/audio_stream.h"
+#include "opus/opusfile.h"
+#include "opus/internal.h"
+#include "os/file_access.h"
+#include "io/resource_loader.h"
+
+// Playback object that decodes an Opus file through opusfile, reading the
+// data via FileAccess with the static _op_*_func callbacks.
+class AudioStreamPlaybackOpus : public AudioStreamPlayback {
+
+ OBJ_TYPE(AudioStreamPlaybackOpus,AudioStreamPlayback)
+
+ enum {
+ // value returned by get_minimum_buffer_size(); mix() stops once fewer
+ // frames than this are left to fill
+ MIN_MIX=1024
+ };
+
+ // file handed to opusfile as the data source (NULL while nothing is open)
+ FileAccess *f;
+
+ OpusFileCallbacks _op_callbacks;
+ // stream length in seconds, computed by set_file()
+ float length;
+ static int _op_read_func(void *_stream, unsigned char *_ptr, int _nbytes);
+ static int _op_seek_func(void *_stream, opus_int64 _offset, int _whence);
+ static int _op_close_func(void *_stream);
+ static opus_int64 _op_tell_func(void *_stream);
+ // sample rate used for all time <-> frame conversions (48000.0f)
+ static const float osrate;
+
+ String file;
+ // frame counter used by get_pos(); starts at pre_skip
+ int64_t frames_mixed;
+
+ bool stream_loaded;
+ // NOTE(review): volatile is not a synchronization primitive - confirm
+ // how this flag is shared with the mixing thread
+ volatile bool playing;
+ OggOpusFile *opus_file;
+ int stream_channels;
+ int current_section;
+ int pre_skip;
+
+ // only stored and returned by set_paused()/is_paused(); mix() does not read it
+ bool paused;
+ bool loops;
+ int repeats;
+
+ Error _load_stream();
+ void _clear_stream();
+ void _close_file();
+
+ // set by set_file() once the file has been probed successfully
+ bool stream_valid;
+ // position in seconds mix() seeks to when a looping stream restarts
+ float loop_restart_time;
+
+public:
+ Error set_file(const String& p_file);
+
+ virtual void play(float p_from=0);
+ virtual void stop();
+ virtual bool is_playing() const { return playing; }
+
+ virtual void set_loop_restart_time(float p_time) { loop_restart_time=p_time; }
+
+ virtual void set_paused(bool p_paused) { paused=p_paused; }
+ virtual bool is_paused() const { return paused; }
+
+ virtual void set_loop(bool p_enable) { loops=p_enable; }
+ virtual bool has_loop() const {return loops; }
+
+ virtual float get_length() const;
+
+ virtual String get_stream_name() const { return ""; }
+
+ virtual int get_loop_count() const { return repeats; }
+
+ virtual float get_pos() const;
+ virtual void seek_pos(float p_time);
+
+ virtual int get_channels() const { return stream_channels; }
+ // osrate is a float; the value is converted to int on return
+ virtual int get_mix_rate() const { return osrate; }
+
+ virtual int get_minimum_buffer_size() const;
+
+ virtual int mix(int16_t* p_bufer,int p_frames);
+
+ AudioStreamPlaybackOpus();
+ ~AudioStreamPlaybackOpus();
+};
+
+
+// Audio stream resource for Opus files: stores the file path and creates
+// AudioStreamPlaybackOpus instances for it.
+class AudioStreamOpus: public AudioStream {
+
+	OBJ_TYPE(AudioStreamOpus,AudioStream)
+
+	String file;
+public:
+
+	// Remembers the path; the file is only opened by the playback object.
+	void set_file(const String& p_file) {
+		file=p_file;
+	}
+
+	// Creates a new playback bound to the stored file path.
+	Ref<AudioStreamPlayback> instance_playback() {
+		Ref<AudioStreamPlaybackOpus> playback = memnew( AudioStreamPlaybackOpus );
+		playback->set_file(file);
+		return playback;
+	}
+
+};
+
+// Resource loader that exposes files with the "opus" extension as
+// AudioStreamOpus resources.
+class ResourceFormatLoaderAudioStreamOpus: public ResourceFormatLoader {
+public:
+ virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL);
+ virtual void get_recognized_extensions(List<String> *p_extensions) const;
+ virtual bool handles_type(const String& p_type) const;
+ virtual String get_resource_type(const String &p_path) const;
+};
+
+#endif // AUDIO_STREAM_OPUS_H
diff --git a/drivers/opus/celt/_kiss_fft_guts.h b/drivers/opus/celt/_kiss_fft_guts.h
new file mode 100644
index 0000000000..21bea8a9b0
--- /dev/null
+++ b/drivers/opus/celt/_kiss_fft_guts.h
@@ -0,0 +1,183 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_GUTS_H
+#define KISS_FFT_GUTS_H
+
+/* Plain min/max macros; each argument may be evaluated twice. */
+#define MIN(a,b) ((a)<(b) ? (a):(b))
+#define MAX(a,b) ((a)>(b) ? (a):(b))
+
+/* kiss_fft.h
+ defines kiss_fft_scalar as either short or a float type
+ and defines
+ typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+#include "kiss_fft.h"
+
+/*
+ Explanation of macros dealing with complex math:
+
+ C_MUL(m,a,b) : m = a*b
+ C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
+ C_SUB( res, a,b) : res = a - b
+ C_SUBFROM( res , a) : res -= a
+ C_ADDTO( res , a) : res += a
+ * */
+#ifdef OPUS_FIXED_POINT
+#include "arch.h"
+
+
+#define SAMP_MAX 2147483647
+#define TWID_MAX 32767
+#define TRIG_UPSCALE 1
+
+#define SAMP_MIN -SAMP_MAX
+
+
+/* Scalar product; note that the operands are passed to MULT16_32_Q15 in swapped order. */
+# define S_MUL(a,b) MULT16_32_Q15(b, a)
+
+/* m = a*b (complex product). */
+# define C_MUL(m,a,b) \
+ do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+ (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
+
+/* m = a*conj(b). */
+# define C_MULC(m,a,b) \
+ do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+ (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
+
+/* Same as C_MUL, with both components shifted right by 2 afterwards. */
+# define C_MUL4(m,a,b) \
+ do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
+ (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)
+
+/* c *= s, component-wise. */
+# define C_MULBYSCALAR( c, s ) \
+ do{ (c).r = S_MUL( (c).r , s ) ;\
+ (c).i = S_MUL( (c).i , s ) ; }while(0)
+
+/* x is scaled through S_MUL by (TWID_MAX-(k>>1))/k+1 instead of being divided by k. */
+# define DIVSCALAR(x,k) \
+ (x) = S_MUL( x, (TWID_MAX-((k)>>1))/(k)+1 )
+
+/* Applies DIVSCALAR to both components of c. */
+# define C_FIXDIV(c,div) \
+ do { DIVSCALAR( (c).r , div); \
+ DIVSCALAR( (c).i , div); }while (0)
+
+/* res = a + b */
+#define C_ADD( res, a,b)\
+ do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \
+ }while(0)
+/* res = a - b */
+#define C_SUB( res, a,b)\
+ do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \
+ }while(0)
+/* res += a */
+#define C_ADDTO( res , a)\
+ do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\
+ }while(0)
+
+/* res -= a. Both components are subtracted; the real part previously used
+ ADD32, which contradicted the contract stated in the macro summary of this
+ header and the generic (non fixed-point) definition. */
+#define C_SUBFROM( res , a)\
+ do {(res).r = SUB32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \
+ }while(0)
+
+/* Optional ARM-specific headers, included after the generic fixed-point macros above. */
+#if defined(OPUS_ARM_INLINE_ASM)
+#include "arm/kiss_fft_armv4.h"
+#endif
+
+#if defined(OPUS_ARM_INLINE_EDSP)
+#include "arm/kiss_fft_armv5e.h"
+#endif
+
+#else /* not OPUS_FIXED_POINT*/
+
+/* Non fixed-point build: the macros map directly onto the C arithmetic operators. */
+# define S_MUL(a,b) ( (a)*(b) )
+/* m = a*b (complex product). */
+#define C_MUL(m,a,b) \
+ do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+ (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+/* m = a*conj(b). */
+#define C_MULC(m,a,b) \
+ do{ (m).r = (a).r*(b).r + (a).i*(b).i;\
+ (m).i = (a).i*(b).r - (a).r*(b).i; }while(0)
+
+/* No extra scaling here: identical to C_MUL. */
+#define C_MUL4(m,a,b) C_MUL(m,a,b)
+
+# define C_FIXDIV(c,div) /* NOOP */
+/* c *= s, component-wise. */
+# define C_MULBYSCALAR( c, s ) \
+ do{ (c).r *= (s);\
+ (c).i *= (s); }while(0)
+#endif
+
+/* Overflow checking hook; expands to nothing unless defined before this point. */
+#ifndef CHECK_OVERFLOW_OP
+# define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+/* Generic complex add/subtract macros, only used when C_ADD has not been
+ defined already (the fixed-point branch above defines its own set). */
+#ifndef C_ADD
+/* res = a + b */
+#define C_ADD( res, a,b)\
+ do { \
+ CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+ CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+ (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \
+ }while(0)
+/* res = a - b */
+#define C_SUB( res, a,b)\
+ do { \
+ CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+ CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+ (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \
+ }while(0)
+/* res += a */
+#define C_ADDTO( res , a)\
+ do { \
+ CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+ CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+ (res).r += (a).r; (res).i += (a).i;\
+ }while(0)
+
+/* res -= a */
+#define C_SUBFROM( res , a)\
+ do {\
+ CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+ CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+ (res).r -= (a).r; (res).i -= (a).i; \
+ }while(0)
+#endif /* C_ADD defined */
+
+/* Twiddle factor helpers: KISS_FFT_COS / KISS_FFT_SIN evaluate cos/sin of a
+ phase in the representation used by the build, HALF_OF halves a value. */
+#ifdef OPUS_FIXED_POINT
+/* Older clamped formulation, kept for reference only. It must stay disabled:
+ defining KISS_FFT_COS / KISS_FFT_SIN twice with different bodies is an
+ invalid macro redefinition.
+# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
+# define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
+# define KISS_FFT_COS(phase) floor(.5+TWID_MAX*cos (phase))
+# define KISS_FFT_SIN(phase) floor(.5+TWID_MAX*sin (phase))
+# define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
+# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
+# define HALF_OF(x) ((x)*_mm_set1_ps(.5f))
+#else
+# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+# define HALF_OF(x) ((x)*.5f)
+#endif
+
+/* Stores cos(phase) in x->r and sin(phase) in x->i using KISS_FFT_COS / KISS_FFT_SIN. */
+#define kf_cexp(x,phase) \
+ do{ \
+ (x)->r = KISS_FFT_COS(phase);\
+ (x)->i = KISS_FFT_SIN(phase);\
+ }while(0)
+
+/* Variant built on celt_cos_norm(): the imaginary part is obtained by
+ evaluating the same function at (phase - 32768). Both parts are scaled by
+ TRIG_UPSCALE. */
+#define kf_cexp2(x,phase) \
+ do{ \
+ (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
+ (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
+}while(0)
+
+#endif /* KISS_FFT_GUTS_H */
diff --git a/drivers/opus/celt/arch.h b/drivers/opus/celt/arch.h
new file mode 100644
index 0000000000..83e3705000
--- /dev/null
+++ b/drivers/opus/celt/arch.h
@@ -0,0 +1,214 @@
+/* Copyright (c) 2003-2008 Jean-Marc Valin
+ Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/**
+ @file arch.h
+ @brief Various architecture definitions for CELT
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ARCH_H
+#define ARCH_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+/* Fallback for __GNUC_PREREQ(maj,min): true when the compiler reports a GNUC
+ version of at least maj.min, always 0 when the GNUC version macros are missing. */
+# if !defined(__GNUC_PREREQ)
+# if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+# define __GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+# else
+# define __GNUC_PREREQ(_maj,_min) 0
+# endif
+# endif
+
+/* Scale factor applied by SCALEIN / SCALEOUT in the floating-point build. */
+#define CELT_SIG_SCALE 32768.f
+
+/* Reports a fatal error together with the current file and line.
+ Note that the expansion already ends with a ';'. */
+#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
+#ifdef ENABLE_ASSERTIONS
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+/* Prints the message with its source location to stderr and aborts.
+ Only defined in this header when ENABLE_ASSERTIONS is set. */
+static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
+{
+ fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
+ abort();
+}
+/* Abort through celt_fatal() when cond is false; celt_assert2 appends message. */
+#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}}
+#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}}
+#else
+/* Assertions disabled: both macros expand to nothing. */
+#define celt_assert(cond)
+#define celt_assert2(cond, message)
+#endif
+
+/* Helpers shared by the fixed-point and floating-point builds. The min/max/abs
+ macros may evaluate their arguments more than once. */
+#define IMUL32(a,b) ((a)*(b))
+
+#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */
+#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */
+#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */
+#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */
+#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */
+#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */
+#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */
+#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */
+#define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */
+#define UADD32(a,b) ((a)+(b))
+#define USUB32(a,b) ((a)-(b))
+
+/* Expands to nothing. */
+#define PRINT_MIPS(file)
+
+#ifdef OPUS_FIXED_POINT
+
+/* Fixed-point build: values are 16-bit and 32-bit integers. */
+typedef opus_int16 opus_val16;
+typedef opus_int32 opus_val32;
+
+typedef opus_val32 celt_sig;
+typedef opus_val16 celt_norm;
+typedef opus_val32 celt_ener;
+
+#define Q15ONE 32767
+
+#define SIG_SHIFT 12
+
+#define NORM_SCALING 16384
+
+#define DB_SHIFT 10
+
+#define EPSILON 1
+#define VERY_SMALL 0
+#define VERY_LARGE16 ((opus_val16)32767)
+#define Q15_ONE ((opus_val16)32767)
+
+/* No input/output scaling in the fixed-point build. */
+#define SCALEIN(a) (a)
+#define SCALEOUT(a) (a)
+
+/* The arithmetic macros come from fixed_debug.h when FIXED_DEBUG is set,
+ otherwise from fixed_generic.h followed by at most one platform header. */
+#ifdef FIXED_DEBUG
+#include "fixed_debug.h"
+#else
+
+#include "fixed_generic.h"
+
+#ifdef OPUS_ARM_INLINE_EDSP
+#include "arm/fixed_armv5e.h"
+#elif defined (OPUS_ARM_INLINE_ASM)
+#include "arm/fixed_armv4.h"
+#elif defined (BFIN_ASM)
+#include "fixed_bfin.h"
+#elif defined (TI_C5X_ASM)
+#include "fixed_c5x.h"
+#elif defined (TI_C6X_ASM)
+#include "fixed_c6x.h"
+#endif
+
+#endif
+
+
+#else /* OPUS_FIXED_POINT */
+
+/* Floating-point build: every value type is float. The shift, saturate and
+ rounding macros below return their first argument unchanged, and the MULT*
+ and MAC* macros reduce to plain multiplication / multiply-add. */
+typedef float opus_val16;
+typedef float opus_val32;
+
+typedef float celt_sig;
+typedef float celt_norm;
+typedef float celt_ener;
+
+#define Q15ONE 1.0f
+
+#define NORM_SCALING 1.f
+
+#define EPSILON 1e-15f
+#define VERY_SMALL 1e-30f
+#define VERY_LARGE16 1e15f
+#define Q15_ONE ((opus_val16)1.f)
+
+/* Constants are used as written; the bits argument is ignored. */
+#define QCONST16(x,bits) (x)
+#define QCONST32(x,bits) (x)
+
+#define NEG16(x) (-(x))
+#define NEG32(x) (-(x))
+#define EXTRACT16(x) (x)
+#define EXTEND32(x) (x)
+#define SHR16(a,shift) (a)
+#define SHL16(a,shift) (a)
+#define SHR32(a,shift) (a)
+#define SHL32(a,shift) (a)
+#define PSHR32(a,shift) (a)
+#define VSHR32(a,shift) (a)
+
+#define PSHR(a,shift) (a)
+#define SHR(a,shift) (a)
+#define SHL(a,shift) (a)
+#define SATURATE(x,a) (x)
+#define SATURATE16(x) (x)
+
+#define ROUND16(a,shift) (a)
+#define HALF16(x) (.5f*(x))
+#define HALF32(x) (.5f*(x))
+
+#define ADD16(a,b) ((a)+(b))
+#define SUB16(a,b) ((a)-(b))
+#define ADD32(a,b) ((a)+(b))
+#define SUB32(a,b) ((a)-(b))
+#define MULT16_16_16(a,b) ((a)*(b))
+#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
+#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))
+
+#define MULT16_32_Q15(a,b) ((a)*(b))
+#define MULT16_32_Q16(a,b) ((a)*(b))
+
+#define MULT32_32_Q31(a,b) ((a)*(b))
+
+#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
+
+#define MULT16_16_Q11_32(a,b) ((a)*(b))
+#define MULT16_16_Q11(a,b) ((a)*(b))
+#define MULT16_16_Q13(a,b) ((a)*(b))
+#define MULT16_16_Q14(a,b) ((a)*(b))
+#define MULT16_16_Q15(a,b) ((a)*(b))
+#define MULT16_16_P15(a,b) ((a)*(b))
+#define MULT16_16_P13(a,b) ((a)*(b))
+#define MULT16_16_P14(a,b) ((a)*(b))
+#define MULT16_32_P16(a,b) ((a)*(b))
+
+#define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b))
+#define DIV32(a,b) (((opus_val32)(a))/(opus_val32)(b))
+
+/* Input is multiplied and output divided by CELT_SIG_SCALE. */
+#define SCALEIN(a) ((a)*CELT_SIG_SCALE)
+#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE))
+
+#endif /* !OPUS_FIXED_POINT */
+
+/* Default for GLOBAL_STACK_SIZE unless the build already defines it.
+ Both branches currently select the same value. */
+#ifndef GLOBAL_STACK_SIZE
+#ifdef OPUS_FIXED_POINT
+#define GLOBAL_STACK_SIZE 100000
+#else
+#define GLOBAL_STACK_SIZE 100000
+#endif
+#endif
+
+#endif /* ARCH_H */
diff --git a/drivers/opus/celt/arm/arm2gnu.pl b/drivers/opus/celt/arm/arm2gnu.pl
new file mode 100755
index 0000000000..eab42efa2b
--- /dev/null
+++ b/drivers/opus/celt/arm/arm2gnu.pl
@@ -0,0 +1,316 @@
+#!/usr/bin/perl
+
+my $bigend; # little/big endian
+my $nxstack;
+
+$nxstack = 0;
+
+eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
+ if $running_under_some_shell;
+
+# Consume leading command-line switches: "-n" sets $nflag (which disables the
+# default printing of each translated line), "--" ends switch processing, and
+# any other switch is a fatal error.
+while (@ARGV && $ARGV[0] =~ /^-/) {
+ $_ = shift;
+ last if /^--/;
+ if (/^-n/) {
+ $nflag++;
+ next;
+ }
+ # End the message with a real newline: the previous "\\n" printed a literal
+ # backslash-n and made Perl append "at <script> line N." to the text.
+ die "I don't recognize this switch: $_\n";
+}
+$printit++ unless $nflag;
+
+$\ = "\n"; # automatically add newline on print
+$n=0;
+
+$thumb = 0; # ARM mode by default, not Thumb.
+@proc_stack = ();
+
+LINE:
+while (<>) {
+
+ # For ADRLs we need to add a new line after the substituted one.
+ $addPadding = 0;
+
+ # First, we do not dare to touch *anything* inside double quotes, do we?
+ # Second, if you want a dollar character in the string,
+ # insert two of them -- that's how ARM C and assembler treat strings.
+ s/^([A-Za-z_]\w*)[ \t]+DCB[ \t]*\"/$1: .ascii \"/ && do { s/\$\$/\$/g; next };
+ s/\bDCB\b[ \t]*\"/.ascii \"/ && do { s/\$\$/\$/g; next };
+ s/^(\S+)\s+RN\s+(\S+)/$1 .req r$2/ && do { s/\$\$/\$/g; next };
+ # If there's nothing on a line but a comment, don't try to apply any further
+ # substitutions (this is a cheap hack to avoid mucking up the license header)
+ s/^([ \t]*);/$1@/ && do { s/\$\$/\$/g; next };
+ # If substituted -- leave immediately !
+
+ s/@/,:/;
+ s/;/@/;
+ while ( /@.*'/ ) {
+ s/(@.*)'/$1/g;
+ }
+ s/\{FALSE\}/0/g;
+ s/\{TRUE\}/1/g;
+ s/\{(\w\w\w\w+)\}/$1/g;
+ s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
+ s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
+ s/\bIMPORT\b/.extern/;
+ s/\bEXPORT\b/.global/;
+ s/^(\s+)\[/$1IF/;
+ s/^(\s+)\|/$1ELSE/;
+ s/^(\s+)\]/$1ENDIF/;
+ s/IF *:DEF:/ .ifdef/;
+ s/IF *:LNOT: *:DEF:/ .ifndef/;
+ s/ELSE/ .else/;
+ s/ENDIF/ .endif/;
+
+ if( /\bIF\b/ ) {
+ s/\bIF\b/ .if/;
+ s/=/==/;
+ }
+ if ( $n == 2) {
+ s/\$/\\/g;
+ }
+ if ($n == 1) {
+ s/\$//g;
+ s/label//g;
+ $n = 2;
+ }
+ if ( /MACRO/ ) {
+ s/MACRO *\n/.macro/;
+ $n=1;
+ }
+ if ( /\bMEND\b/ ) {
+ s/\bMEND\b/.endm/;
+ $n=0;
+ }
+
+ # ".rdata" doesn't work in 'as' version 2.13.2, as it is ".rodata" there.
+ #
+ if ( /\bAREA\b/ ) {
+ my $align;
+ $align = "2";
+ if ( /ALIGN=(\d+)/ ) {
+ $align = $1;
+ }
+ if ( /CODE/ ) {
+ $nxstack = 1;
+ }
+ s/^(.+)CODE(.+)READONLY(.*)/ .text/;
+ s/^(.+)DATA(.+)READONLY(.*)/ .section .rdata/;
+ s/^(.+)\|\|\.data\|\|(.+)/ .data/;
+ s/^(.+)\|\|\.bss\|\|(.+)/ .bss/;
+ s/$/; .p2align $align/;
+ # Enable NEON instructions but don't produce a binary that requires
+ # ARMv7. RVCT does not have equivalent directives, so we just do this
+ # for all CODE areas.
+ if ( /.text/ ) {
+ # Separating .arch, .fpu, etc., by semicolons does not work (gas
+ # thinks the semicolon is part of the arch name, even when there's
+ # whitespace separating them). Sadly this means our line numbers
+ # won't match the original source file (we could use the .line
+ # directive, which is documented to be obsolete, but then gdb will
+ # show the wrong line in the translated source file).
+ s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/;
+ }
+ }
+
+ s/\|\|\.constdata\$(\d+)\|\|/.L_CONST$1/; # ||.constdata$3||
+ s/\|\|\.bss\$(\d+)\|\|/.L_BSS$1/; # ||.bss$2||
+ s/\|\|\.data\$(\d+)\|\|/.L_DATA$1/; # ||.data$2||
+ s/\|\|([a-zA-Z0-9_]+)\@([a-zA-Z0-9_]+)\|\|/@ $&/;
+ s/^(\s+)\%(\s)/ .space $1/;
+
+ s/\|(.+)\.(\d+)\|/\.$1_$2/; # |L80.123| -> .L80_123
+ s/\bCODE32\b/.code 32/ && do {$thumb = 0};
+ s/\bCODE16\b/.code 16/ && do {$thumb = 1};
+ if (/\bPROC\b/)
+ {
+ my $prefix;
+ my $proc;
+ /^([A-Za-z_\.]\w+)\b/;
+ $proc = $1;
+ $prefix = "";
+ if ($proc)
+ {
+ $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc);
+ push(@proc_stack, $proc);
+ s/^[A-Za-z_\.]\w+/$&:/;
+ }
+ $prefix = $prefix."\t.thumb_func; " if ($thumb);
+ s/\bPROC\b/@ $&/;
+ $_ = $prefix.$_;
+ }
+ s/^(\s*)(S|Q|SH|U|UQ|UH)ASX\b/$1$2ADDSUBX/;
+ s/^(\s*)(S|Q|SH|U|UQ|UH)SAX\b/$1$2SUBADDX/;
+ if (/\bENDP\b/)
+ {
+ my $proc;
+ s/\bENDP\b/@ $&/;
+ $proc = pop(@proc_stack);
+ $_ = "\t.size $proc, .-$proc".$_ if ($proc);
+ }
+ s/\bSUBT\b/@ $&/;
+ s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm guide, p.7-25
+ s/\bKEEP\b/@ $&/;
+ s/\bEXPORTAS\b/@ $&/;
+ s/\|\|(.)+\bEQU\b/@ $&/;
+ s/\|\|([\w\$]+)\|\|/$1/;
+ s/\bENTRY\b/@ $&/;
+ s/\bASSERT\b/@ $&/;
+ s/\bGBLL\b/@ $&/;
+ s/\bGBLA\b/@ $&/;
+ s/^\W+OPT\b/@ $&/;
+ s/:OR:/|/g;
+ s/:SHL:/<</g;
+ s/:SHR:/>>/g;
+ s/:AND:/&/g;
+ s/:LAND:/&&/g;
+ s/CPSR/cpsr/;
+ s/SPSR/spsr/;
+ s/ALIGN$/.balign 4/;
+ s/ALIGN\s+([0-9x]+)$/.balign $1/;
+ s/psr_cxsf/psr_all/;
+ s/LTORG/.ltorg/;
+ s/^([A-Za-z_]\w*)[ \t]+EQU/ .set $1,/;
+ s/^([A-Za-z_]\w*)[ \t]+SETL/ .set $1,/;
+ s/^([A-Za-z_]\w*)[ \t]+SETA/ .set $1,/;
+ s/^([A-Za-z_]\w*)[ \t]+\*/ .set $1,/;
+
+ # {PC} + 0xdeadfeed --> . + 0xdeadfeed
+ s/\{PC\} \+/ \. +/;
+
+ # Single hex constant on the line !
+ #
+ # >>> NOTE <<<
+ # Double-precision floats in gcc are always mixed-endian, which means
+ # bytes in two words are little-endian, but words are big-endian.
+ # So, 0x0000deadfeed0000 would be stored as 0x0000dead at low address
+ # and 0xfeed0000 at high address.
+ #
+ s/\bDCFD\b[ \t]+0x([a-fA-F0-9]{8})([a-fA-F0-9]{8})/.long 0x$1, 0x$2/;
+ # Only decimal constants on the line, no hex !
+ s/\bDCFD\b[ \t]+([0-9\.\-]+)/.double $1/;
+
+ # Single hex constant on the line !
+# s/\bDCFS\b[ \t]+0x([a-f0-9]{8})([a-f0-9]{8})/.long 0x$1, 0x$2/;
+ # Only decimal constants on the line, no hex !
+# s/\bDCFS\b[ \t]+([0-9\.\-]+)/.double $1/;
+ s/\bDCFS[ \t]+0x/.word 0x/;
+ s/\bDCFS\b/.float/;
+
+ s/^([A-Za-z_]\w*)[ \t]+DCD/$1 .word/;
+ s/\bDCD\b/.word/;
+ s/^([A-Za-z_]\w*)[ \t]+DCW/$1 .short/;
+ s/\bDCW\b/.short/;
+ s/^([A-Za-z_]\w*)[ \t]+DCB/$1 .byte/;
+ s/\bDCB\b/.byte/;
+ s/^([A-Za-z_]\w*)[ \t]+\%/.comm $1,/;
+ s/^[A-Za-z_\.]\w+/$&:/;
+ s/^(\d+)/$1:/;
+ s/\%(\d+)/$1b_or_f/;
+ s/\%[Bb](\d+)/$1b/;
+ s/\%[Ff](\d+)/$1f/;
+ s/\%[Ff][Tt](\d+)/$1f/;
+ s/&([\dA-Fa-f]+)/0x$1/;
+ # Rewrite literals of the form 2_<bits> as hexadecimal. Each literal is first
+ # tagged as conv<bits>&&&&; the &&&& marker then moves leftwards, replacing
+ # the bits just before it with one hex digit that is left on its right.
+ if ( /\b2_[01]+\b/ ) {
+ s/\b2_([01]+)\b/conv$1&&&&/g;
+ # Consume complete groups of four bits for as long as one precedes a marker.
+ while ( /[01][01][01][01]&&&&/ ) {
+ s/0000&&&&/&&&&0/g;
+ s/0001&&&&/&&&&1/g;
+ s/0010&&&&/&&&&2/g;
+ s/0011&&&&/&&&&3/g;
+ s/0100&&&&/&&&&4/g;
+ s/0101&&&&/&&&&5/g;
+ s/0110&&&&/&&&&6/g;
+ s/0111&&&&/&&&&7/g;
+ s/1000&&&&/&&&&8/g;
+ s/1001&&&&/&&&&9/g;
+ s/1010&&&&/&&&&A/g;
+ s/1011&&&&/&&&&B/g;
+ s/1100&&&&/&&&&C/g;
+ s/1101&&&&/&&&&D/g;
+ s/1110&&&&/&&&&E/g;
+ s/1111&&&&/&&&&F/g;
+ }
+ # Fewer than four bits remain before the marker: convert a leading group of
+ # three, two or one bits into the most significant hex digit.
+ s/000&&&&/&&&&0/g;
+ s/001&&&&/&&&&1/g;
+ s/010&&&&/&&&&2/g;
+ s/011&&&&/&&&&3/g;
+ s/100&&&&/&&&&4/g;
+ s/101&&&&/&&&&5/g;
+ s/110&&&&/&&&&6/g;
+ s/111&&&&/&&&&7/g;
+ s/00&&&&/&&&&0/g;
+ s/01&&&&/&&&&1/g;
+ s/10&&&&/&&&&2/g;
+ s/11&&&&/&&&&3/g;
+ s/0&&&&/&&&&0/g;
+ s/1&&&&/&&&&1/g;
+ # Every bit has been consumed, so the tag collapses into the 0x prefix.
+ s/conv&&&&/0x/g;
+ }
+
+ if ( /commandline/)
+ {
+ if( /-bigend/)
+ {
+ $bigend=1;
+ }
+ }
+
+ if ( /\bDCDU\b/ )
+ {
+ # Translate a DCDU directive with a hexadecimal operand into four .byte
+ # directives, ordered by $bigend; the original directive is kept on the
+ # line but commented out with '@'.
+ my $cmd=$_;
+ my ($value, $prefix, $w1, $w2, $w3, $w4);
+
+ s/\s+DCDU\b/@ $&/;
+
+ # The operand is hexadecimal, so the capture must accept a-f/A-F as well as
+ # decimal digits; \d+ stopped at (or failed on) the first hex letter.
+ $cmd =~ /\bDCDU\b\s+0x([0-9A-Fa-f]+)/;
+ $value = $1;
+ # Split the constant into its four two-character bytes, most significant
+ # first.
+ $value =~ /(\w\w)(\w\w)(\w\w)(\w\w)/;
+ $w1 = $1;
+ $w2 = $2;
+ $w3 = $3;
+ $w4 = $4;
+
+ if( $bigend ne "")
+ {
+ # big endian
+ $prefix = "\t.byte\t0x".$w1.";".
+ "\t.byte\t0x".$w2.";".
+ "\t.byte\t0x".$w3.";".
+ "\t.byte\t0x".$w4."; ";
+ }
+ else
+ {
+ # little endian
+ $prefix = "\t.byte\t0x".$w4.";".
+ "\t.byte\t0x".$w3.";".
+ "\t.byte\t0x".$w2.";".
+ "\t.byte\t0x".$w1."; ";
+ }
+ $_=$prefix.$_;
+ }
+
+ if ( /\badrl\b/i )
+ {
+ s/\badrl\s+(\w+)\s*,\s*(\w+)/ldr $1,=$2/i;
+ $addPadding = 1;
+ }
+ s/\bEND\b/@ END/;
+} continue {
+ printf ("%s", $_) if $printit;
+ if ($addPadding != 0)
+ {
+ printf (" mov r0,r0\n");
+ $addPadding = 0;
+ }
+}
+#If we had a code section, mark that this object doesn't need an executable
+# stack.
+if ($nxstack) {
+ printf (" .section\t.note.GNU-stack,\"\",\%\%progbits\n");
+}
diff --git a/drivers/opus/celt/arm/arm_celt_map.c b/drivers/opus/celt/arm/arm_celt_map.c
new file mode 100644
index 0000000000..b187345154
--- /dev/null
+++ b/drivers/opus/celt/arm/arm_celt_map.c
@@ -0,0 +1,49 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "pitch.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if defined(OPUS_FIXED_POINT)
+/* Table of celt_pitch_xcorr implementations with OPUS_ARCHMASK+1 entries, one
+ per ARM capability level named beside each entry. The MAY_HAVE_*() macros
+ choose which suffixed function name is placed in each slot.
+ NOTE(review): presumably indexed by the value opus_select_arch() returns --
+ confirm against the caller. */
+opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+ const opus_val16 *, opus_val32 *, int , int) = {
+ celt_pitch_xcorr_c, /* ARMv4 */
+ MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */
+ MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
+ MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
+};
+# else
+# error "Floating-point implementation is not supported by ARM asm yet." \
+ "Reconfigure with --disable-rtcd or send patches."
+# endif
+
+#endif
diff --git a/drivers/opus/celt/arm/armcpu.c b/drivers/opus/celt/arm/armcpu.c
new file mode 100644
index 0000000000..7f0af631b9
--- /dev/null
+++ b/drivers/opus/celt/arm/armcpu.c
@@ -0,0 +1,174 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora modified to suit to Opus */
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#ifdef OPUS_HAVE_RTCD
+
+#include "armcpu.h"
+#include "cpu_support.h"
+#include "os_support.h"
+#include "opus_types.h"
+
+#define OPUS_CPU_ARM_V4 (1)
+#define OPUS_CPU_ARM_EDSP (1<<1)
+#define OPUS_CPU_ARM_MEDIA (1<<2)
+#define OPUS_CPU_ARM_NEON (1<<3)
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+
+/* MSVC: probe the CPU by executing one instruction from each optional
+ * extension and treating an illegal-instruction exception as "not supported".
+ * Returns a mask of OPUS_CPU_ARM_* flags.
+ * The probes are nested in the preprocessor: the MEDIA probe is only compiled
+ * when OPUS_ARM_MAY_HAVE_EDSP is defined, and the NEON probe only when
+ * OPUS_ARM_MAY_HAVE_MEDIA is defined as well. */
+static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
+ opus_uint32 flags;
+ flags=0;
+ /* MSVC has no inline __asm support for ARM, but it does let you __emit
+ * instructions via their assembled hex code.
+ * All of these instructions should be essentially nops. */
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+ __try{
+ /*PLD [r13]*/
+ __emit(0xF5DDF000);
+ /*Only reached when the instruction above did not fault.*/
+ flags|=OPUS_CPU_ARM_EDSP;
+ }
+ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+ /*Ignore exception.*/
+ }
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+ __try{
+ /*SHADD8 r3,r3,r3*/
+ __emit(0xE6333F93);
+ flags|=OPUS_CPU_ARM_MEDIA;
+ }
+ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+ /*Ignore exception.*/
+ }
+# if defined(OPUS_ARM_MAY_HAVE_NEON)
+ __try{
+ /*VORR q0,q0,q0*/
+ __emit(0xF2200150);
+ flags|=OPUS_CPU_ARM_NEON;
+ }
+ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+ /*Ignore exception.*/
+ }
+# endif
+# endif
+# endif
+ return flags;
+}
+
+#elif defined(__linux__)
+/* Linux based: derive the OPUS_CPU_ARM_* capability mask from /proc/cpuinfo.
+ * Returns 0 when the file cannot be opened. */
+opus_uint32 opus_cpu_capabilities(void)
+{
+  /* 512 should be enough for anybody (it's even enough for all the flags that
+   * x86 has accumulated... so far). */
+  char line[512];
+  opus_uint32 caps = 0;
+  FILE *fp;
+
+  /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
+   * Android */
+  fp = fopen("/proc/cpuinfo", "r");
+  if(fp == NULL)
+    return caps;
+
+  while(fgets(line, (int)sizeof(line), fp) != NULL)
+  {
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON)
+    /* The edsp and neon flags are looked up on the "Features" line. */
+    if(memcmp(line, "Features", 8) == 0)
+    {
+      const char *hit;
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
+      /* Accept the flag only as a whole word: it must be followed by a space
+       * or by the end of the line. */
+      hit = strstr(line, " edsp");
+      if(hit != NULL && (hit[5] == ' ' || hit[5] == '\n'))
+        caps |= OPUS_CPU_ARM_EDSP;
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
+      hit = strstr(line, " neon");
+      if(hit != NULL && (hit[5] == ' ' || hit[5] == '\n'))
+        caps |= OPUS_CPU_ARM_NEON;
+#  endif
+    }
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+    /* Media capabilities are assumed for architecture versions >= 6. */
+    if(memcmp(line, "CPU architecture:", 17) == 0 && atoi(line+17) >= 6)
+      caps |= OPUS_CPU_ARM_MEDIA;
+# endif
+  }
+
+  fclose(fp);
+  return caps;
+}
+#else
+/* The feature registers which can tell us what the processor supports are
+ * accessible in priveleged modes only, so we can't have a general user-space
+ * detection method like on x86.*/
+# error "Configured to use ARM asm but no CPU detection method available for " \
+ "your platform. Reconfigure with --disable-rtcd (or send patches)."
+#endif
+
+/* Map the detected capability mask to an architecture level:
+ * 0 = no EDSP, 1 = EDSP only, 2 = EDSP+MEDIA, 3 = EDSP+MEDIA+NEON.
+ * A level is only reported when every lower level is present as well. */
+int opus_select_arch(void)
+{
+  const opus_uint32 caps = opus_cpu_capabilities();
+
+  if((caps & OPUS_CPU_ARM_EDSP) == 0)
+    return 0;
+  if((caps & OPUS_CPU_ARM_MEDIA) == 0)
+    return 1;
+  if((caps & OPUS_CPU_ARM_NEON) == 0)
+    return 2;
+  return 3;
+}
+
+#endif
diff --git a/drivers/opus/celt/arm/armcpu.h b/drivers/opus/celt/arm/armcpu.h
new file mode 100644
index 0000000000..ac5744606e
--- /dev/null
+++ b/drivers/opus/celt/arm/armcpu.h
@@ -0,0 +1,71 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Name-selection helpers for the ARM-specific function variants.
+ * Each macro appends a suffix (_c, _edsp, _media or _neon) to a base function
+ * name. When a capability level is not enabled, its macro defers to the macro
+ * of the next lower level, ending at the plain C variant (_c). */
+#if !defined(ARMCPU_H)
+# define ARMCPU_H
+
+/* MAY_HAVE_*: chosen by the OPUS_ARM_MAY_HAVE_* build-time switches. */
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+# define MAY_HAVE_EDSP(name) name ## _edsp
+# else
+# define MAY_HAVE_EDSP(name) name ## _c
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+# define MAY_HAVE_MEDIA(name) name ## _media
+# else
+# define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON)
+# define MAY_HAVE_NEON(name) name ## _neon
+# else
+# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
+# endif
+
+/* PRESUME_*: same fallback chain, chosen by the OPUS_ARM_PRESUME_* switches. */
+# if defined(OPUS_ARM_PRESUME_EDSP)
+# define PRESUME_EDSP(name) name ## _edsp
+# else
+# define PRESUME_EDSP(name) name ## _c
+# endif
+
+# if defined(OPUS_ARM_PRESUME_MEDIA)
+# define PRESUME_MEDIA(name) name ## _media
+# else
+# define PRESUME_MEDIA(name) PRESUME_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_PRESUME_NEON)
+# define PRESUME_NEON(name) name ## _neon
+# else
+# define PRESUME_NEON(name) PRESUME_MEDIA(name)
+# endif
+
+/* Run-time CPU detection entry point; only declared when RTCD is enabled. */
+# if defined(OPUS_HAVE_RTCD)
+int opus_select_arch(void);
+# endif
+
+#endif
diff --git a/drivers/opus/celt/arm/armopts.s b/drivers/opus/celt/arm/armopts.s
new file mode 100644
index 0000000000..fb9196072a
--- /dev/null
+++ b/drivers/opus/celt/arm/armopts.s
@@ -0,0 +1,37 @@
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+; NOTE(review): the three definitions below have no value after '*'. The
+; armopts.s.in template supplies one through its @OPUS_ARM_MAY_HAVE_*@
+; substitutions, so these look like empty expansions -- confirm the intended
+; values before assembling this file.
+;
+; Set the following to 1 if we have EDSP instructions
+; (LDRD/STRD, etc., ARMv5E and later).
+OPUS_ARM_MAY_HAVE_EDSP *
+
+; Set the following to 1 if we have ARMv6 media instructions.
+OPUS_ARM_MAY_HAVE_MEDIA *
+
+; Set the following to 1 if we have NEON (some ARMv7)
+OPUS_ARM_MAY_HAVE_NEON *
+
+END
diff --git a/drivers/opus/celt/arm/armopts.s.in b/drivers/opus/celt/arm/armopts.s.in
new file mode 100644
index 0000000000..3d8aaf2754
--- /dev/null
+++ b/drivers/opus/celt/arm/armopts.s.in
@@ -0,0 +1,37 @@
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+; Set the following to 1 if we have EDSP instructions
+; (LDRD/STRD, etc., ARMv5E and later).
+OPUS_ARM_MAY_HAVE_EDSP * @OPUS_ARM_MAY_HAVE_EDSP@
+
+; Set the following to 1 if we have ARMv6 media instructions.
+OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@
+
+; Set the following to 1 if we have NEON (some ARMv7)
+OPUS_ARM_MAY_HAVE_NEON * @OPUS_ARM_MAY_HAVE_NEON@
+
+END
diff --git a/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s b/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s
new file mode 100644
index 0000000000..09917b16bf
--- /dev/null
+++ b/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s
@@ -0,0 +1,545 @@
+; Copyright (c) 2007-2008 CSIRO
+; Copyright (c) 2007-2009 Xiph.Org Foundation
+; Copyright (c) 2013 Parrot
+; Written by Aurélien Zanelli
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ AREA |.text|, CODE, READONLY
+
+ GET celt/arm/armopts.s
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+ EXPORT celt_pitch_xcorr_edsp
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+ EXPORT celt_pitch_xcorr_neon
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+
+; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
+; The four sums are accumulated into the values passed in q0. r4 and r5 are
+; advanced by the post-incrementing loads, and r12 is used as the counter.
+xcorr_kernel_neon PROC
+ ; input:
+ ; r3 = int len
+ ; r4 = opus_val16 *x
+ ; r5 = opus_val16 *y
+ ; q0 = opus_val32 sum[4]
+ ; output:
+ ; q0 = opus_val32 sum[4]
+ ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
+ ; internal usage:
+ ; r12 = int j
+ ; d3 = y_3|y_2|y_1|y_0
+ ; q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
+ ; q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
+ ; q8 = scratch
+ ;
+ ; Load y[0...3]
+ ; This requires len>0 to always be valid (which we assert in the C code).
+ VLD1.16 {d5}, [r5]!
+ SUBS r12, r3, #8
+ BLE xcorr_kernel_neon_process4
+; Process 8 samples at a time.
+; This loop loads one y value more than we actually need. Therefore we have to
+; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
+; reading past the end of the array.
+xcorr_kernel_neon_process8
+ ; This loop has 19 total instructions (10 cycles to issue, minimum), with
+ ; - 2 cycles of ARM instructions,
+ ; - 10 cycles of load/store/byte permute instructions, and
+ ; - 9 cycles of data processing instructions.
+ ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
+ ; latter two categories, meaning the whole loop should run in 10 cycles per
+ ; iteration, barring cache misses.
+ ;
+ ; Load x[0...7]
+ VLD1.16 {d6, d7}, [r4]!
+ ; Unlike VMOV, VAND is a data processing instruction (and doesn't get
+ ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
+ VAND d3, d5, d5
+ SUBS r12, r12, #8
+ ; Load y[4...11]
+ VLD1.16 {d4, d5}, [r5]!
+ VMLAL.S16 q0, d3, d6[0]
+ VEXT.16 d16, d3, d4, #1
+ VMLAL.S16 q0, d4, d7[0]
+ VEXT.16 d17, d4, d5, #1
+ VMLAL.S16 q0, d16, d6[1]
+ VEXT.16 d16, d3, d4, #2
+ VMLAL.S16 q0, d17, d7[1]
+ VEXT.16 d17, d4, d5, #2
+ VMLAL.S16 q0, d16, d6[2]
+ VEXT.16 d16, d3, d4, #3
+ VMLAL.S16 q0, d17, d7[2]
+ VEXT.16 d17, d4, d5, #3
+ VMLAL.S16 q0, d16, d6[3]
+ VMLAL.S16 q0, d17, d7[3]
+ ; The flags tested here were set by the SUBS above.
+ BGT xcorr_kernel_neon_process8
+; Process 4 samples here if we have > 4 left (still reading one extra y value).
+xcorr_kernel_neon_process4
+ ADDS r12, r12, #4
+ BLE xcorr_kernel_neon_process2
+ ; Load x[0...3]
+ VLD1.16 d6, [r4]!
+ ; Use VAND since it's a data processing instruction again.
+ VAND d4, d5, d5
+ SUB r12, r12, #4
+ ; Load y[4...7]
+ VLD1.16 d5, [r5]!
+ VMLAL.S16 q0, d4, d6[0]
+ VEXT.16 d16, d4, d5, #1
+ VMLAL.S16 q0, d16, d6[1]
+ VEXT.16 d16, d4, d5, #2
+ VMLAL.S16 q0, d16, d6[2]
+ VEXT.16 d16, d4, d5, #3
+ VMLAL.S16 q0, d16, d6[3]
+; Process 2 samples here if we have > 2 left (still reading one extra y value).
+xcorr_kernel_neon_process2
+ ADDS r12, r12, #2
+ BLE xcorr_kernel_neon_process1
+ ; Load x[0...1]
+ VLD2.16 {d6[],d7[]}, [r4]!
+ ; Use VAND since it's a data processing instruction again.
+ VAND d4, d5, d5
+ SUB r12, r12, #2
+ ; Load y[4...5]
+ VLD1.32 {d5[]}, [r5]!
+ VMLAL.S16 q0, d4, d6
+ VEXT.16 d16, d4, d5, #1
+ ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
+ ; instead of VEXT, since it's a data-processing instruction.
+ VSRI.64 d5, d4, #32
+ VMLAL.S16 q0, d16, d7
+; Process 1 sample using the extra y value we loaded above.
+xcorr_kernel_neon_process1
+ ; Load next *x
+ VLD1.16 {d6[]}, [r4]!
+ ADDS r12, r12, #1
+ ; y[0...3] are left in d5 from prior iteration(s) (if any)
+ VMLAL.S16 q0, d5, d6
+ ; Return here unless one more sample remains (flags from the ADDS above).
+ MOVLE pc, lr
+; Now process 1 last sample, not reading ahead.
+ ; Load last *y
+ VLD1.16 {d4[]}, [r5]!
+ VSRI.64 d4, d5, #16
+ ; Load last *x
+ VLD1.16 {d6[]}, [r4]!
+ VMLAL.S16 q0, d4, d6
+ MOV pc, lr
+ ENDP
+
+; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
+; opus_val32 *xcorr, int len, int max_pitch)
+; Writes max_pitch sums to xcorr: four at a time through xcorr_kernel_neon,
+; then any remaining ones individually. Returns the running maximum of the
+; sums, which starts at 1.
+celt_pitch_xcorr_neon PROC
+ ; input:
+ ; r0 = opus_val16 *_x
+ ; r1 = opus_val16 *_y
+ ; r2 = opus_val32 *xcorr
+ ; r3 = int len
+ ; [sp] = int max_pitch (read below at [sp, #16], after the 4-register push)
+ ; output:
+ ; r0 = int maxcorr
+ ; internal usage:
+ ; r4 = opus_val16 *x (for xcorr_kernel_neon())
+ ; r5 = opus_val16 *y (for xcorr_kernel_neon())
+ ; r6 = int max_pitch
+ ; r12 = int j
+ ; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon())
+ STMFD sp!, {r4-r6, lr}
+ LDR r6, [sp, #16]
+ VMOV.S32 q15, #1
+ ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+ SUBS r6, r6, #4
+ BLT celt_pitch_xcorr_neon_process4_done
+celt_pitch_xcorr_neon_process4
+ ; xcorr_kernel_neon parameters:
+ ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
+ MOV r4, r0
+ MOV r5, r1
+ VEOR q0, q0, q0
+ ; xcorr_kernel_neon only modifies r4, r5, r12, q0...q3, and q8 (its scratch).
+ ; So we don't save/restore any other registers.
+ BL xcorr_kernel_neon
+ SUBS r6, r6, #4
+ VST1.32 {q0}, [r2]!
+ ; _y += 4
+ ADD r1, r1, #8
+ VMAX.S32 q15, q15, q0
+ ; if (max_pitch >= 4) goto celt_pitch_xcorr_neon_process4
+ BGE celt_pitch_xcorr_neon_process4
+; We have less than 4 sums left to compute.
+celt_pitch_xcorr_neon_process4_done
+ ADDS r6, r6, #4
+ ; Reduce maxcorr to a single value
+ VMAX.S32 d30, d30, d31
+ VPMAX.S32 d30, d30, d30
+ ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
+ BLE celt_pitch_xcorr_neon_done
+; Now compute each remaining sum one at a time.
+celt_pitch_xcorr_neon_process_remaining
+ MOV r4, r0
+ MOV r5, r1
+ VMOV.I32 q0, #0
+ SUBS r12, r3, #8
+ BLT celt_pitch_xcorr_neon_process_remaining4
+; Sum terms 8 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop8
+ ; Load x[0...7]
+ VLD1.16 {q1}, [r4]!
+ ; Load y[0...7]
+ VLD1.16 {q2}, [r5]!
+ SUBS r12, r12, #8
+ VMLAL.S16 q0, d4, d2
+ VMLAL.S16 q0, d5, d3
+ BGE celt_pitch_xcorr_neon_process_remaining_loop8
+; Sum terms 4 at a time.
+celt_pitch_xcorr_neon_process_remaining4
+ ADDS r12, r12, #4
+ BLT celt_pitch_xcorr_neon_process_remaining4_done
+ ; Load x[0...3]
+ VLD1.16 {d2}, [r4]!
+ ; Load y[0...3]
+ VLD1.16 {d3}, [r5]!
+ SUB r12, r12, #4
+ VMLAL.S16 q0, d3, d2
+celt_pitch_xcorr_neon_process_remaining4_done
+ ; Reduce the sum to a single value.
+ VADD.S32 d0, d0, d1
+ VPADDL.S32 d0, d0
+ ; r12 = number of terms still to be summed (0...3)
+ ADDS r12, r12, #4
+ BLE celt_pitch_xcorr_neon_process_remaining_loop_done
+; Sum terms 1 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop1
+ VLD1.16 {d2[]}, [r4]!
+ VLD1.16 {d3[]}, [r5]!
+ SUBS r12, r12, #1
+ VMLAL.S16 q0, d2, d3
+ BGT celt_pitch_xcorr_neon_process_remaining_loop1
+celt_pitch_xcorr_neon_process_remaining_loop_done
+ VST1.32 {d0[0]}, [r2]!
+ VMAX.S32 d30, d30, d0
+ SUBS r6, r6, #1
+ ; _y++
+ ADD r1, r1, #2
+ ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
+ BGT celt_pitch_xcorr_neon_process_remaining
+celt_pitch_xcorr_neon_done
+ VMOV.32 r0, d30[0]
+ LDMFD sp!, {r4-r6, pc}
+ ENDP
+
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+
+; This will get used on ARMv7 devices without NEON, so it has been optimized
+; to take advantage of dual-issuing where possible.
+; Accumulates four correlation sums into r6...r9 using 16x16 multiply-
+; accumulates: a main loop handling four x samples per iteration, then a tail
+; for up to three remaining samples. r2, r4 and r5 are saved on entry and
+; restored on exit.
+xcorr_kernel_edsp PROC
+ ; input:
+ ; r3 = int len
+ ; r4 = opus_val16 *_x (must be 32-bit aligned)
+ ; r5 = opus_val16 *_y (must be 32-bit aligned)
+ ; r6...r9 = opus_val32 sum[4]
+ ; output:
+ ; r6...r9 = opus_val32 sum[4]
+ ; preserved: r0-r5
+ ; internal usage
+ ; r2 = int j
+ ; r12,r14 = opus_val16 x[4]
+ ; r10,r11 = opus_val16 y[4]
+ STMFD sp!, {r2,r4,r5,lr}
+ LDR r10, [r5], #4 ; Load y[0...1]
+ SUBS r2, r3, #4 ; j = len-4
+ LDR r11, [r5], #4 ; Load y[2...3]
+ BLE xcorr_kernel_edsp_process4_done
+ LDR r12, [r4], #4 ; Load x[0...1]
+ ; Stall
+xcorr_kernel_edsp_process4
+ ; The multiplies must issue from pipeline 0, and can't dual-issue with each
+ ; other. Every other instruction here dual-issues with a multiply, and is
+ ; thus "free". There should be no stalls in the body of the loop.
+ SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_0,y_0)
+ LDR r14, [r4], #4 ; Load x[2...3]
+ SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x_0,y_1)
+ SUBS r2, r2, #4 ; j-=4
+ SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_0,y_2)
+ SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x_0,y_3)
+ SMLATT r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_1,y_1)
+ LDR r10, [r5], #4 ; Load y[4...5]
+ SMLATB r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],x_1,y_2)
+ SMLATT r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_1,y_3)
+ SMLATB r9, r12, r10, r9 ; sum[3] = MAC16_16(sum[3],x_1,y_4)
+ LDRGT r12, [r4], #4 ; Load x[0...1]
+ SMLABB r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_2,y_2)
+ SMLABT r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x_2,y_3)
+ SMLABB r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_2,y_4)
+ SMLABT r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x_2,y_5)
+ SMLATT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_3,y_3)
+ LDR r11, [r5], #4 ; Load y[6...7]
+ SMLATB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],x_3,y_4)
+ SMLATT r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_3,y_5)
+ SMLATB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],x_3,y_6)
+ BGT xcorr_kernel_edsp_process4
+xcorr_kernel_edsp_process4_done
+ ; r2 = number of samples still to process
+ ADDS r2, r2, #4
+ BLE xcorr_kernel_edsp_done
+ LDRH r12, [r4], #2 ; r12 = *x++
+ SUBS r2, r2, #1 ; j--
+ ; Stall
+ SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0)
+ LDRGTH r14, [r4], #2 ; r14 = *x++
+ SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1)
+ SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2)
+ SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3)
+ BLE xcorr_kernel_edsp_done
+ SMLABT r6, r14, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_1)
+ SUBS r2, r2, #1 ; j--
+ SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2)
+ LDRH r10, [r5], #2 ; r10 = y_4 = *y++
+ SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3)
+ LDRGTH r12, [r4], #2 ; r12 = *x++
+ SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4)
+ BLE xcorr_kernel_edsp_done
+ SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2)
+ CMP r2, #1 ; flags for j-1; r2 itself is reused for y_5 below
+ SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3)
+ LDRH r2, [r5], #2 ; r2 = y_5 = *y++
+ SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4)
+ LDRGTH r14, [r4] ; r14 = *x
+ SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5)
+ BLE xcorr_kernel_edsp_done
+ SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3)
+ LDRH r11, [r5] ; r11 = y_6 = *y
+ SMLABB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_4)
+ SMLABB r8, r14, r2, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_5)
+ SMLABB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_6)
+xcorr_kernel_edsp_done
+ LDMFD sp!, {r2,r4,r5,pc}
+ ENDP
+
+celt_pitch_xcorr_edsp PROC
+ ; Computes one correlation sum of _x against _y for each of max_pitch
+ ; successive offsets of _y, stores each sum to xcorr[] and returns the
+ ; largest sum seen (never less than 1, the initial maxcorr).
+ ; input:
+ ; r0 = opus_val16 *_x (must be 32-bit aligned)
+ ; r1 = opus_val16 *_y (only needs to be 16-bit aligned)
+ ; r2 = opus_val32 *xcorr
+ ; r3 = int len
+ ; output:
+ ; r0 = maxcorr
+ ; internal usage
+ ; r4 = opus_val16 *x
+ ; r5 = opus_val16 *y
+ ; r6 = opus_val32 sum0
+ ; r7 = opus_val32 sum1
+ ; r8 = opus_val32 sum2
+ ; r9 = opus_val32 sum3
+ ; r1 = int max_pitch
+ ; r12 = int j
+ ; Save r4-r11 and lr: 9 words = 36 bytes pushed.
+ STMFD sp!, {r4-r11, lr}
+ MOV r5, r1
+ ; max_pitch is read from the caller's stack, just above the 36 bytes pushed.
+ LDR r1, [sp, #36]
+ MOV r4, r0
+ ; Is _y already 32-bit aligned? If so the single-offset step below is skipped.
+ TST r5, #3
+ ; maxcorr = 1
+ MOV r0, #1
+ BEQ celt_pitch_xcorr_edsp_process1u_done
+; Compute one sum at the start to make y 32-bit aligned.
+ SUBS r12, r3, #4
+ ; r14 = sum = 0
+ MOV r14, #0
+ LDRH r8, [r5], #2
+ BLE celt_pitch_xcorr_edsp_process1u_loop4_done
+ LDR r6, [r4], #4
+ ; Move y_0 into the top halfword so the loop can use the "T" multiply forms.
+ MOV r8, r8, LSL #16
+celt_pitch_xcorr_edsp_process1u_loop4
+ LDR r9, [r5], #4
+ SMLABT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0)
+ LDR r7, [r4], #4
+ SMLATB r14, r6, r9, r14 ; sum = MAC16_16(sum, x_1, y_1)
+ LDR r8, [r5], #4
+ SMLABT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2)
+ SUBS r12, r12, #4 ; j-=4
+ SMLATB r14, r7, r8, r14 ; sum = MAC16_16(sum, x_3, y_3)
+ LDRGT r6, [r4], #4
+ BGT celt_pitch_xcorr_edsp_process1u_loop4
+ ; Bring the pending y sample back down to the bottom halfword for the
+ ; one-sample-at-a-time loop below.
+ MOV r8, r8, LSR #16
+celt_pitch_xcorr_edsp_process1u_loop4_done
+ ADDS r12, r12, #4
+celt_pitch_xcorr_edsp_process1u_loop1
+ LDRGEH r6, [r4], #2
+ ; Stall
+ SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
+ SUBGES r12, r12, #1
+ LDRGTH r8, [r5], #2
+ BGT celt_pitch_xcorr_edsp_process1u_loop1
+ ; Restore _x
+ SUB r4, r4, r3, LSL #1
+ ; Restore and advance _y
+ SUB r5, r5, r3, LSL #1
+ ; maxcorr = max(maxcorr, sum)
+ CMP r0, r14
+ ADD r5, r5, #2
+ MOVLT r0, r14
+ SUBS r1, r1, #1
+ ; xcorr[i] = sum
+ STR r14, [r2], #4
+ BLE celt_pitch_xcorr_edsp_done
+celt_pitch_xcorr_edsp_process1u_done
+ ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
+ SUBS r1, r1, #4
+ BLT celt_pitch_xcorr_edsp_process2
+celt_pitch_xcorr_edsp_process4
+ ; Four offsets per iteration, computed by xcorr_kernel_edsp.
+ ; xcorr_kernel_edsp parameters:
+ ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
+ MOV r6, #0
+ MOV r7, #0
+ MOV r8, #0
+ MOV r9, #0
+ BL xcorr_kernel_edsp ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+ ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
+ CMP r0, r6
+ ; _y+=4
+ ADD r5, r5, #8
+ MOVLT r0, r6
+ CMP r0, r7
+ MOVLT r0, r7
+ CMP r0, r8
+ MOVLT r0, r8
+ CMP r0, r9
+ MOVLT r0, r9
+ ; Store the four sums to xcorr and advance the pointer by four entries.
+ STMIA r2!, {r6-r9}
+ SUBS r1, r1, #4
+ BGE celt_pitch_xcorr_edsp_process4
+celt_pitch_xcorr_edsp_process2
+ ; r1 was biased by -4 above; adding 2 tests whether at least two offsets remain.
+ ADDS r1, r1, #2
+ BLT celt_pitch_xcorr_edsp_process1a
+ SUBS r12, r3, #4
+ ; {r10, r11} = {sum0, sum1} = {0, 0}
+ MOV r10, #0
+ MOV r11, #0
+ LDR r8, [r5], #4
+ BLE celt_pitch_xcorr_edsp_process2_loop_done
+ LDR r6, [r4], #4
+ LDR r9, [r5], #4
+celt_pitch_xcorr_edsp_process2_loop4
+ SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0)
+ LDR r7, [r4], #4
+ SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1)
+ SUBS r12, r12, #4 ; j-=4
+ SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1)
+ LDR r8, [r5], #4
+ SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2)
+ LDRGT r6, [r4], #4
+ SMLABB r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_2, y_2)
+ SMLABT r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_2, y_3)
+ SMLATT r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_3, y_3)
+ LDRGT r9, [r5], #4
+ SMLATB r11, r7, r8, r11 ; sum1 = MAC16_16(sum1, x_3, y_4)
+ BGT celt_pitch_xcorr_edsp_process2_loop4
+celt_pitch_xcorr_edsp_process2_loop_done
+ ADDS r12, r12, #2
+ BLE celt_pitch_xcorr_edsp_process2_1
+ LDR r6, [r4], #4
+ ; Stall
+ SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0)
+ LDR r9, [r5], #4
+ SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1)
+ SUB r12, r12, #2
+ SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1)
+ MOV r8, r9
+ SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_1
+ LDRH r6, [r4], #2
+ ADDS r12, r12, #1
+ ; Stall
+ SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0)
+ LDRGTH r7, [r4], #2
+ SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1)
+ BLE celt_pitch_xcorr_edsp_process2_done
+ LDRH r9, [r5], #2
+ SMLABT r10, r7, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_1)
+ SMLABB r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_0, y_2)
+celt_pitch_xcorr_edsp_process2_done
+ ; Restore _x
+ SUB r4, r4, r3, LSL #1
+ ; Restore and advance _y
+ SUB r5, r5, r3, LSL #1
+ ; maxcorr = max(maxcorr, sum0)
+ CMP r0, r10
+ ADD r5, r5, #2
+ MOVLT r0, r10
+ SUB r1, r1, #2
+ ; maxcorr = max(maxcorr, sum1)
+ CMP r0, r11
+ ; xcorr[i] = sum
+ STR r10, [r2], #4
+ MOVLT r0, r11
+ STR r11, [r2], #4
+celt_pitch_xcorr_edsp_process1a
+ ; At most one offset is left; this path reads _y with 32-bit loads
+ ; (NOTE(review): "1a" presumably means "one, aligned" - confirm).
+ ADDS r1, r1, #1
+ BLT celt_pitch_xcorr_edsp_done
+ SUBS r12, r3, #4
+ ; r14 = sum = 0
+ MOV r14, #0
+ BLT celt_pitch_xcorr_edsp_process1a_loop_done
+ LDR r6, [r4], #4
+ LDR r8, [r5], #4
+ LDR r7, [r4], #4
+ LDR r9, [r5], #4
+celt_pitch_xcorr_edsp_process1a_loop4
+ SMLABB r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0)
+ SUBS r12, r12, #4 ; j-=4
+ SMLATT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1)
+ LDRGE r6, [r4], #4
+ SMLABB r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2)
+ LDRGE r8, [r5], #4
+ SMLATT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_3, y_3)
+ LDRGE r7, [r4], #4
+ LDRGE r9, [r5], #4
+ BGE celt_pitch_xcorr_edsp_process1a_loop4
+celt_pitch_xcorr_edsp_process1a_loop_done
+ ; Tail: a conditional pair of samples, then a conditional single sample.
+ ADDS r12, r12, #2
+ LDRGE r6, [r4], #4
+ LDRGE r8, [r5], #4
+ ; Stall
+ SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0)
+ SUBGE r12, r12, #2
+ SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1)
+ ADDS r12, r12, #1
+ LDRGEH r6, [r4], #2
+ LDRGEH r8, [r5], #2
+ ; Stall
+ SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
+ ; maxcorr = max(maxcorr, sum)
+ CMP r0, r14
+ ; xcorr[i] = sum
+ STR r14, [r2], #4
+ MOVLT r0, r14
+celt_pitch_xcorr_edsp_done
+ ; Restore the saved registers and return (the saved lr is popped into pc).
+ LDMFD sp!, {r4-r11, pc}
+ ENDP
+
+ENDIF
+
+END
diff --git a/drivers/opus/celt/arm/fixed_armv4.h b/drivers/opus/celt/arm/fixed_armv4.h
new file mode 100644
index 0000000000..b690bc8cea
--- /dev/null
+++ b/drivers/opus/celt/arm/fixed_armv4.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 2013 Xiph.Org Foundation and contributors */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv4_H
+#define FIXED_ARMv4_H
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits.
+ The 16-bit operand a is placed in the top half of a 32-bit register so that
+ the high word of the 64-bit SMULL product (rd_hi) is the shifted result.
+ The placement is done on the unsigned representation: left-shifting a
+ negative signed value is undefined behaviour in C. */
+#undef MULT16_32_Q16
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
+{
+ /* Low and high words of the 64-bit product; only the high word is used. */
+ unsigned rd_lo;
+ int rd_hi;
+ __asm__(
+ "#MULT16_32_Q16\n\t"
+ "smull %0, %1, %2, %3\n\t"
+ : "=&r"(rd_lo), "=&r"(rd_hi)
+ : "%r"(b),"r"((opus_int32)((opus_uint32)(a)<<16))
+ );
+ return rd_hi;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Result fits in 32 bits.
+ Computed as the high word of (a<<16)*b, doubled; the lowest result bit is
+ therefore always zero (see the note in the body).
+ Both left shifts are done on the unsigned representation: left-shifting a
+ negative signed value is undefined behaviour in C. */
+#undef MULT16_32_Q15
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
+{
+ /* Low and high words of the 64-bit product. */
+ unsigned rd_lo;
+ int rd_hi;
+ __asm__(
+ "#MULT16_32_Q15\n\t"
+ "smull %0, %1, %2, %3\n\t"
+ : "=&r"(rd_lo), "=&r"(rd_hi)
+ : "%r"(b), "r"((opus_int32)((opus_uint32)(a)<<16))
+ );
+ /*We intentionally don't OR in the high bit of rd_lo for speed.*/
+ return (opus_val32)((opus_uint32)rd_hi<<1);
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+ b must fit in 31 bits.
+ Result fits in 32 bits.
+ Built from ADD32 and whichever MULT16_32_Q15 is defined at the point of use. */
+#undef MAC16_32_Q15
+#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+
+
+/** 32x32 multiplication, followed by a 31-bit shift right. Result fits in 32 bits.
+ Plain C: both operands are widened to opus_int64 before multiplying. */
+#undef MULT32_32_Q31
+#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31)
+
+#endif
diff --git a/drivers/opus/celt/arm/fixed_armv5e.h b/drivers/opus/celt/arm/fixed_armv5e.h
new file mode 100644
index 0000000000..1194a7d3ec
--- /dev/null
+++ b/drivers/opus/celt/arm/fixed_armv5e.h
@@ -0,0 +1,116 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+ Copyright (C) 2003-2008 Jean-Marc Valin
+ Copyright (C) 2007-2008 CSIRO
+ Copyright (C) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv5E_H
+#define FIXED_ARMv5E_H
+
+#include "fixed_armv4.h"
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits.
+ Uses a single SMULWB: the 32-bit operand b is multiplied by the bottom
+ halfword of the register holding a. */
+#undef MULT16_32_Q16
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+{
+ int res;
+ __asm__(
+ "#MULT16_32_Q16\n\t"
+ "smulwb %0, %1, %2\n\t"
+ : "=r"(res)
+ : "r"(b),"r"(a)
+ );
+ return res;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Result fits in 32 bits.
+ Computed as the SMULWB result (the Q16 product) doubled, so the lowest
+ result bit is always zero.
+ The doubling is done on the unsigned representation: left-shifting a
+ negative signed value is undefined behaviour in C. */
+#undef MULT16_32_Q15
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+{
+ int res;
+ __asm__(
+ "#MULT16_32_Q15\n\t"
+ "smulwb %0, %1, %2\n\t"
+ : "=r"(res)
+ : "r"(b), "r"(a)
+ );
+ return (opus_val32)((opus_uint32)res<<1);
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+ b must fit in 31 bits.
+ Result fits in 32 bits.
+ b is doubled before the SMLAWB so that the instruction's built-in 16-bit
+ shift yields a 15-bit shift overall. The doubling is done on the unsigned
+ representation: left-shifting a negative signed value is undefined
+ behaviour in C. */
+#undef MAC16_32_Q15
+static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+ int res;
+ __asm__(
+ "#MAC16_32_Q15\n\t"
+ "smlawb %0, %1, %2, %3;\n"
+ : "=r"(res)
+ : "r"((opus_int32)((opus_uint32)(b)<<1)), "r"(a), "r"(c)
+ );
+ return res;
+}
+#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+
+/** 16x16 multiply-add where the result fits in 32 bits.
+ Uses a single SMLABB: the bottom halfwords of the registers holding a and b
+ are multiplied and c is added to the product. */
+#undef MAC16_16
+static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val16 b)
+{
+ int res;
+ __asm__(
+ "#MAC16_16\n\t"
+ "smlabb %0, %1, %2, %3;\n"
+ : "=r"(res)
+ : "r"(a), "r"(b), "r"(c)
+ );
+ return res;
+}
+#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
+
+/** 16x16 multiplication where the result fits in 32 bits.
+ Uses a single SMULBB on the bottom halfwords of the registers holding
+ a and b. */
+#undef MULT16_16
+static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+{
+ int res;
+ __asm__(
+ "#MULT16_16\n\t"
+ "smulbb %0, %1, %2;\n"
+ : "=r"(res)
+ : "r"(a), "r"(b)
+ );
+ return res;
+}
+#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
+
+#endif
diff --git a/drivers/opus/celt/arm/kiss_fft_armv4.h b/drivers/opus/celt/arm/kiss_fft_armv4.h
new file mode 100644
index 0000000000..773464628b
--- /dev/null
+++ b/drivers/opus/celt/arm/kiss_fft_armv4.h
@@ -0,0 +1,121 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_ARMv4_H
+#define KISS_FFT_ARMv4_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef OPUS_FIXED_POINT
+
+/* C_MUL(m,a,b): complex multiply m = a*b with a 15-bit right shift.
+ The asm reads a as two 32-bit words (real, imaginary) via ldm and b as two
+ signed 16-bit halfwords at byte offsets 0 and 2 via ldrsh.
+ It accumulates a.i*b.r + a.r*b.i and a.r*b.r - a.i*b.i as 64-bit products
+ (smull/smlal), then rebuilds each result as (lo>>15)|(hi<<17).
+ r0 and r1 are used as scratch and are listed as clobbers. */
+#undef C_MUL
+#define C_MUL(m,a,b) \
+ do{ \
+ int br__; \
+ int bi__; \
+ int tt__; \
+ __asm__ __volatile__( \
+ "#C_MUL\n\t" \
+ "ldrsh %[br], [%[bp], #0]\n\t" \
+ "ldm %[ap], {r0,r1}\n\t" \
+ "ldrsh %[bi], [%[bp], #2]\n\t" \
+ "smull %[tt], %[mi], r1, %[br]\n\t" \
+ "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+ "rsb %[bi], %[bi], #0\n\t" \
+ "smull %[br], %[mr], r0, %[br]\n\t" \
+ "mov %[tt], %[tt], lsr #15\n\t" \
+ "smlal %[br], %[mr], r1, %[bi]\n\t" \
+ "orr %[mi], %[tt], %[mi], lsl #17\n\t" \
+ "mov %[br], %[br], lsr #15\n\t" \
+ "orr %[mr], %[br], %[mr], lsl #17\n\t" \
+ : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+ [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+ : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+ : "r0", "r1" \
+ ); \
+ } \
+ while(0)
+
+/* C_MUL4(m,a,b): complex multiply m = a*b with a 17-bit right shift, i.e. two
+ bits more than the 15-bit shift a Q15 product would use.
+ The asm reads a as two 32-bit words via ldm and b as two signed 16-bit
+ halfwords via ldrsh, accumulates the 64-bit products with smull/smlal and
+ rebuilds each result as (lo>>17)|(hi<<15).
+ r0 and r1 are used as scratch and are listed as clobbers. */
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+ do{ \
+ int br__; \
+ int bi__; \
+ int tt__; \
+ __asm__ __volatile__( \
+ "#C_MUL4\n\t" \
+ "ldrsh %[br], [%[bp], #0]\n\t" \
+ "ldm %[ap], {r0,r1}\n\t" \
+ "ldrsh %[bi], [%[bp], #2]\n\t" \
+ "smull %[tt], %[mi], r1, %[br]\n\t" \
+ "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+ "rsb %[bi], %[bi], #0\n\t" \
+ "smull %[br], %[mr], r0, %[br]\n\t" \
+ "mov %[tt], %[tt], lsr #17\n\t" \
+ "smlal %[br], %[mr], r1, %[bi]\n\t" \
+ "orr %[mi], %[tt], %[mi], lsl #15\n\t" \
+ "mov %[br], %[br], lsr #17\n\t" \
+ "orr %[mr], %[br], %[mr], lsl #15\n\t" \
+ : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+ [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+ : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+ : "r0", "r1" \
+ ); \
+ } \
+ while(0)
+
+/* C_MULC(m,a,b): complex multiply by the conjugate, m = a*conj(b), with a
+ 15-bit right shift.
+ The asm accumulates a.r*b.r + a.i*b.i into the real part and
+ a.i*b.r - a.r*b.i into the imaginary part as 64-bit products, then rebuilds
+ each result as (lo>>15)|(hi<<17).
+ a is read as two 32-bit words via ldm, b as two signed 16-bit halfwords via
+ ldrsh; r0 and r1 are used as scratch and are listed as clobbers. */
+#undef C_MULC
+#define C_MULC(m,a,b) \
+ do{ \
+ int br__; \
+ int bi__; \
+ int tt__; \
+ __asm__ __volatile__( \
+ "#C_MULC\n\t" \
+ "ldrsh %[br], [%[bp], #0]\n\t" \
+ "ldm %[ap], {r0,r1}\n\t" \
+ "ldrsh %[bi], [%[bp], #2]\n\t" \
+ "smull %[tt], %[mr], r0, %[br]\n\t" \
+ "smlal %[tt], %[mr], r1, %[bi]\n\t" \
+ "rsb %[bi], %[bi], #0\n\t" \
+ "smull %[br], %[mi], r1, %[br]\n\t" \
+ "mov %[tt], %[tt], lsr #15\n\t" \
+ "smlal %[br], %[mi], r0, %[bi]\n\t" \
+ "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
+ "mov %[br], %[br], lsr #15\n\t" \
+ "orr %[mi], %[br], %[mi], lsl #17\n\t" \
+ : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+ [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+ : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+ : "r0", "r1" \
+ ); \
+ } \
+ while(0)
+
+#endif /* OPUS_FIXED_POINT */
+
+#endif /* KISS_FFT_ARMv4_H */
diff --git a/drivers/opus/celt/arm/kiss_fft_armv5e.h b/drivers/opus/celt/arm/kiss_fft_armv5e.h
new file mode 100644
index 0000000000..1eff56a66a
--- /dev/null
+++ b/drivers/opus/celt/arm/kiss_fft_armv5e.h
@@ -0,0 +1,118 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_ARMv5E_H
+#define KISS_FFT_ARMv5E_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef OPUS_FIXED_POINT
+
+/* Inline-asm constraint string applied to the memory operand that the ldrd
+ instructions in the macros below load from: "Q" when compiling for
+ Thumb/Thumb-2, "Uq" otherwise. */
+#if defined(__thumb__)||defined(__thumb2__)
+#define LDRD_CONS "Q"
+#else
+#define LDRD_CONS "Uq"
+#endif
+
+/* C_MUL(m,a,b): complex multiply m = a*b.
+ a is loaded as a 64-bit pair with ldrd (low word %[aval], high word
+ %H[aval]) and b as one 32-bit word whose bottom and top halfwords are used
+ by the "b" and "t" multiply forms (NOTE(review): this presumes the real
+ part of each operand is stored first - confirm against the struct layout).
+ Each smulw/smlaw product already carries a 16-bit shift, so the results are
+ shifted left by one afterwards to give a 15-bit shift overall. */
+#undef C_MUL
+#define C_MUL(m,a,b) \
+ do{ \
+ int mr1__; \
+ int mr2__; \
+ int mi__; \
+ long long aval__; \
+ int bval__; \
+ __asm__( \
+ "#C_MUL\n\t" \
+ "ldrd %[aval], %H[aval], %[ap]\n\t" \
+ "ldr %[bval], %[bp]\n\t" \
+ "smulwb %[mi], %H[aval], %[bval]\n\t" \
+ "smulwb %[mr1], %[aval], %[bval]\n\t" \
+ "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+ "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+ : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+ [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+ : [ap]LDRD_CONS(a), [bp]"m"(b) \
+ ); \
+ (m).r = SHL32(SUB32(mr1__, mr2__), 1); \
+ (m).i = SHL32(mi__, 1); \
+ } \
+ while(0)
+
+/* C_MUL4(m,a,b): complex multiply m = a*b with two extra bits of right shift.
+ The asm sequence is the same as the one in C_MUL; the only difference is
+ that the 16-bit-shifted products are shifted right by one more bit (SHR32)
+ instead of left by one, giving a 17-bit shift overall. */
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+ do{ \
+ int mr1__; \
+ int mr2__; \
+ int mi__; \
+ long long aval__; \
+ int bval__; \
+ __asm__( \
+ "#C_MUL4\n\t" \
+ "ldrd %[aval], %H[aval], %[ap]\n\t" \
+ "ldr %[bval], %[bp]\n\t" \
+ "smulwb %[mi], %H[aval], %[bval]\n\t" \
+ "smulwb %[mr1], %[aval], %[bval]\n\t" \
+ "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+ "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+ : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+ [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+ : [ap]LDRD_CONS(a), [bp]"m"(b) \
+ ); \
+ (m).r = SHR32(SUB32(mr1__, mr2__), 1); \
+ (m).i = SHR32(mi__, 1); \
+ } \
+ while(0)
+
+/* C_MULC(m,a,b): complex multiply by the conjugate, m = a*conj(b).
+ The real part sums the two same-component products (low word of a with the
+ bottom half of b, high word of a with the top half of b); the imaginary
+ part is the difference of the two cross products.
+ Each product carries a 16-bit shift, so the results are shifted left by one
+ afterwards to give a 15-bit shift overall. */
+#undef C_MULC
+#define C_MULC(m,a,b) \
+ do{ \
+ int mr__; \
+ int mi1__; \
+ int mi2__; \
+ long long aval__; \
+ int bval__; \
+ __asm__( \
+ "#C_MULC\n\t" \
+ "ldrd %[aval], %H[aval], %[ap]\n\t" \
+ "ldr %[bval], %[bp]\n\t" \
+ "smulwb %[mr], %[aval], %[bval]\n\t" \
+ "smulwb %[mi1], %H[aval], %[bval]\n\t" \
+ "smulwt %[mi2], %[aval], %[bval]\n\t" \
+ "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \
+ : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \
+ [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+ : [ap]LDRD_CONS(a), [bp]"m"(b) \
+ ); \
+ (m).r = SHL32(mr__, 1); \
+ (m).i = SHL32(SUB32(mi1__, mi2__), 1); \
+ } \
+ while(0)
+
+#endif /* OPUS_FIXED_POINT */
+
+#endif /* KISS_FFT_ARMv5E_H */
diff --git a/drivers/opus/celt/arm/pitch_arm.h b/drivers/opus/celt/arm/pitch_arm.h
new file mode 100644
index 0000000000..df5e82ef0b
--- /dev/null
+++ b/drivers/opus/celt/arm/pitch_arm.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(PITCH_ARM_H)
+# define PITCH_ARM_H
+
+# include "armcpu.h"
+
+/* ARM-specific pitch cross-correlation entry points; only declared for
+ fixed-point builds. */
+# if defined(OPUS_FIXED_POINT)
+
+/* NEON implementation, declared when the build may target NEON. */
+# if defined(OPUS_ARM_MAY_HAVE_NEON)
+opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
+ opus_val32 *xcorr, int len, int max_pitch);
+# endif
+
+/* There is no separate "media" implementation: the name is mapped through
+ MAY_HAVE_EDSP (defined outside this header; presumably it selects the EDSP
+ or the generic C version - confirm in armcpu.h). */
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+# define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr)
+# endif
+
+/* EDSP implementation, declared when the build may target EDSP. */
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
+ opus_val32 *xcorr, int len, int max_pitch);
+# endif
+
+/* Without run-time CPU detection, celt_pitch_xcorr() is bound at compile time
+ to whatever PRESUME_NEON resolves to; the arch argument is evaluated and
+ discarded. */
+# if !defined(OPUS_HAVE_RTCD)
+# define OVERRIDE_PITCH_XCORR (1)
+# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+ ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
+# endif
+
+# endif
+
+#endif
diff --git a/drivers/opus/celt/bands.c b/drivers/opus/celt/bands.c
new file mode 100644
index 0000000000..87280c8333
--- /dev/null
+++ b/drivers/opus/celt/bands.c
@@ -0,0 +1,1518 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Copyright (c) 2008-2009 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <math.h>
+#include "bands.h"
+#include "opus_modes.h"
+#include "vq.h"
+#include "cwrs.h"
+#include "stack_alloc.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "rate.h"
+#include "quant_bands.h"
+#include "pitch.h"
+
+/* Quantise val against the ascending table thresholds[0..N-1], but stay on the
+   previous decision prev while val is within hysteresis[] of the boundary
+   that separates prev from the new candidate. Returns an index in [0, N]. */
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
+{
+   int idx = 0;
+   /* Find the first threshold that val falls below (N if there is none). */
+   while (idx<N && !(val < thresholds[idx]))
+      idx++;
+   if (idx>prev)
+   {
+      /* Moving up: require val to clear the upper boundary of prev by the margin. */
+      if (val < thresholds[prev]+hysteresis[prev])
+         idx = prev;
+   } else if (idx<prev)
+   {
+      /* Moving down: require val to drop below the lower boundary of prev by the margin. */
+      if (val > thresholds[prev-1]-hysteresis[prev-1])
+         idx = prev;
+   }
+   return idx;
+}
+
+/* One step of a linear congruential generator:
+   next = 1664525*seed + 1013904223, wrapping in opus_uint32. */
+opus_uint32 celt_lcg_rand(opus_uint32 seed)
+{
+   opus_uint32 next = seed;
+   next *= 1664525;
+   next += 1013904223;
+   return next;
+}
+
+/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
+ with this approximation is important because it has an impact on the bit allocation.
+ The input is squared and rounded down to 13 fewer bits, then a polynomial in
+ that square is evaluated with nested FRAC_MUL16 calls. The asserts document
+ the expected ranges of the intermediate and of the result before the final +1.
+ Do not reorder or widen any step: every intermediate truncation is part of
+ the result. */
+static opus_int16 bitexact_cos(opus_int16 x)
+{
+ opus_int32 tmp;
+ opus_int16 x2;
+ /* x*x with rounding (4096 is half of 1<<13). */
+ tmp = (4096+((opus_int32)(x)*(x)))>>13;
+ celt_assert(tmp<=32767);
+ x2 = tmp;
+ /* Polynomial in x2, evaluated innermost term first. */
+ x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
+ celt_assert(x2<=32766);
+ return 1+x2;
+}
+
+/* Integer approximation of log2(isin/icos), scaled by 1<<11.
+   Both inputs are normalised so that their EC_ILOG-reported top bit lands on
+   bit 14; the integer part of the log is the difference of the two bit
+   lengths and the fractional part of each comes from a quadratic in the
+   normalised value. */
+static int bitexact_log2tan(int isin,int icos)
+{
+   const int cos_bits = EC_ILOG(icos);
+   const int sin_bits = EC_ILOG(isin);
+   int sin_frac;
+   int cos_frac;
+   /* Normalise both operands to 15 significant bits. */
+   icos <<= 15-cos_bits;
+   isin <<= 15-sin_bits;
+   sin_frac = FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932);
+   cos_frac = FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
+   return (sin_bits-cos_bits)*(1<<11) + sin_frac - cos_frac;
+}
+
+#ifdef OPUS_FIXED_POINT
+/* Compute the amplitude (sqrt energy) in each of the bands.
+ Fixed-point version. For every channel c in [0,C) and band i in [0,end) it
+ writes bandE[i+c*m->nbEBands]. Band i covers the coefficients
+ X[M*eBands[i] .. M*eBands[i+1]-1] of that channel, with channels N apart. */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+{
+ int i, c, N;
+ const opus_int16 *eBands = m->eBands;
+ N = M*m->shortMdctSize;
+ c=0; do {
+ for (i=0;i<end;i++)
+ {
+ int j;
+ opus_val32 maxval=0;
+ opus_val32 sum = 0;
+
+ /* First pass: largest magnitude in the band, used to pick a scaling shift. */
+ j=M*eBands[i]; do {
+ maxval = MAX32(maxval, X[j+c*N]);
+ maxval = MAX32(maxval, -X[j+c*N]);
+ } while (++j<M*eBands[i+1]);
+
+ if (maxval > 0)
+ {
+ /* Scale the samples by this shift before squaring them as 16-bit values. */
+ int shift = celt_ilog2(maxval)-10;
+ j=M*eBands[i]; do {
+ sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
+ EXTRACT16(VSHR32(X[j+c*N],shift)));
+ } while (++j<M*eBands[i+1]);
+ /* We're adding one here to ensure the normalized band isn't larger than unity norm */
+ bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
+ } else {
+ /* All-zero band: store the minimum non-zero energy. */
+ bandE[i+c*m->nbEBands] = EPSILON;
+ }
+ /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
+ }
+ } while (++c<C);
+ /*printf ("\n");*/
+}
+
+/* Normalise each band such that the energy is one.
+ Fixed-point version. For every channel and every band in [0,end) the
+ coefficients of freq are multiplied by a reciprocal derived from bandE and
+ written to X. Note that the band loop is a do-while, so band 0 is always
+ processed. */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
+{
+ int i, c, N;
+ const opus_int16 *eBands = m->eBands;
+ N = M*m->shortMdctSize;
+ c=0; do {
+ i=0; do {
+ opus_val16 g;
+ int j,shift;
+ opus_val16 E;
+ /* Bring the band energy into 16-bit range before taking the reciprocal. */
+ shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
+ E = VSHR32(bandE[i+c*m->nbEBands], shift);
+ g = EXTRACT16(celt_rcp(SHL32(E,3)));
+ /* Apply the same scaling (less one bit) to the coefficients, then the gain. */
+ j=M*eBands[i]; do {
+ X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
+ } while (++j<M*eBands[i+1]);
+ } while (++i<end);
+ } while (++c<C);
+}
+
+#else /* OPUS_FIXED_POINT */
+/* Floating-point version: for every channel and every band below end, store
+   the square root of the band's summed squares (plus a 1e-27 floor) in bandE. */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+{
+   const opus_int16 *eBands = m->eBands;
+   const int N = M*m->shortMdctSize;
+   int c = 0;
+   do {
+      const celt_sig *chan = X + c*N;
+      celt_ener *out = bandE + c*m->nbEBands;
+      int band;
+      for (band=0; band<end; band++)
+      {
+         const int stop = M*eBands[band+1];
+         int j = M*eBands[band];
+         /* The tiny bias keeps the energy strictly positive. */
+         opus_val32 sum = 1e-27f;
+         while (j<stop)
+         {
+            sum += chan[j]*chan[j];
+            j++;
+         }
+         out[band] = celt_sqrt(sum);
+      }
+      c++;
+   } while (c<C);
+}
+
+/* Floating-point version: scale every band below end by the reciprocal of its
+   amplitude in bandE, so that each band of X ends up with unit energy. */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
+{
+   const opus_int16 *eBands = m->eBands;
+   const int N = M*m->shortMdctSize;
+   int c = 0;
+   do {
+      const int base = c*N;
+      int band;
+      for (band=0; band<end; band++)
+      {
+         const int stop = M*eBands[band+1];
+         int j = M*eBands[band];
+         /* The 1e-27 term guards against division by zero. */
+         const opus_val16 g = 1.f/(1e-27f+bandE[band+c*m->nbEBands]);
+         while (j<stop)
+         {
+            X[base+j] = freq[base+j]*g;
+            j++;
+         }
+      }
+      c++;
+   } while (c<C);
+}
+
+#endif /* OPUS_FIXED_POINT */
+
+/* De-normalise the energy to produce the synthesis from the unit-energy bands.
+ For each channel: coefficients below band start are zeroed, each band in
+ [start,end) is scaled by a gain derived from bandLogE plus the per-band
+ eMeans offset, and coefficients from band end up to N are zeroed.
+ X holds the normalised input, freq receives the output; channels are N
+ coefficients apart in both. At most two channels are supported (asserted). */
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+ celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
+{
+ int i, c, N;
+ const opus_int16 *eBands = m->eBands;
+ N = M*m->shortMdctSize;
+ celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
+ c=0; do {
+ celt_sig * OPUS_RESTRICT f;
+ const celt_norm * OPUS_RESTRICT x;
+ /* f walks the whole output channel; x starts at the first coded band. */
+ f = freq+c*N;
+ x = X+c*N+M*eBands[start];
+ for (i=0;i<M*eBands[start];i++)
+ *f++ = 0;
+ for (i=start;i<end;i++)
+ {
+ int j, band_end;
+ opus_val16 g;
+ opus_val16 lg;
+#ifdef OPUS_FIXED_POINT
+ int shift;
+#endif
+ j=M*eBands[i];
+ band_end = M*eBands[i+1];
+ /* Log-domain gain: coded log energy plus the band's mean. */
+ lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
+#ifndef OPUS_FIXED_POINT
+ g = celt_exp2(lg);
+#else
+ /* Handle the integer part of the log energy */
+ shift = 16-(lg>>DB_SHIFT);
+ if (shift>31)
+ {
+ shift=0;
+ g=0;
+ } else {
+ /* Handle the fractional part. */
+ g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+ }
+ /* Handle extreme gains with negative shift. */
+ if (shift<0)
+ {
+ /* For shift < -2 we'd be likely to overflow, so we're capping
+ the gain here. This shouldn't happen unless the bitstream is
+ already corrupted. */
+ if (shift < -2)
+ {
+ g = 32767;
+ shift = -2;
+ }
+ do {
+ *f++ = SHL32(MULT16_16(*x++, g), -shift);
+ } while (++j<band_end);
+ } else
+#endif
+ /* Be careful of the fixed-point "else" just above when changing this code */
+ /* This loop is the body of that "else" in fixed-point builds and the
+ only scaling loop otherwise (where shift is not declared). */
+ do {
+ *f++ = SHR32(MULT16_16(*x++, g), shift);
+ } while (++j<band_end);
+ }
+ celt_assert(start <= end);
+ for (i=M*eBands[end];i<N;i++)
+ *f++ = 0;
+ } while (++c<C);
+}
+
+/* This prevents energy collapse for transients with multiple short MDCTs.
+ For every band in [start,end) and every channel, each of the 1<<LM
+ interleaved short-block slots whose bit in collapse_masks[i*C+c] is clear is
+ filled with +r/-r noise, the sign coming from bit 15 of an LCG stepped per
+ sample. The band is renormalised afterwards if anything was filled.
+ The noise amplitude r is derived from the drop in log energy relative to the
+ lower of the two previous frames, capped by a threshold that shrinks with
+ the band's pulse depth, and scaled by 1/sqrt(N0<<LM). */
+void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
+ int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
+ opus_val16 *prev2logE, int *pulses, opus_uint32 seed)
+{
+ int c, i, j, k;
+ for (i=start;i<end;i++)
+ {
+ int N0;
+ opus_val16 thresh, sqrt_1;
+ int depth;
+#ifdef OPUS_FIXED_POINT
+ int shift;
+ opus_val32 thresh32;
+#endif
+
+ /* Band width in coefficients per short block. */
+ N0 = m->eBands[i+1]-m->eBands[i];
+ /* depth in 1/8 bits */
+ depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM);
+
+#ifdef OPUS_FIXED_POINT
+ thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
+ thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
+ {
+ /* sqrt_1 and shift together represent 1/sqrt(N0<<LM). */
+ opus_val32 t;
+ t = N0<<LM;
+ shift = celt_ilog2(t)>>1;
+ t = SHL32(t, (7-shift)<<1);
+ sqrt_1 = celt_rsqrt_norm(t);
+ }
+#else
+ thresh = .5f*celt_exp2(-.125f*depth);
+ sqrt_1 = celt_rsqrt(N0<<LM);
+#endif
+
+ c=0; do
+ {
+ celt_norm *X;
+ opus_val16 prev1;
+ opus_val16 prev2;
+ opus_val32 Ediff;
+ opus_val16 r;
+ int renormalize=0;
+ prev1 = prev1logE[c*m->nbEBands+i];
+ prev2 = prev2logE[c*m->nbEBands+i];
+ /* With a single channel, the second channel's history (offset nbEBands)
+ is also taken into account.
+ NOTE(review): presumably covers a stereo-to-mono switch - confirm. */
+ if (C==1)
+ {
+ prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
+ prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
+ }
+ /* Energy increase relative to the quieter of the two previous frames, floored at 0. */
+ Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
+ Ediff = MAX32(0, Ediff);
+
+#ifdef OPUS_FIXED_POINT
+ if (Ediff < 16384)
+ {
+ opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
+ r = 2*MIN16(16383,r32);
+ } else {
+ r = 0;
+ }
+ if (LM==3)
+ r = MULT16_16_Q14(23170, MIN32(23169, r));
+ r = SHR16(MIN16(thresh, r),1);
+ r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
+#else
+ /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
+ short blocks don't have the same energy as long */
+ r = 2.f*celt_exp2(-Ediff);
+ if (LM==3)
+ r *= 1.41421356f;
+ r = MIN16(thresh, r);
+ r = r*sqrt_1;
+#endif
+ /* Start of this band in this channel. */
+ X = X_+c*size+(m->eBands[i]<<LM);
+ for (k=0;k<1<<LM;k++)
+ {
+ /* Detect collapse */
+ if (!(collapse_masks[i*C+c]&1<<k))
+ {
+ /* Fill with noise */
+ for (j=0;j<N0;j++)
+ {
+ seed = celt_lcg_rand(seed);
+ X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
+ }
+ renormalize = 1;
+ }
+ }
+ /* We just added some energy, so we need to renormalise */
+ if (renormalize)
+ renormalise_vector(X, N0<<LM, Q15ONE);
+ } while (++c<C);
+ }
+}
+
+static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N)
+{ /* Replaces X[j] with a1*X[j] + a2*Y[j], where a1/a2 are the two channels' band energies divided by their joint norm; Y is only read. */
+ int i = bandID;
+ int j;
+ opus_val16 a1, a2;
+ opus_val16 left, right;
+ opus_val16 norm;
+#ifdef OPUS_FIXED_POINT
+ int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13; /* scaling shift chosen from the larger of the two energies */
+#endif
+ left = VSHR32(bandE[i],shift); /* NOTE(review): 'shift' exists only under OPUS_FIXED_POINT; presumably VSHR32 ignores it otherwise -- confirm */
+ right = VSHR32(bandE[i+m->nbEBands],shift); /* second channel's energy sits nbEBands entries further on */
+ norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right)); /* EPSILON keeps the divisor below away from zero */
+ a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
+ a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
+ for (j=0;j<N;j++)
+ {
+ celt_norm r, l;
+ l = X[j];
+ r = Y[j];
+ X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
+ /* Side is not encoded, no need to calculate */
+ }
+}
+
+static void stereo_split(celt_norm *X, celt_norm *Y, int N)
+{ /* In place, for each of the N samples: X <- 0.7071*X + 0.7071*Y and Y <- 0.7071*Y - 0.7071*X. */
+ int j;
+ for (j=0;j<N;j++)
+ {
+ celt_norm r, l;
+ l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]); /* both inputs are pre-scaled by .70710678 before mixing */
+ r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]);
+ X[j] = l+r;
+ Y[j] = r-l;
+ }
+}
+
+static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
+{ /* In place: X <- lgain*(mid*X - Y) and Y <- rgain*(mid*X + Y), with lgain/rgain taken from celt_rsqrt_norm of the energy estimates El/Er. */
+ int j;
+ opus_val32 xp=0, side=0;
+ opus_val32 El, Er;
+ opus_val16 mid2;
+#ifdef OPUS_FIXED_POINT
+ int kl, kr;
+#endif
+ opus_val32 t, lgain, rgain;
+
+ /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
+ dual_inner_prod(Y, X, Y, N, &xp, &side); /* presumably xp = <Y,X> and side = <Y,Y>, going by the argument order -- confirm */
+ /* Compensating for the mid normalization */
+ xp = MULT16_32_Q15(mid, xp);
+ /* mid and side are in Q15, not Q14 like X and Y */
+ mid2 = SHR32(mid, 1);
+ El = MULT16_16(mid2, mid2) + side - 2*xp;
+ Er = MULT16_16(mid2, mid2) + side + 2*xp;
+ if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) /* either energy below the threshold: copy X into Y and return without scaling */
+ {
+ for (j=0;j<N;j++)
+ Y[j] = X[j];
+ return;
+ }
+
+#ifdef OPUS_FIXED_POINT
+ kl = celt_ilog2(El)>>1;
+ kr = celt_ilog2(Er)>>1;
+#endif
+ t = VSHR32(El, (kl-7)<<1); /* NOTE(review): kl/kr are only declared under OPUS_FIXED_POINT; presumably VSHR32/PSHR32 drop the shift argument otherwise -- confirm */
+ lgain = celt_rsqrt_norm(t);
+ t = VSHR32(Er, (kr-7)<<1);
+ rgain = celt_rsqrt_norm(t);
+
+#ifdef OPUS_FIXED_POINT
+ if (kl < 7) /* keeps the PSHR32 shift below (kl+1) at 8 or more */
+ kl = 7;
+ if (kr < 7)
+ kr = 7;
+#endif
+
+ for (j=0;j<N;j++)
+ {
+ celt_norm r, l;
+ /* Apply mid scaling (side is already scaled) */
+ l = MULT16_16_Q15(mid, X[j]);
+ r = Y[j];
+ X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
+ Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
+ }
+}
+
+/* Decide whether we should spread the pulses in the current frame */
+int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+ int last_decision, int *hf_average, int *tapset_decision, int update_hf,
+ int end, int C, int M)
+{ /* Returns one of SPREAD_NONE/LIGHT/NORMAL/AGGRESSIVE; always updates *average and, when update_hf is nonzero, *hf_average and *tapset_decision. */
+ int i, c, N0;
+ int sum = 0, nbBands=0;
+ const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
+ int decision;
+ int hf_sum=0;
+
+ celt_assert(end>0);
+
+ N0 = M*m->shortMdctSize; /* per-channel stride in X */
+
+ if (M*(eBands[end]-eBands[end-1]) <= 8) /* last band has 8 bins or fewer: return early, nothing is updated */
+ return SPREAD_NONE;
+ c=0; do {
+ for (i=0;i<end;i++)
+ {
+ int j, N, tmp=0;
+ int tcount[3] = {0,0,0};
+ celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
+ N = M*(eBands[i+1]-eBands[i]);
+ if (N<=8) /* bands of 8 bins or fewer do not contribute */
+ continue;
+ /* Compute rough CDF of |x[j]| */
+ for (j=0;j<N;j++)
+ {
+ opus_val32 x2N; /* Q13 */
+
+ x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
+ if (x2N < QCONST16(0.25f,13))
+ tcount[0]++;
+ if (x2N < QCONST16(0.0625f,13))
+ tcount[1]++;
+ if (x2N < QCONST16(0.015625f,13))
+ tcount[2]++;
+ }
+
+ /* Only include four last bands (8 kHz and up) */
+ if (i>m->nbEBands-4) /* NOTE(review): this test admits indices nbEBands-3..nbEBands-1, i.e. three bands, not four -- confirm intent */
+ hf_sum += 32*(tcount[1]+tcount[0])/N;
+ tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); /* 0..3: how many thresholds at least half of the samples fall under */
+ sum += tmp*256;
+ nbBands++;
+ }
+ } while (++c<C);
+
+ if (update_hf)
+ {
+ if (hf_sum)
+ hf_sum /= C*(4-m->nbEBands+end);
+ *hf_average = (*hf_average+hf_sum)>>1; /* running average with the previous value */
+ hf_sum = *hf_average;
+ if (*tapset_decision==2) /* bias of +4/-4 towards the current tapset before thresholding */
+ hf_sum += 4;
+ else if (*tapset_decision==0)
+ hf_sum -= 4;
+ if (hf_sum > 22)
+ *tapset_decision=2;
+ else if (hf_sum > 18)
+ *tapset_decision=1;
+ else
+ *tapset_decision=0;
+ }
+ /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
+ celt_assert(nbBands>0); /* end has to be non-zero */
+ sum /= nbBands;
+ /* Recursive averaging */
+ sum = (sum+*average)>>1;
+ *average = sum;
+ /* Hysteresis */
+ sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
+ if (sum < 80)
+ {
+ decision = SPREAD_AGGRESSIVE;
+ } else if (sum < 256)
+ {
+ decision = SPREAD_NORMAL;
+ } else if (sum < 384)
+ {
+ decision = SPREAD_LIGHT;
+ } else {
+ decision = SPREAD_NONE;
+ }
+#ifdef FUZZING
+ decision = rand()&0x3; /* FUZZING builds replace both decisions with random values */
+ *tapset_decision=rand()%3;
+#endif
+ return decision;
+}
+
+/* Indexing table for converting from natural Hadamard to ordery Hadamard
+ This is essentially a bit-reversed Gray, on top of which we've added
+ an inversion of the order because we want the DC at the end rather than
+ the beginning. The lines are for N=2, 4, 8, 16 */
+static const int ordery_table[] = { /* the row for a given N starts at offset N-2 (0, 2, 6, 14) */
+ 1, 0, /* N=2 */
+ 3, 0, 2, 1, /* N=4 */
+ 7, 0, 4, 3, 6, 1, 5, 2, /* N=8 */
+ 15, 0, 8, 7, 12, 3, 11, 4, 14, 1, 9, 6, 13, 2, 10, 5, /* N=16 */
+};
+
+static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
+{ /* In place: gathers the samples X[j*stride+i] into 'stride' contiguous runs of N0; run i lands at position i, or at ordery[i] when hadamard is nonzero. */
+ int i,j;
+ VARDECL(celt_norm, tmp);
+ int N;
+ SAVE_STACK;
+ N = N0*stride;
+ ALLOC(tmp, N, celt_norm); /* scratch copy so the permutation can be written back over X */
+ celt_assert(stride>0);
+ if (hadamard)
+ {
+ const int *ordery = ordery_table+stride-2; /* table row for this stride (rows for 2, 4, 8, 16 only) */
+ for (i=0;i<stride;i++)
+ {
+ for (j=0;j<N0;j++)
+ tmp[ordery[i]*N0+j] = X[j*stride+i];
+ }
+ } else {
+ for (i=0;i<stride;i++)
+ for (j=0;j<N0;j++)
+ tmp[i*N0+j] = X[j*stride+i];
+ }
+ for (j=0;j<N;j++)
+ X[j] = tmp[j];
+ RESTORE_STACK;
+}
+
+static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
+{ /* In place, the reverse mapping of deinterleave_hadamard: run i (or run ordery[i] when hadamard is nonzero) of N0 samples is scattered to X[j*stride+i]. */
+ int i,j;
+ VARDECL(celt_norm, tmp);
+ int N;
+ SAVE_STACK;
+ N = N0*stride;
+ ALLOC(tmp, N, celt_norm); /* scratch copy so the permutation can be written back over X */
+ if (hadamard)
+ {
+ const int *ordery = ordery_table+stride-2; /* table row for this stride (rows for 2, 4, 8, 16 only) */
+ for (i=0;i<stride;i++)
+ for (j=0;j<N0;j++)
+ tmp[j*stride+i] = X[ordery[i]*N0+j];
+ } else {
+ for (i=0;i<stride;i++)
+ for (j=0;j<N0;j++)
+ tmp[j*stride+i] = X[i*N0+j];
+ }
+ for (j=0;j<N;j++)
+ X[j] = tmp[j];
+ RESTORE_STACK;
+}
+
+void haar1(celt_norm *X, int N0, int stride)
+{ /* In place, for each of the 'stride' interleaved columns: every pair (a,b) of consecutive column entries becomes (0.7071*a + 0.7071*b, 0.7071*a - 0.7071*b). */
+ int i, j;
+ N0 >>= 1; /* from here on N0 counts pairs */
+ for (i=0;i<stride;i++)
+ for (j=0;j<N0;j++)
+ {
+ celt_norm tmp1, tmp2;
+ tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]);
+ tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
+ X[stride*2*j+i] = tmp1 + tmp2;
+ X[stride*(2*j+1)+i] = tmp1 - tmp2;
+ }
+}
+
+static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
+{ /* Returns qn, used by compute_theta as the resolution of the split angle: 1 when the budget is too small, otherwise an even value asserted to be <= 256. */
+ static const opus_int16 exp2_table8[8] = /* approximately 16384*2^(k/8) for k=0..7 */
+ {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
+ int qn, qb;
+ int N2 = 2*N-1;
+ if (stereo && N==2)
+ N2--;
+ /* The upper limit ensures that in a stereo split with itheta==16384, we'll
+ always have enough bits left over to code at least one pulse in the
+ side; otherwise it would collapse, since it doesn't get folded. */
+ qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2);
+
+ qb = IMIN(8<<BITRES, qb); /* hard cap on the budget used for the angle */
+
+ if (qb<(1<<BITRES>>1)) { /* below half of 1<<BITRES: no angle resolution at all */
+ qn = 1;
+ } else {
+ qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES)); /* table lookup on the low 3 bits, shift by the remaining high part */
+ qn = (qn+1)>>1<<1; /* round up to an even number */
+ }
+ celt_assert(qn <= 256);
+ return qn;
+}
+
+struct band_ctx { /* per-band state shared by the quant_* helpers; filled in by quant_all_bands */
+ int encode; /* nonzero on the encoder path, zero on the decoder path */
+ const CELTMode *m; /* mode tables (eBands, nbEBands, logN, cache, ...) */
+ int i; /* index of the band currently being processed */
+ int intensity; /* compute_theta forces qn=1 for stereo bands with i >= intensity */
+ int spread; /* spreading mode handed to alg_quant/alg_unquant */
+ int tf_change; /* copied from tf_res[i] for the current band */
+ ec_ctx *ec; /* entropy coder context used for both encoding and decoding calls */
+ opus_int32 remaining_bits; /* budget left, in the units of ec_tell_frac() */
+ const celt_ener *bandE; /* band energies, passed on to intensity_stereo */
+ opus_uint32 seed; /* LCG state advanced with celt_lcg_rand */
+};
+
+struct split_ctx { /* results written by compute_theta */
+ int inv; /* inversion flag coded on the stereo qn==1 path; 0 otherwise */
+ int imid; /* mid gain: 32767, 0, or bitexact_cos(itheta) */
+ int iside; /* side gain: 0, 32767, or bitexact_cos(16384-itheta) */
+ int delta; /* mid-versus-side allocation offset used when splitting the bit budget */
+ int itheta; /* split angle rescaled to the range 0..16384 */
+ int qalloc; /* ec_tell_frac() difference spent while coding the angle/flag */
+};
+
+static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
+ celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
+ int LM,
+ int stereo, int *fill)
+{ /* Picks the angle resolution qn, codes or decodes itheta (or, for stereo with qn==1, the inversion flag), fills *sctx, subtracts the bits used from *b and masks *fill at the two extreme angles. */
+ int qn;
+ int itheta=0;
+ int delta;
+ int imid, iside;
+ int qalloc;
+ int pulse_cap;
+ int offset;
+ opus_int32 tell;
+ int inv=0;
+ int encode;
+ const CELTMode *m;
+ int i;
+ int intensity;
+ ec_ctx *ec;
+ const celt_ener *bandE;
+
+ encode = ctx->encode;
+ m = ctx->m;
+ i = ctx->i;
+ intensity = ctx->intensity;
+ ec = ctx->ec;
+ bandE = ctx->bandE;
+
+ /* Decide on the resolution to give to the split parameter theta */
+ pulse_cap = m->logN[i]+LM*(1<<BITRES);
+ offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
+ qn = compute_qn(N, *b, offset, pulse_cap, stereo);
+ if (stereo && i>=intensity) /* at or above the intensity band no angle is coded */
+ qn = 1;
+ if (encode)
+ {
+ /* theta is the atan() of the ratio between the (normalized)
+ side and mid. With just that parameter, we can re-scale both
+ mid and side because we know that 1) they have unit norm and
+ 2) they are orthogonal. */
+ itheta = stereo_itheta(X, Y, stereo, N);
+ }
+ tell = ec_tell_frac(ec); /* bit position before coding, used for qalloc below */
+ if (qn!=1)
+ {
+ if (encode)
+ itheta = (itheta*qn+8192)>>14; /* round the angle to a level index in 0..qn */
+
+ /* Entropy coding of the angle. We use a uniform pdf for the
+ time split, a step for stereo, and a triangular one for the rest. */
+ if (stereo && N>2)
+ {
+ int p0 = 3;
+ int x = itheta;
+ int x0 = qn/2;
+ int ft = p0*(x0+1) + x0; /* total frequency: weight p0 for 0..x0, weight 1 above */
+ /* Use a probability of p0 up to itheta=8192 and then use 1 after */
+ if (encode)
+ {
+ ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+ } else {
+ int fs;
+ fs=ec_decode(ec,ft);
+ if (fs<(x0+1)*p0)
+ x=fs/p0;
+ else
+ x=x0+1+(fs-(x0+1)*p0);
+ ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+ itheta = x;
+ }
+ } else if (B0>1 || stereo) {
+ /* Uniform pdf */
+ if (encode)
+ ec_enc_uint(ec, itheta, qn+1);
+ else
+ itheta = ec_dec_uint(ec, qn+1);
+ } else {
+ int fs=1, ft;
+ ft = ((qn>>1)+1)*((qn>>1)+1);
+ if (encode)
+ {
+ int fl;
+
+ fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
+ fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
+ ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+
+ ec_encode(ec, fl, fl+fs, ft);
+ } else {
+ /* Triangular pdf */
+ int fl=0;
+ int fm;
+ fm = ec_decode(ec, ft);
+
+ if (fm < ((qn>>1)*((qn>>1) + 1)>>1)) /* rising half of the triangle */
+ {
+ itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
+ fs = itheta + 1;
+ fl = itheta*(itheta + 1)>>1;
+ }
+ else
+ {
+ itheta = (2*(qn + 1)
+ - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
+ fs = qn + 1 - itheta;
+ fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+ }
+
+ ec_dec_update(ec, fl, fl+fs, ft);
+ }
+ }
+ itheta = (opus_int32)itheta*16384/qn; /* map the level index back onto 0..16384 */
+ if (encode && stereo)
+ {
+ if (itheta==0)
+ intensity_stereo(m, X, Y, bandE, i, N);
+ else
+ stereo_split(X, Y, N);
+ }
+ /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
+ Let's do that at higher complexity */
+ } else if (stereo) {
+ if (encode)
+ {
+ inv = itheta > 8192;
+ if (inv) /* negate Y before the intensity mix */
+ {
+ int j;
+ for (j=0;j<N;j++)
+ Y[j] = -Y[j];
+ }
+ intensity_stereo(m, X, Y, bandE, i, N);
+ }
+ if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES) /* the flag is only coded when both budgets exceed 2<<BITRES */
+ {
+ if (encode)
+ ec_enc_bit_logp(ec, inv, 2);
+ else
+ inv = ec_dec_bit_logp(ec, 2);
+ } else
+ inv = 0;
+ itheta = 0;
+ }
+ qalloc = ec_tell_frac(ec) - tell; /* bits consumed by the coding above */
+ *b -= qalloc;
+
+ if (itheta == 0)
+ {
+ imid = 32767;
+ iside = 0;
+ *fill &= (1<<B)-1; /* keep only the low B fill bits */
+ delta = -16384;
+ } else if (itheta == 16384)
+ {
+ imid = 0;
+ iside = 32767;
+ *fill &= ((1<<B)-1)<<B; /* keep only fill bits B..2B-1 */
+ delta = 16384;
+ } else {
+ imid = bitexact_cos((opus_int16)itheta);
+ iside = bitexact_cos((opus_int16)(16384-itheta));
+ /* This is the mid vs side allocation that minimizes squared error
+ in that band. */
+ delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
+ }
+
+ sctx->inv = inv;
+ sctx->imid = imid;
+ sctx->iside = iside;
+ sctx->delta = delta;
+ sctx->itheta = itheta;
+ sctx->qalloc = qalloc;
+}
+static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
+ celt_norm *lowband_out)
+{ /* Single-sample band: codes or decodes one sign bit per channel while the budget allows, optionally resynthesises +/-NORM_SCALING, and always returns 1. */
+#ifdef RESYNTH
+ int resynth = 1;
+#else
+ int resynth = !ctx->encode;
+#endif
+ int c;
+ int stereo;
+ celt_norm *x = X;
+ int encode;
+ ec_ctx *ec;
+
+ encode = ctx->encode;
+ ec = ctx->ec;
+
+ stereo = Y != NULL; /* a non-NULL Y selects the two-channel case */
+ c=0; do {
+ int sign=0;
+ if (ctx->remaining_bits>=1<<BITRES) /* a sign is only coded if 1<<BITRES (the cost deducted below) is still available */
+ {
+ if (encode)
+ {
+ sign = x[0]<0;
+ ec_enc_bits(ec, sign, 1);
+ } else {
+ sign = ec_dec_bits(ec, 1);
+ }
+ ctx->remaining_bits -= 1<<BITRES;
+ b-=1<<BITRES;
+ }
+ if (resynth)
+ x[0] = sign ? -NORM_SCALING : NORM_SCALING;
+ x = Y; /* the second pass, taken only when stereo, works on Y */
+ } while (++c<1+stereo);
+ if (lowband_out)
+ lowband_out[0] = SHR16(X[0],4);
+ return 1;
+}
+
+/* This function is responsible for encoding and decoding a mono partition.
+ It can split the band in two and transmit the energy difference with
+ the two half-bands. It can be called recursively so bands can end up being
+ split in 8 parts. */
+static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
+ int N, int b, int B, celt_norm *lowband,
+ int LM,
+ opus_val16 gain, int fill)
+{ /* Returns a collapse mask (cm) combining the masks of whatever was coded or filled in this partition. */
+ const unsigned char *cache;
+ int q;
+ int curr_bits;
+ int imid=0, iside=0;
+ int B0=B;
+ opus_val16 mid=0, side=0;
+ unsigned cm=0;
+#ifdef RESYNTH
+ int resynth = 1;
+#else
+ int resynth = !ctx->encode;
+#endif
+ celt_norm *Y=NULL;
+ int encode;
+ const CELTMode *m;
+ int i;
+ int spread;
+ ec_ctx *ec;
+
+ encode = ctx->encode;
+ m = ctx->m;
+ i = ctx->i;
+ spread = ctx->spread;
+ ec = ctx->ec;
+
+ /* If we need 1.5 more bit than we can produce, split the band in two. */
+ cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
+ if (LM != -1 && b > cache[cache[0]]+12 && N>2)
+ {
+ int mbits, sbits, delta;
+ int itheta;
+ int qalloc;
+ struct split_ctx sctx;
+ celt_norm *next_lowband2=NULL;
+ opus_int32 rebalance;
+
+ N >>= 1; /* each half gets N/2 samples; Y is the upper half of X */
+ Y = X+N;
+ LM -= 1;
+ if (B==1)
+ fill = (fill&1)|(fill<<1); /* duplicate the single fill bit so both halves see it */
+ B = (B+1)>>1;
+
+ compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
+ LM, 0, &fill);
+ imid = sctx.imid;
+ iside = sctx.iside;
+ delta = sctx.delta;
+ itheta = sctx.itheta;
+ qalloc = sctx.qalloc;
+#ifdef OPUS_FIXED_POINT
+ mid = imid;
+ side = iside;
+#else
+ mid = (1.f/32768)*imid;
+ side = (1.f/32768)*iside;
+#endif
+
+ /* Give more bits to low-energy MDCTs than they would otherwise deserve */
+ if (B0>1 && (itheta&0x3fff)) /* itheta&0x3fff is zero exactly at 0 and 16384 */
+ {
+ if (itheta > 8192)
+ /* Rough approximation for pre-echo masking */
+ delta -= delta>>(4-LM);
+ else
+ /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
+ delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
+ }
+ mbits = IMAX(0, IMIN(b, (b-delta)/2)); /* first-half share, clamped to [0,b]; the second half gets the rest */
+ sbits = b-mbits;
+ ctx->remaining_bits -= qalloc;
+
+ if (lowband)
+ next_lowband2 = lowband+N; /* >32-bit split case */
+
+ rebalance = ctx->remaining_bits;
+ if (mbits >= sbits) /* the half with the larger budget is coded first */
+ {
+ cm = quant_partition(ctx, X, N, mbits, B,
+ lowband, LM,
+ MULT16_16_P15(gain,mid), fill);
+ rebalance = mbits - (rebalance-ctx->remaining_bits); /* what the first half left unused out of mbits */
+ if (rebalance > 3<<BITRES && itheta!=0)
+ sbits += rebalance - (3<<BITRES);
+ cm |= quant_partition(ctx, Y, N, sbits, B,
+ next_lowband2, LM,
+ MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+ } else {
+ cm = quant_partition(ctx, Y, N, sbits, B,
+ next_lowband2, LM,
+ MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+ rebalance = sbits - (rebalance-ctx->remaining_bits); /* what the first-coded half left unused out of sbits */
+ if (rebalance > 3<<BITRES && itheta!=16384)
+ mbits += rebalance - (3<<BITRES);
+ cm |= quant_partition(ctx, X, N, mbits, B,
+ lowband, LM,
+ MULT16_16_P15(gain,mid), fill);
+ }
+ } else {
+ /* This is the basic no-split case */
+ q = bits2pulses(m, i, LM, b);
+ curr_bits = pulses2bits(m, i, LM, q);
+ ctx->remaining_bits -= curr_bits;
+
+ /* Ensures we can never bust the budget */
+ while (ctx->remaining_bits < 0 && q > 0)
+ {
+ ctx->remaining_bits += curr_bits;
+ q--;
+ curr_bits = pulses2bits(m, i, LM, q);
+ ctx->remaining_bits -= curr_bits;
+ }
+
+ if (q!=0)
+ {
+ int K = get_pulses(q);
+
+ /* Finally do the actual quantization */
+ if (encode)
+ {
+ cm = alg_quant(X, N, K, spread, B, ec
+#ifdef RESYNTH
+ , gain
+#endif
+ );
+ } else {
+ cm = alg_unquant(X, N, K, spread, B, ec, gain);
+ }
+ } else {
+ /* If there's no pulse, fill the band anyway */
+ int j;
+ if (resynth)
+ {
+ unsigned cm_mask;
+ /* B can be as large as 16, so this shift might overflow an int on a
+ 16-bit platform; use a long to get defined behavior.*/
+ cm_mask = (unsigned)(1UL<<B)-1;
+ fill &= cm_mask;
+ if (!fill) /* no fill bits set: output silence and leave cm at 0 */
+ {
+ for (j=0;j<N;j++)
+ X[j] = 0;
+ } else {
+ if (lowband == NULL)
+ {
+ /* Noise */
+ for (j=0;j<N;j++)
+ {
+ ctx->seed = celt_lcg_rand(ctx->seed);
+ X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
+ }
+ cm = cm_mask;
+ } else {
+ /* Folded spectrum */
+ for (j=0;j<N;j++)
+ {
+ opus_val16 tmp;
+ ctx->seed = celt_lcg_rand(ctx->seed);
+ /* About 48 dB below the "normal" folding level */
+ tmp = QCONST16(1.0f/256, 10);
+ tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
+ X[j] = lowband[j]+tmp;
+ }
+ cm = fill;
+ }
+ renormalise_vector(X, N, gain); /* rescale the filled band to 'gain' */
+ }
+ }
+ }
+ }
+
+ return cm;
+}
+
+
+/* This function is responsible for encoding and decoding a band for the mono case. */
+static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
+ int N, int b, int B, celt_norm *lowband,
+ int LM, celt_norm *lowband_out,
+ opus_val16 gain, celt_norm *lowband_scratch, int fill)
+{ /* Applies the time/frequency reshaping selected by ctx->tf_change, calls quant_partition, then (when resynthesising) undoes the reshaping and writes the scaled band to lowband_out; returns the collapse mask. */
+ int N0=N;
+ int N_B=N;
+ int N_B0;
+ int B0=B;
+ int time_divide=0;
+ int recombine=0;
+ int longBlocks;
+ unsigned cm=0;
+#ifdef RESYNTH
+ int resynth = 1;
+#else
+ int resynth = !ctx->encode;
+#endif
+ int k;
+ int encode;
+ int tf_change;
+
+ encode = ctx->encode;
+ tf_change = ctx->tf_change;
+
+ longBlocks = B0==1;
+
+ N_B /= B; /* samples per block */
+
+ /* Special case for one sample */
+ if (N==1)
+ {
+ return quant_band_n1(ctx, X, NULL, b, lowband_out);
+ }
+
+ if (tf_change>0)
+ recombine = tf_change;
+ /* Band recombining to increase frequency resolution */
+
+ if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) /* copy lowband to scratch first, since the transforms below modify it in place */
+ {
+ int j;
+ for (j=0;j<N;j++)
+ lowband_scratch[j] = lowband[j];
+ lowband = lowband_scratch;
+ }
+
+ for (k=0;k<recombine;k++)
+ {
+ static const unsigned char bit_interleave_table[16]={
+ 0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
+ };
+ if (encode)
+ haar1(X, N>>k, 1<<k);
+ if (lowband)
+ haar1(lowband, N>>k, 1<<k);
+ fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2; /* fold each nibble of fill down to two bits */
+ }
+ B>>=recombine;
+ N_B<<=recombine;
+
+ /* Increasing the time resolution */
+ while ((N_B&1) == 0 && tf_change<0)
+ {
+ if (encode)
+ haar1(X, N_B, B);
+ if (lowband)
+ haar1(lowband, N_B, B);
+ fill |= fill<<B;
+ B <<= 1;
+ N_B >>= 1;
+ time_divide++;
+ tf_change++;
+ }
+ B0=B; /* remember the post-transform layout so it can be undone after quantisation */
+ N_B0 = N_B;
+
+ /* Reorganize the samples in time order instead of frequency order */
+ if (B0>1)
+ {
+ if (encode)
+ deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+ if (lowband)
+ deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
+ }
+
+ cm = quant_partition(ctx, X, N, b, B, lowband,
+ LM, gain, fill);
+
+ /* This code is used by the decoder and by the resynthesis-enabled encoder */
+ if (resynth)
+ {
+ /* Undo the sample reorganization going from time order to frequency order */
+ if (B0>1)
+ interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+
+ /* Undo time-freq changes that we did earlier */
+ N_B = N_B0;
+ B = B0;
+ for (k=0;k<time_divide;k++)
+ {
+ B >>= 1;
+ N_B <<= 1;
+ cm |= cm>>B;
+ haar1(X, N_B, B);
+ }
+
+ for (k=0;k<recombine;k++)
+ {
+ static const unsigned char bit_deinterleave_table[16]={
+ 0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
+ 0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
+ };
+ cm = bit_deinterleave_table[cm]; /* each mask bit is expanded into two adjacent bits */
+ haar1(X, N0>>k, 1<<k);
+ }
+ B<<=recombine;
+
+ /* Scale output for later folding */
+ if (lowband_out)
+ {
+ int j;
+ opus_val16 n;
+ n = celt_sqrt(SHL32(EXTEND32(N0),22));
+ for (j=0;j<N0;j++)
+ lowband_out[j] = MULT16_16_Q15(n,X[j]);
+ }
+ cm &= (1<<B)-1; /* keep only the B mask bits of the original block count */
+ }
+ return cm;
+}
+
+
+/* This function is responsible for encoding and decoding a band for the stereo case. */
+static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
+ int N, int b, int B, celt_norm *lowband,
+ int LM, celt_norm *lowband_out,
+ celt_norm *lowband_scratch, int fill)
+{ /* Codes the split angle via compute_theta, then the two parts via quant_band (with a one-bit side shortcut when N==2); when resynthesising, rebuilds X and Y and applies the inversion flag. Returns the collapse mask. */
+ int imid=0, iside=0;
+ int inv = 0;
+ opus_val16 mid=0, side=0;
+ unsigned cm=0;
+#ifdef RESYNTH
+ int resynth = 1;
+#else
+ int resynth = !ctx->encode;
+#endif
+ int mbits, sbits, delta;
+ int itheta;
+ int qalloc;
+ struct split_ctx sctx;
+ int orig_fill;
+ int encode;
+ ec_ctx *ec;
+
+ encode = ctx->encode;
+ ec = ctx->ec;
+
+ /* Special case for one sample */
+ if (N==1)
+ {
+ return quant_band_n1(ctx, X, Y, b, lowband_out);
+ }
+
+ orig_fill = fill; /* saved because compute_theta may clear bits of fill */
+
+ compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
+ LM, 1, &fill);
+ inv = sctx.inv;
+ imid = sctx.imid;
+ iside = sctx.iside;
+ delta = sctx.delta;
+ itheta = sctx.itheta;
+ qalloc = sctx.qalloc;
+#ifdef OPUS_FIXED_POINT
+ mid = imid;
+ side = iside;
+#else
+ mid = (1.f/32768)*imid;
+ side = (1.f/32768)*iside;
+#endif
+
+ /* This is a special case for N=2 that only works for stereo and takes
+ advantage of the fact that mid and side are orthogonal to encode
+ the side with just one bit. */
+ if (N==2)
+ {
+ int c;
+ int sign=0;
+ celt_norm *x2, *y2;
+ mbits = b;
+ sbits = 0;
+ /* Only need one bit for the side. */
+ if (itheta != 0 && itheta != 16384)
+ sbits = 1<<BITRES;
+ mbits -= sbits;
+ c = itheta > 8192; /* when set, the roles of X and Y are swapped below */
+ ctx->remaining_bits -= qalloc+sbits;
+
+ x2 = c ? Y : X;
+ y2 = c ? X : Y;
+ if (sbits)
+ {
+ if (encode)
+ {
+ /* Here we only need to encode a sign for the side. */
+ sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
+ ec_enc_bits(ec, sign, 1);
+ } else {
+ sign = ec_dec_bits(ec, 1);
+ }
+ }
+ sign = 1-2*sign; /* map the bit {0,1} to the factor {+1,-1} */
+ /* We use orig_fill here because we want to fold the side, but if
+ itheta==16384, we'll have cleared the low bits of fill. */
+ cm = quant_band(ctx, x2, N, mbits, B, lowband,
+ LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
+ /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
+ and there's no need to worry about mixing with the other channel. */
+ y2[0] = -sign*x2[1]; /* y2 is x2 rotated by a quarter turn, with the coded sign */
+ y2[1] = sign*x2[0];
+ if (resynth)
+ {
+ celt_norm tmp;
+ X[0] = MULT16_16_Q15(mid, X[0]);
+ X[1] = MULT16_16_Q15(mid, X[1]);
+ Y[0] = MULT16_16_Q15(side, Y[0]);
+ Y[1] = MULT16_16_Q15(side, Y[1]);
+ tmp = X[0];
+ X[0] = SUB16(tmp,Y[0]);
+ Y[0] = ADD16(tmp,Y[0]);
+ tmp = X[1];
+ X[1] = SUB16(tmp,Y[1]);
+ Y[1] = ADD16(tmp,Y[1]);
+ }
+ } else {
+ /* "Normal" split code */
+ opus_int32 rebalance;
+
+ mbits = IMAX(0, IMIN(b, (b-delta)/2)); /* mid share, clamped to [0,b]; the side gets the rest */
+ sbits = b-mbits;
+ ctx->remaining_bits -= qalloc;
+
+ rebalance = ctx->remaining_bits;
+ if (mbits >= sbits) /* the part with the larger budget is coded first */
+ {
+ /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+ mid for folding later. */
+ cm = quant_band(ctx, X, N, mbits, B,
+ lowband, LM, lowband_out,
+ Q15ONE, lowband_scratch, fill);
+ rebalance = mbits - (rebalance-ctx->remaining_bits); /* what the mid left unused out of mbits */
+ if (rebalance > 3<<BITRES && itheta!=0)
+ sbits += rebalance - (3<<BITRES);
+
+ /* For a stereo split, the high bits of fill are always zero, so no
+ folding will be done to the side. */
+ cm |= quant_band(ctx, Y, N, sbits, B,
+ NULL, LM, NULL,
+ side, NULL, fill>>B);
+ } else {
+ /* For a stereo split, the high bits of fill are always zero, so no
+ folding will be done to the side. */
+ cm = quant_band(ctx, Y, N, sbits, B,
+ NULL, LM, NULL,
+ side, NULL, fill>>B);
+ rebalance = sbits - (rebalance-ctx->remaining_bits); /* what the side left unused out of sbits */
+ if (rebalance > 3<<BITRES && itheta!=16384)
+ mbits += rebalance - (3<<BITRES);
+ /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+ mid for folding later. */
+ cm |= quant_band(ctx, X, N, mbits, B,
+ lowband, LM, lowband_out,
+ Q15ONE, lowband_scratch, fill);
+ }
+ }
+
+
+ /* This code is used by the decoder and by the resynthesis-enabled encoder */
+ if (resynth)
+ {
+ if (N!=2) /* the N==2 branch above already rebuilt X and Y */
+ stereo_merge(X, Y, mid, N);
+ if (inv)
+ {
+ int j;
+ for (j=0;j<N;j++)
+ Y[j] = -Y[j];
+ }
+ }
+ return cm;
+}
+
+
+void quant_all_bands(int encode, const CELTMode *m, int start, int end,
+ celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
+ int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
+ opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed)
+{ /* For each band in [start,end): computes the bit budget b, selects the folding source inside norm, dispatches to quant_band or quant_band_stereo, stores the collapse masks, and finally writes the LCG state back to *seed. */
+ int i;
+ opus_int32 remaining_bits;
+ const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
+ celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
+ VARDECL(celt_norm, _norm);
+ celt_norm *lowband_scratch;
+ int B;
+ int M;
+ int lowband_offset;
+ int update_lowband = 1;
+ int C = Y_ != NULL ? 2 : 1; /* a non-NULL Y_ means two channels */
+ int norm_offset;
+#ifdef RESYNTH
+ int resynth = 1;
+#else
+ int resynth = !encode;
+#endif
+ struct band_ctx ctx;
+ SAVE_STACK;
+
+ M = 1<<LM;
+ B = shortBlocks ? M : 1; /* number of blocks per band */
+ norm_offset = M*eBands[start]; /* norm[] is indexed relative to the first processed band */
+ /* No need to allocate norm for the last band because we don't need an
+ output in that band. */
+ ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
+ norm = _norm;
+ norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset; /* second channel's half of the buffer */
+ /* We can use the last band as scratch space because we don't need that
+ scratch space for the last band. */
+ lowband_scratch = X_+M*eBands[m->nbEBands-1];
+
+ lowband_offset = 0;
+ ctx.bandE = bandE;
+ ctx.ec = ec;
+ ctx.encode = encode;
+ ctx.intensity = intensity;
+ ctx.m = m;
+ ctx.seed = *seed;
+ ctx.spread = spread;
+ for (i=start;i<end;i++)
+ {
+ opus_int32 tell;
+ int b;
+ int N;
+ opus_int32 curr_balance;
+ int effective_lowband=-1; /* -1 means no folding source is passed down */
+ celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
+ int tf_change=0;
+ unsigned x_cm;
+ unsigned y_cm;
+ int last;
+
+ ctx.i = i;
+ last = (i==end-1);
+
+ X = X_+M*eBands[i];
+ if (Y_!=NULL)
+ Y = Y_+M*eBands[i];
+ else
+ Y = NULL;
+ N = M*eBands[i+1]-M*eBands[i];
+ tell = ec_tell_frac(ec);
+
+ /* Compute how many bits we want to allocate to this band */
+ if (i != start)
+ balance -= tell;
+ remaining_bits = total_bits-tell-1;
+ ctx.remaining_bits = remaining_bits;
+ if (i <= codedBands-1)
+ {
+ curr_balance = balance / IMIN(3, codedBands-i); /* spread the balance over at most the next three coded bands */
+ b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
+ } else {
+ b = 0; /* bands at or beyond codedBands get no bits */
+ }
+
+ if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
+ lowband_offset = i;
+
+ tf_change = tf_res[i];
+ ctx.tf_change = tf_change;
+ if (i>=m->effEBands) /* bands at or past effEBands are processed in the norm buffer and get no scratch */
+ {
+ X=norm;
+ if (Y_!=NULL)
+ Y = norm;
+ lowband_scratch = NULL;
+ }
+ if (i==end-1)
+ lowband_scratch = NULL;
+
+ /* Get a conservative estimate of the collapse_mask's for the bands we're
+ going to be folding from. */
+ if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
+ {
+ int fold_start;
+ int fold_end;
+ int fold_i;
+ /* This ensures we never repeat spectral content within one band */
+ effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
+ fold_start = lowband_offset;
+ while(M*eBands[--fold_start] > effective_lowband+norm_offset); /* walk down to the band containing the start of the folding source */
+ fold_end = lowband_offset-1;
+ while(M*eBands[++fold_end] < effective_lowband+norm_offset+N); /* walk up to the first band boundary at or past its end */
+ x_cm = y_cm = 0;
+ fold_i = fold_start; do { /* OR together the masks of every band the source overlaps */
+ x_cm |= collapse_masks[fold_i*C+0];
+ y_cm |= collapse_masks[fold_i*C+C-1];
+ } while (++fold_i<fold_end);
+ }
+ /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
+ always) be non-zero. */
+ else
+ x_cm = y_cm = (1<<B)-1;
+
+ if (dual_stereo && i==intensity)
+ {
+ int j;
+
+ /* Switch off dual stereo to do intensity. */
+ dual_stereo = 0;
+ if (resynth)
+ for (j=0;j<M*eBands[i]-norm_offset;j++)
+ norm[j] = HALF32(norm[j]+norm2[j]); /* merge both channels' folding history into norm */
+ }
+ if (dual_stereo)
+ {
+ x_cm = quant_band(&ctx, X, N, b/2, B,
+ effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+ last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
+ y_cm = quant_band(&ctx, Y, N, b/2, B,
+ effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
+ last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
+ } else {
+ if (Y!=NULL)
+ {
+ x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
+ effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+ last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
+ } else {
+ x_cm = quant_band(&ctx, X, N, b, B,
+ effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+ last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
+ }
+ y_cm = x_cm;
+ }
+ collapse_masks[i*C+0] = (unsigned char)x_cm;
+ collapse_masks[i*C+C-1] = (unsigned char)y_cm; /* for C==1 this rewrites the same entry */
+ balance += pulses[i] + tell;
+
+ /* Update the folding position only as long as we have 1 bit/sample depth. */
+ update_lowband = b>(N<<BITRES);
+ }
+ *seed = ctx.seed;
+
+ RESTORE_STACK;
+}
+
diff --git a/drivers/opus/celt/bands.h b/drivers/opus/celt/bands.h
new file mode 100644
index 0000000000..fe1e47097a
--- /dev/null
+++ b/drivers/opus/celt/bands.h
@@ -0,0 +1,114 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Copyright (c) 2008-2009 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef BANDS_H
+#define BANDS_H
+
+#include "arch.h"
+#include "opus_modes.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "rate.h"
+
+/** Compute the amplitude (sqrt energy) in each of the bands
+ * @param m Mode data
+ * @param X Spectrum
+ * @param bandE Square root of the energy for each band (returned)
+ */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M);
+
+/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
+
+/** Normalise each band of X such that the energy in each band is
+ equal to 1
+ * @param m Mode data
+ * @param freq Input spectrum (not modified)
+ * @param X Normalised spectrum (returned)
+ * @param bandE Square root of the energy for each band
+ */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M);
+
+/** Denormalise each band of X to restore full amplitude
+ * @param m Mode data
+ * @param X Normalised spectrum (input, not modified)
+ * @param freq De-normalised spectrum (returned)
+ * @param bandE Square root of the energy for each band
+ */
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+ celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M);
+
+#define SPREAD_NONE (0)
+#define SPREAD_LIGHT (1)
+#define SPREAD_NORMAL (2)
+#define SPREAD_AGGRESSIVE (3)
+
+int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+ int last_decision, int *hf_average, int *tapset_decision, int update_hf,
+ int end, int C, int M);
+
+#ifdef MEASURE_NORM_MSE
+void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C);
+#endif
+
+void haar1(celt_norm *X, int N0, int stride);
+
+/** Quantisation/encoding of the residual spectrum
+ * @param encode flag that indicates whether we're encoding (1) or decoding (0)
+ * @param m Mode data
+ * @param start First band to process
+ * @param end Last band to process + 1
+ * @param X Residual (normalised)
+ * @param Y Residual (normalised) for second channel (or NULL for mono)
+ * @param collapse_masks Anti-collapse tracking mask
+ * @param bandE Square root of the energy for each band
+ * @param pulses Bit allocation (per band) for PVQ
+ * @param shortBlocks Zero for long blocks, non-zero for short blocks
+ * @param spread Amount of spreading to use
+ * @param dual_stereo Zero for MS stereo, non-zero for dual stereo
+ * @param intensity First band to use intensity stereo
+ * @param tf_res Time-frequency resolution change
+ * @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
+ * @param balance Number of unallocated bits
+ * @param ec Entropy coder state
+ * @param LM log2() of the number of 2.5 subframes in the frame
+ * @param codedBands Last band to receive bits + 1
+ * @param seed Random generator seed
+ */
+void quant_all_bands(int encode, const CELTMode *m, int start, int end,
+ celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
+ int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
+ opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed);
+
+void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
+ int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
+ opus_val16 *prev2logE, int *pulses, opus_uint32 seed);
+
+opus_uint32 celt_lcg_rand(opus_uint32 seed);
+
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev);
+
+#endif /* BANDS_H */
diff --git a/drivers/opus/celt/celt.c b/drivers/opus/celt/celt.c
new file mode 100644
index 0000000000..b894e1e13f
--- /dev/null
+++ b/drivers/opus/celt/celt.c
@@ -0,0 +1,223 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2010 Xiph.Org Foundation
+ Copyright (c) 2008 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define CELT_C
+
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "opus_modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+#ifndef PACKAGE_VERSION
+#define PACKAGE_VERSION "unknown"
+#endif
+
+
+/* Map a sampling rate in Hz to an integer factor: 1, 2, 3, 4 and 6 for
+   48000, 24000, 16000, 12000 and 8000 respectively. Any other rate yields 0;
+   unless CUSTOM_MODES is defined it also trips celt_assert(). */
+int resampling_factor(opus_int32 rate)
+{
+   if (rate == 48000)
+      return 1;
+   if (rate == 24000)
+      return 2;
+   if (rate == 16000)
+      return 3;
+   if (rate == 12000)
+      return 4;
+   if (rate == 8000)
+      return 6;
+
+   /* Unsupported rate. */
+#ifndef CUSTOM_MODES
+   celt_assert(0);
+#endif
+   return 0;
+}
+
+#ifndef OVERRIDE_COMB_FILTER_CONST
+/* Constant-coefficient section of the comb filter. For each of the N samples:
+     y[i] = x[i] + g10*x[i-T] + g11*(x[i-T+1]+x[i-T-1]) + g12*(x[i-T+2]+x[i-T-2])
+   The five lagged taps are carried in locals, so only one new tap,
+   x[i-T+2], is fetched per sample. */
+static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   int n;
+   /* Taps around lag T, named by their offset from x[i-T]. */
+   opus_val32 lag_m2 = x[-T-2];
+   opus_val32 lag_m1 = x[-T-1];
+   opus_val32 lag_0 = x[-T];
+   opus_val32 lag_p1 = x[-T+1];
+   opus_val32 lag_p2;
+
+   for (n=0;n<N;n++)
+   {
+      lag_p2 = x[n-T+2];
+      y[n] = x[n]
+               + MULT16_32_Q15(g10,lag_0)
+               + MULT16_32_Q15(g11,ADD32(lag_p1,lag_m1))
+               + MULT16_32_Q15(g12,ADD32(lag_p2,lag_m2));
+      /* Slide the tap window forward by one sample. */
+      lag_m2 = lag_m1;
+      lag_m1 = lag_0;
+      lag_0 = lag_p1;
+      lag_p1 = lag_p2;
+   }
+}
+#endif
+
+/* Comb filter with a cross-fade between two parameter sets.
+   Over the first "overlap" samples the (T0, g0, tapset0) filter is weighted by
+   (Q15ONE - window[i]^2) and the (T1, g1, tapset1) filter by window[i]^2; the
+   remaining samples use the (T1, g1, tapset1) filter alone. If both gains are
+   zero, x is copied to y unfiltered. */
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap)
+{
+   int n;
+   opus_val16 old_c0, old_c1, old_c2;
+   opus_val16 new_c0, new_c1, new_c2;
+   opus_val32 lag_p2, lag_p1, lag_0, lag_m1, lag_m2;
+   /* One row per tapset: centre tap, +/-1 taps, +/-2 taps. */
+   static const opus_val16 gains[3][3] = {
+         {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
+         {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
+         {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
+
+   if (g0==0 && g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y, x, N);
+      return;
+   }
+
+   /* Per-tap gains of the outgoing (old_*) and incoming (new_*) filters. */
+   old_c0 = MULT16_16_Q15(g0, gains[tapset0][0]);
+   old_c1 = MULT16_16_Q15(g0, gains[tapset0][1]);
+   old_c2 = MULT16_16_Q15(g0, gains[tapset0][2]);
+   new_c0 = MULT16_16_Q15(g1, gains[tapset1][0]);
+   new_c1 = MULT16_16_Q15(g1, gains[tapset1][1]);
+   new_c2 = MULT16_16_Q15(g1, gains[tapset1][2]);
+
+   /* Taps of the incoming filter around lag T1, carried across iterations. */
+   lag_p1 = x[-T1+1];
+   lag_0 = x[-T1 ];
+   lag_m1 = x[-T1-1];
+   lag_m2 = x[-T1-2];
+   for (n=0;n<overlap;n++)
+   {
+      opus_val16 fade;
+      lag_p2 = x[n-T1+2];
+      fade = MULT16_16_Q15(window[n],window[n]);
+      y[n] = x[n]
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-fade),old_c0),x[n-T0])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-fade),old_c1),ADD32(x[n-T0+1],x[n-T0-1]))
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-fade),old_c2),ADD32(x[n-T0+2],x[n-T0-2]))
+               + MULT16_32_Q15(MULT16_16_Q15(fade,new_c0),lag_0)
+               + MULT16_32_Q15(MULT16_16_Q15(fade,new_c1),ADD32(lag_p1,lag_m1))
+               + MULT16_32_Q15(MULT16_16_Q15(fade,new_c2),ADD32(lag_p2,lag_m2));
+      lag_m2 = lag_m1;
+      lag_m1 = lag_0;
+      lag_0 = lag_p1;
+      lag_p1 = lag_p2;
+   }
+
+   if (g1!=0)
+   {
+      /* Compute the part with the constant filter. */
+      comb_filter_const(y+n, x+n, T1, N-n, new_c0, new_c1, new_c2);
+      return;
+   }
+
+   /* No incoming filter: the rest of the signal passes through unchanged. */
+   /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+   if (x!=y)
+      OPUS_MOVE(y+overlap, x+overlap, N-overlap);
+}
+
+const signed char tf_select_table[4][8] = { /* Row: LM. Column: 4*isTransient + 2*tf_select + per-band flag, as indexed by tf_decode() */
+ {0, -1, 0, -1, 0,-1, 0,-1},
+ {0, -1, 0, -2, 1, 0, 1,-1},
+ {0, -2, 0, -3, 2, 0, 1,-1},
+ {0, -2, 0, -3, 3, 0, 1,-1},
+};
+
+
+/* Fill cap[0..nbEBands-1] from the mode's cached caps table. The table row is
+   selected by LM and C; each entry is offset by 64, scaled by the channel
+   count and the band width (in bins, shifted left by LM), then shifted right
+   by 2. */
+void init_caps(const CELTMode *m,int *cap,int LM,int C)
+{
+   int band = 0;
+   while (band < m->nbEBands)
+   {
+      const int width = (m->eBands[band+1]-m->eBands[band])<<LM;
+      cap[band] = (m->cache.caps[m->nbEBands*(2*LM+C-1)+band]+64)*C*width>>2;
+      band++;
+   }
+}
+
+
+
+/* Return a static, human-readable description of an error code.
+   Codes 0 down to -7 have their own message; anything else (any positive
+   value, or anything below -7) gives "unknown error". */
+const char *opus_strerror(int error)
+{
+   /* Indexed by the negated error code. */
+   static const char * const error_strings[8] = {
+         "success",
+         "invalid argument",
+         "buffer too small",
+         "internal error",
+         "corrupted stream",
+         "request not implemented",
+         "invalid state",
+         "memory allocation failed"
+   };
+   const int known = (error <= 0 && error >= -7);
+   return known ? error_strings[-error] : "unknown error";
+}
+
+/* Return the library version string: "libopus " followed by PACKAGE_VERSION,
+   then "-fixed" when OPUS_FIXED_POINT is defined and "-fuzzing" when FUZZING
+   is defined. The string is assembled at compile time. */
+const char *opus_get_version_string(void)
+{
+   static const char version[] = "libopus " PACKAGE_VERSION
+#ifdef OPUS_FIXED_POINT
+         "-fixed"
+#endif
+#ifdef FUZZING
+         "-fuzzing"
+#endif
+         ;
+   return version;
+}
diff --git a/drivers/opus/celt/celt.h b/drivers/opus/celt/celt.h
new file mode 100644
index 0000000000..5deea1f0aa
--- /dev/null
+++ b/drivers/opus/celt/celt.h
@@ -0,0 +1,218 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Copyright (c) 2008 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/**
+ @file celt.h
+ @brief Contains all the functions for encoding and decoding audio
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CELT_H
+#define CELT_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+#include "opus_custom.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "arch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CELTEncoder OpusCustomEncoder
+#define CELTDecoder OpusCustomDecoder
+#define CELTMode OpusCustomMode
+
+typedef struct { /* Pointed-to type that __celt_check_analysis_ptr() enforces for the CELT_SET_ANALYSIS request */
+ int valid;
+ float tonality;
+ float tonality_slope;
+ float noisiness;
+ float activity;
+ float music_prob;
+ int bandwidth;
+}AnalysisInfo; /* NOTE(review): field semantics are not defined in this header - confirm against the analysis code */
+
+#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+
+#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
+
+/* Encoder/decoder Requests */
+
+/* Expose this option again when variable framesize actually works */
+#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */
+
+
+#define CELT_SET_PREDICTION_REQUEST 10002
+/** Controls the use of interframe prediction.
+ 0=Independent frames
+ 1=Short term interframe prediction allowed
+ 2=Long term prediction allowed
+ */
+#define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_INPUT_CLIPPING_REQUEST 10004
+#define CELT_SET_INPUT_CLIPPING(x) CELT_SET_INPUT_CLIPPING_REQUEST, __opus_check_int(x)
+
+#define CELT_GET_AND_CLEAR_ERROR_REQUEST 10007
+#define CELT_GET_AND_CLEAR_ERROR(x) CELT_GET_AND_CLEAR_ERROR_REQUEST, __opus_check_int_ptr(x)
+
+#define CELT_SET_CHANNELS_REQUEST 10008
+#define CELT_SET_CHANNELS(x) CELT_SET_CHANNELS_REQUEST, __opus_check_int(x)
+
+
+/* Internal */
+#define CELT_SET_START_BAND_REQUEST 10010
+#define CELT_SET_START_BAND(x) CELT_SET_START_BAND_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_END_BAND_REQUEST 10012
+#define CELT_SET_END_BAND(x) CELT_SET_END_BAND_REQUEST, __opus_check_int(x)
+
+#define CELT_GET_MODE_REQUEST 10015
+/** Get the CELTMode used by an encoder or decoder */
+#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x)
+
+#define CELT_SET_SIGNALLING_REQUEST 10016
+#define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_TONALITY_REQUEST 10018
+#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x)
+#define CELT_SET_TONALITY_SLOPE_REQUEST 10020
+#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_ANALYSIS_REQUEST 10022
+#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x)
+
+#define OPUS_SET_LFE_REQUEST 10024
+#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
+
+#define OPUS_SET_ENERGY_MASK_REQUEST 10026
+#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
+
+/* Encoder stuff */
+
+int celt_encoder_get_size(int channels);
+
+int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);
+
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+ int arch);
+
+
+
+/* Decoder stuff */
+
+int celt_decoder_get_size(int channels);
+
+
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
+
+int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec);
+
+#define celt_encoder_ctl opus_custom_encoder_ctl
+#define celt_decoder_ctl opus_custom_decoder_ctl
+
+
+#ifdef CUSTOM_MODES
+#define OPUS_CUSTOM_NOSTATIC
+#else
+#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE
+#endif
+
+static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
+/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
+static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
+
+static const unsigned char tapset_icdf[3]={2,1,0};
+
+#ifdef CUSTOM_MODES
+static const unsigned char toOpusTable[20] = {
+ 0xE0, 0xE8, 0xF0, 0xF8,
+ 0xC0, 0xC8, 0xD0, 0xD8,
+ 0xA0, 0xA8, 0xB0, 0xB8,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x80, 0x88, 0x90, 0x98,
+};
+
+static const unsigned char fromOpusTable[16] = {
+ 0x80, 0x88, 0x90, 0x98,
+ 0x40, 0x48, 0x50, 0x58,
+ 0x20, 0x28, 0x30, 0x38,
+ 0x00, 0x08, 0x10, 0x18
+};
+
+/* Translate byte c through toOpusTable. The upper five bits select the table
+   entry and the low three bits are copied into the result. Returns -1 when c
+   is 0xA0 or above, or when the selected table entry is zero. */
+static OPUS_INLINE int toOpus(unsigned char c)
+{
+   int entry;
+   /* The table only covers values below 0xA0 (20 entries of 8 values). */
+   if (c>=0xA0)
+      return -1;
+   entry = toOpusTable[c>>3];
+   /* A zero entry marks a range with no mapping. */
+   if (entry == 0)
+      return -1;
+   return entry|(c&0x7);
+}
+
+/* Translate byte c through fromOpusTable. Only values of 0x80 and above are
+   mapped (so the index (c>>3)-16 is in 0..15); the low three bits are copied
+   into the result. Returns -1 for values below 0x80. */
+static OPUS_INLINE int fromOpus(unsigned char c)
+{
+   if (c>=0x80)
+      return fromOpusTable[(c>>3)-16] | (c&0x7);
+   return -1;
+}
+#endif /* CUSTOM_MODES */
+
+#define COMBFILTER_MAXPERIOD 1024
+#define COMBFILTER_MINPERIOD 15
+
+extern const signed char tf_select_table[4][8];
+
+int resampling_factor(opus_int32 rate);
+
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+ int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
+
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+ opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+ const opus_val16 *window, int overlap);
+
+void init_caps(const CELTMode *m,int *cap,int LM,int C);
+
+#ifdef RESYNTH
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
+
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+ celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CELT_H */
diff --git a/drivers/opus/celt/celt_decoder.c b/drivers/opus/celt/celt_decoder.c
new file mode 100644
index 0000000000..93791feab4
--- /dev/null
+++ b/drivers/opus/celt/celt_decoder.c
@@ -0,0 +1,1195 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2010 Xiph.Org Foundation
+ Copyright (c) 2008 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define CELT_DECODER_C
+
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "opus_modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+/**********************************************************************/
+/* */
+/* DECODER */
+/* */
+/**********************************************************************/
+#define DECODE_BUFFER_SIZE 2048
+
+/** Decoder state
+ @brief Decoder state
+ The fixed fields below are followed, in the same allocation, by the
+ variable-size arrays listed after _decode_mem; the total size is computed
+ by opus_custom_decoder_get_size().
+ */
+struct OpusCustomDecoder {
+ const OpusCustomMode *mode; /* Mode passed to opus_custom_decoder_init() */
+ int overlap; /* Copy of mode->overlap taken at init */
+ int channels; /* Channel count given at init */
+ int stream_channels; /* Initialised to the same value as channels */
+
+ int downsample; /* 1 after opus_custom_decoder_init(); celt_decoder_init() stores resampling_factor(sampling_rate) here */
+ int start, end; /* Initialised to 0 and mode->effEBands */
+ int signalling; /* Initialised to 1 */
+ int arch; /* Value returned by opus_select_arch() at init */
+
+ /* Everything beyond this point gets cleared on a reset */
+#define DECODER_RESET_START rng
+
+ opus_uint32 rng; /* Seed read and written back by the noise-based concealment in celt_decode_lost() */
+ int error;
+ int last_pitch_index; /* Pitch lag stored by celt_decode_lost() when loss_count is 0 and reused otherwise */
+ int loss_count; /* Read by celt_decode_lost() to choose between pitch-based and noise-based concealment */
+ int postfilter_period;
+ int postfilter_period_old;
+ opus_val16 postfilter_gain;
+ opus_val16 postfilter_gain_old;
+ int postfilter_tapset;
+ int postfilter_tapset_old;
+
+ celt_sig preemph_memD[2];
+
+ celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
+ /* opus_val16 lpc[], Size = channels*LPC_ORDER */
+ /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
+ /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
+ /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
+ /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
+};
+
+/* Size in bytes of a decoder state for the given channel count, computed for
+   the mode returned by opus_custom_mode_create(48000, 960, NULL). */
+int celt_decoder_get_size(int channels)
+{
+   return opus_custom_decoder_get_size(
+         opus_custom_mode_create(48000, 960, NULL), channels);
+}
+
+/* Number of bytes needed for a decoder state: the structure itself (which
+   already contains one celt_sig of _decode_mem), the remainder of the
+   per-channel decode memory, the per-channel LPC coefficients, and four
+   arrays of 2*nbEBands band values. */
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels)
+{
+   const int mem_len = channels*(DECODE_BUFFER_SIZE+mode->overlap);
+   return (int)(sizeof(struct CELTDecoder)
+         + (mem_len-1)*sizeof(celt_sig)
+         + channels*LPC_ORDER*sizeof(opus_val16)
+         + 4*2*mode->nbEBands*sizeof(opus_val16));
+}
+
+#ifdef CUSTOM_MODES
+/* Allocate and initialise a decoder for the given mode and channel count.
+   Returns the new decoder, or NULL when initialisation does not return
+   OPUS_OK. If error is non-NULL it receives the initialisation status. */
+CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error)
+{
+   CELTDecoder *dec = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels));
+   const int status = opus_custom_decoder_init(dec, mode, channels);
+
+   if (status != OPUS_OK)
+   {
+      /* Initialisation failed: release the memory and report no decoder. */
+      opus_custom_decoder_destroy(dec);
+      dec = NULL;
+   }
+   if (error != NULL)
+      *error = status;
+   return dec;
+}
+#endif /* CUSTOM_MODES */
+
+/* Initialise a decoder on the mode returned by
+   opus_custom_mode_create(48000, 960, NULL), then set its downsampling factor
+   from sampling_rate. Returns the status of opus_custom_decoder_init() if that
+   fails, OPUS_BAD_ARG when resampling_factor() returns 0 for the rate, and
+   OPUS_OK otherwise. */
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels)
+{
+   const int status = opus_custom_decoder_init(st,
+         opus_custom_mode_create(48000, 960, NULL), channels);
+   if (status != OPUS_OK)
+      return status;
+
+   st->downsample = resampling_factor(sampling_rate);
+   return st->downsample==0 ? OPUS_BAD_ARG : OPUS_OK;
+}
+
+/* Initialise a decoder state in caller-provided memory.
+   Returns OPUS_BAD_ARG when channels is not 1 or 2 or when mode is NULL,
+   OPUS_ALLOC_FAIL when st is NULL, and OPUS_OK otherwise. The whole state
+   (opus_custom_decoder_get_size() bytes) is zeroed before the fields are set,
+   and the decoder is then put through OPUS_RESET_STATE. */
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels)
+{
+   /* Zero channels must be rejected as well: the per-channel loops in this
+      file are do/while loops that always process channel 0, while the state
+      would have been sized for no channels at all. */
+   if (channels < 1 || channels > 2)
+      return OPUS_BAD_ARG;
+
+   if (st==NULL)
+      return OPUS_ALLOC_FAIL;
+
+   /* The mode is dereferenced by the size computation and the setup below. */
+   if (mode==NULL)
+      return OPUS_BAD_ARG;
+
+   OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels));
+
+   st->mode = mode;
+   st->overlap = mode->overlap;
+   st->stream_channels = st->channels = channels;
+
+   st->downsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+   st->arch = opus_select_arch();
+
+   st->loss_count = 0;
+
+   opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
+
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_decoder_destroy(CELTDecoder *st)
+{
+ opus_free(st); /* Releases the state that opus_custom_decoder_create() obtained with opus_alloc() */
+}
+#endif /* CUSTOM_MODES */
+
+/* Convert one celt_sig sample to opus_val16. Fixed-point builds apply
+   PSHR32() by SIG_SHIFT and clamp the result to [-32768, 32767] before
+   extracting 16 bits; other builds only cast. */
+static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
+{
+#ifdef OPUS_FIXED_POINT
+   const celt_sig scaled = PSHR32(x, SIG_SHIFT);
+   return EXTRACT16(MIN32(MAX32(scaled, -32768), 32767));
+#else
+   return (opus_val16)x;
+#endif
+}
+
+/* De-emphasis filter for C channels of N samples each. Output is interleaved:
+   channel c is written to pcm[c], pcm[c+C], ... and mem[c] carries that
+   channel's filter state across calls. When downsample is greater than 1 (or,
+   with CUSTOM_MODES, when coef[1] is non-zero) the filtered signal is first
+   written to scratch and then every downsample-th sample is output. */
+#ifndef RESYNTH
+static
+#endif
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
+{
+   int ch;
+   /* Once set, stays set for the remaining channels. */
+   int via_scratch=0;
+   const opus_val16 coef0 = coef[0];
+   const int Nd = N/downsample;
+
+   ch=0; do {
+      int j;
+      celt_sig * OPUS_RESTRICT src = in[ch];
+      opus_val16 * OPUS_RESTRICT dst = pcm+ch;
+      celt_sig state = mem[ch];
+#ifdef CUSTOM_MODES
+      if (coef[1] != 0)
+      {
+         /* Custom-mode filter using coef[0], coef[1] and coef[3]. */
+         const opus_val16 coef1 = coef[1];
+         const opus_val16 coef3 = coef[3];
+         for (j=0;j<N;j++)
+         {
+            celt_sig acc = src[j] + state + VERY_SMALL;
+            state = MULT16_32_Q15(coef0, acc)
+                      - MULT16_32_Q15(coef1, src[j]);
+            acc = SHL32(MULT16_32_Q15(coef3, acc), 2);
+            scratch[j] = acc;
+         }
+         via_scratch=1;
+      } else
+#endif
+      if (downsample>1)
+      {
+         /* Single-coefficient filter, kept at full rate in scratch so it can
+            be decimated below. */
+         for (j=0;j<N;j++)
+         {
+            celt_sig acc = src[j] + state + VERY_SMALL;
+            state = MULT16_32_Q15(coef0, acc);
+            scratch[j] = acc;
+         }
+         via_scratch=1;
+      } else {
+         /* Single-coefficient filter written straight to the output. */
+         for (j=0;j<N;j++)
+         {
+            celt_sig acc = src[j] + state + VERY_SMALL;
+            state = MULT16_32_Q15(coef0, acc);
+            dst[j*C] = SCALEOUT(SIG2WORD16(acc));
+         }
+      }
+      mem[ch] = state;
+
+      if (via_scratch)
+      {
+         /* Perform down-sampling */
+         for (j=0;j<Nd;j++)
+            dst[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+      }
+   } while (++ch<C);
+}
+
+/** Compute the IMDCT and apply window for all sub-frames and
+    all channels in a frame.
+    With shortBlocks non-zero there are shortBlocks transforms of
+    mode->shortMdctSize each per channel; otherwise there is a single transform
+    of mode->shortMdctSize<<LM. */
+#ifndef RESYNTH
+static
+#endif
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
+{
+   int blk, ch;
+   const int overlap = OVERLAP(mode);
+   /* Number of transforms per channel, length of each, and the shift argument
+      handed to clt_mdct_backward(). */
+   const int B = shortBlocks ? shortBlocks : 1;
+   const int N = shortBlocks ? mode->shortMdctSize : mode->shortMdctSize<<LM;
+   const int shift = shortBlocks ? mode->maxLM : mode->maxLM-LM;
+
+   ch=0; do {
+      /* IMDCT on the interleaved sub-frames; overlap-add is performed by the IMDCT */
+      for (blk=0;blk<B;blk++)
+      {
+         clt_mdct_backward(&mode->mdct, &X[blk+ch*N*B], out_mem[ch]+N*blk,
+               mode->window, overlap, shift, B);
+      }
+   } while (++ch<C);
+}
+
+/* Decode the per-band time-frequency flags for bands start..end-1 and turn
+   them into tf_res[] values through tf_select_table.
+   A flag bit is read for a band only while it fits in the bit budget
+   (dec->storage*8, less one bit when the tf_select bit is reserved); each bit
+   read toggles a running flag, which is what gets stored for the band. The
+   tf_select bit itself is read only if it was reserved and the two table
+   entries it chooses between differ. */
+static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
+{
+   int band;
+   int flag = 0;          /* running per-band flag */
+   int any_set = 0;       /* OR of every flag value produced by a decoded bit */
+   int select = 0;
+   int select_reserved;
+   int logp = isTransient ? 2 : 4;
+   opus_uint32 budget = dec->storage*8;
+   opus_uint32 tell = ec_tell(dec);
+   const signed char *row = tf_select_table[LM];
+   const int base = 4*isTransient;
+
+   select_reserved = LM>0 && tell+logp+1<=budget;
+   budget -= select_reserved;
+
+   for (band=start;band<end;band++)
+   {
+      if (tell+logp<=budget)
+      {
+         flag ^= ec_dec_bit_logp(dec, logp);
+         tell = ec_tell(dec);
+         any_set |= flag;
+      }
+      tf_res[band] = flag;
+      /* Every band after the first uses this probability. */
+      logp = isTransient ? 4 : 5;
+   }
+
+   if (select_reserved && row[base+0+any_set] != row[base+2+any_set])
+      select = ec_dec_bit_logp(dec, 1);
+
+   for (band=start;band<end;band++)
+      tf_res[band] = row[base+2*select+tf_res[band]];
+}
+
+/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
+ CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
+ current value corresponds to a pitch of 66.67 Hz. */
+#define PLC_PITCH_LAG_MAX (720)
+/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
+ pitch of 480 Hz. */
+#define PLC_PITCH_LAG_MIN (100)
+
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
+{
+ int c;
+ int i;
+ const int C = st->channels;
+ celt_sig *decode_mem[2];
+ celt_sig *out_syn[2];
+ opus_val16 *lpc;
+ opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+ const OpusCustomMode *mode;
+ int nbEBands;
+ int overlap;
+ int start;
+ int downsample;
+ int loss_count;
+ int noise_based;
+ const opus_int16 *eBands;
+ VARDECL(celt_sig, scratch);
+ SAVE_STACK;
+
+ mode = st->mode;
+ nbEBands = mode->nbEBands;
+ overlap = mode->overlap;
+ eBands = mode->eBands;
+
+ c=0; do {
+ decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+ out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+ } while (++c<C);
+ lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
+ oldBandE = lpc+C*LPC_ORDER;
+ oldLogE = oldBandE + 2*nbEBands;
+ oldLogE2 = oldLogE + 2*nbEBands;
+ backgroundLogE = oldLogE2 + 2*nbEBands;
+
+ loss_count = st->loss_count;
+ start = st->start;
+ downsample = st->downsample;
+ noise_based = loss_count >= 5 || start != 0;
+ ALLOC(scratch, noise_based?N*C:N, celt_sig);
+ if (noise_based)
+ {
+ /* Noise-based PLC/CNG */
+ celt_sig *freq;
+ VARDECL(celt_norm, X);
+ opus_uint32 seed;
+ opus_val16 *plcLogE;
+ int end;
+ int effEnd;
+
+ end = st->end;
+ effEnd = IMAX(start, IMIN(end, mode->effEBands));
+
+ /* Share the interleaved signal MDCT coefficient buffer with the
+ deemphasis scratch buffer. */
+ freq = scratch;
+ ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
+
+ if (loss_count >= 5)
+ plcLogE = backgroundLogE;
+ else {
+ /* Energy decay */
+ opus_val16 decay = loss_count==0 ?
+ QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
+ c=0; do
+ {
+ for (i=start;i<end;i++)
+ oldBandE[c*nbEBands+i] -= decay;
+ } while (++c<C);
+ plcLogE = oldBandE;
+ }
+ seed = st->rng;
+ for (c=0;c<C;c++)
+ {
+ for (i=start;i<effEnd;i++)
+ {
+ int j;
+ int boffs;
+ int blen;
+ boffs = N*c+(eBands[i]<<LM);
+ blen = (eBands[i+1]-eBands[i])<<LM;
+ for (j=0;j<blen;j++)
+ {
+ seed = celt_lcg_rand(seed);
+ X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
+ }
+ renormalise_vector(X+boffs, blen, Q15ONE);
+ }
+ }
+ st->rng = seed;
+
+ denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
+
+ c=0; do {
+ int bound = eBands[effEnd]<<LM;
+ if (downsample!=1)
+ bound = IMIN(bound, N/downsample);
+ for (i=bound;i<N;i++)
+ freq[c*N+i] = 0;
+ } while (++c<C);
+ c=0; do {
+ OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
+ DECODE_BUFFER_SIZE-N+(overlap>>1));
+ } while (++c<C);
+ compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
+ } else {
+ /* Pitch-based PLC */
+ const opus_val16 *window;
+ opus_val16 fade = Q15ONE;
+ int pitch_index;
+ VARDECL(opus_val32, etmp);
+ VARDECL(opus_val16, exc);
+
+ if (loss_count == 0)
+ {
+ VARDECL( opus_val16, lp_pitch_buf );
+ ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
+ pitch_downsample(decode_mem, lp_pitch_buf,
+ DECODE_BUFFER_SIZE, C, st->arch);
+ pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
+ DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
+ PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
+ pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
+ st->last_pitch_index = pitch_index;
+ } else {
+ pitch_index = st->last_pitch_index;
+ fade = QCONST16(.8f,15);
+ }
+
+ ALLOC(etmp, overlap, opus_val32);
+ ALLOC(exc, MAX_PERIOD, opus_val16);
+ window = mode->window;
+ c=0; do {
+ opus_val16 decay;
+ opus_val16 attenuation;
+ opus_val32 S1=0;
+ celt_sig *buf;
+ int extrapolation_offset;
+ int extrapolation_len;
+ int exc_length;
+ int j;
+
+ buf = decode_mem[c];
+ for (i=0;i<MAX_PERIOD;i++) {
+ exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT);
+ }
+
+ if (loss_count == 0)
+ {
+ opus_val32 ac[LPC_ORDER+1];
+ /* Compute LPC coefficients for the last MAX_PERIOD samples before
+ the first loss so we can work in the excitation-filter domain. */
+ _celt_autocorr(exc, ac, window, overlap,
+ LPC_ORDER, MAX_PERIOD, st->arch);
+ /* Add a noise floor of -40 dB. */
+#ifdef OPUS_FIXED_POINT
+ ac[0] += SHR32(ac[0],13);
+#else
+ ac[0] *= 1.0001f;
+#endif
+ /* Use lag windowing to stabilize the Levinson-Durbin recursion. */
+ for (i=1;i<=LPC_ORDER;i++)
+ {
+ /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef OPUS_FIXED_POINT
+ ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+ ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
+#endif
+ }
+ _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
+ }
+ /* We want the excitation for 2 pitch periods in order to look for a
+ decaying signal, but we can't get more than MAX_PERIOD. */
+ exc_length = IMIN(2*pitch_index, MAX_PERIOD);
+ /* Initialize the LPC history with the samples just before the start
+ of the region for which we're computing the excitation. */
+ {
+ opus_val16 lpc_mem[LPC_ORDER];
+ for (i=0;i<LPC_ORDER;i++)
+ {
+ lpc_mem[i] =
+ ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
+ }
+ /* Compute the excitation for exc_length samples before the loss. */
+ celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+ exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
+ }
+
+ /* Check if the waveform is decaying, and if so how fast.
+ We do this to avoid adding energy when concealing in a segment
+ with decaying energy. */
+ {
+ opus_val32 E1=1, E2=1;
+ int decay_length;
+#ifdef OPUS_FIXED_POINT
+ int shift = IMAX(0,2*celt_zlog2(celt_maxabs16(&exc[MAX_PERIOD-exc_length], exc_length))-20);
+#endif
+ decay_length = exc_length>>1;
+ for (i=0;i<decay_length;i++)
+ {
+ opus_val16 e;
+ e = exc[MAX_PERIOD-decay_length+i];
+ E1 += SHR32(MULT16_16(e, e), shift);
+ e = exc[MAX_PERIOD-2*decay_length+i];
+ E2 += SHR32(MULT16_16(e, e), shift);
+ }
+ E1 = MIN32(E1, E2);
+ decay = celt_sqrt(frac_div32(SHR32(E1, 1), E2));
+ }
+
+ /* Move the decoder memory one frame to the left to give us room to
+ add the data for the new frame. We ignore the overlap that extends
+ past the end of the buffer, because we aren't going to use it. */
+ OPUS_MOVE(buf, buf+N, DECODE_BUFFER_SIZE-N);
+
+ /* Extrapolate from the end of the excitation with a period of
+ "pitch_index", scaling down each period by an additional factor of
+ "decay". */
+ extrapolation_offset = MAX_PERIOD-pitch_index;
+ /* We need to extrapolate enough samples to cover a complete MDCT
+ window (including overlap/2 samples on both sides). */
+ extrapolation_len = N+overlap;
+ /* We also apply fading if this is not the first loss. */
+ attenuation = MULT16_16_Q15(fade, decay);
+ for (i=j=0;i<extrapolation_len;i++,j++)
+ {
+ opus_val16 tmp;
+ if (j >= pitch_index) {
+ j -= pitch_index;
+ attenuation = MULT16_16_Q15(attenuation, decay);
+ }
+ buf[DECODE_BUFFER_SIZE-N+i] =
+ SHL32(EXTEND32(MULT16_16_Q15(attenuation,
+ exc[extrapolation_offset+j])), SIG_SHIFT);
+ /* Compute the energy of the previously decoded signal whose
+ excitation we're copying. */
+ tmp = ROUND16(
+ buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
+ SIG_SHIFT);
+ S1 += SHR32(MULT16_16(tmp, tmp), 8);
+ }
+
+ {
+ opus_val16 lpc_mem[LPC_ORDER];
+ /* Copy the last decoded samples (prior to the overlap region) to
+ synthesis filter memory so we can have a continuous signal. */
+ for (i=0;i<LPC_ORDER;i++)
+ lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
+ /* Apply the synthesis filter to convert the excitation back into
+ the signal domain. */
+ celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
+ buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
+ lpc_mem);
+ }
+
+ /* Check if the synthesis energy is higher than expected, which can
+ happen with the signal changes during our window. If so,
+ attenuate. */
+ {
+ opus_val32 S2=0;
+ for (i=0;i<extrapolation_len;i++)
+ {
+ opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
+ S2 += SHR32(MULT16_16(tmp, tmp), 8);
+ }
+ /* This checks for an "explosion" in the synthesis. */
+#ifdef OPUS_FIXED_POINT
+ if (!(S1 > SHR32(S2,2)))
+#else
+ /* The float test is written this way to catch NaNs in the output
+ of the IIR filter at the same time. */
+ if (!(S1 > 0.2f*S2))
+#endif
+ {
+ for (i=0;i<extrapolation_len;i++)
+ buf[DECODE_BUFFER_SIZE-N+i] = 0;
+ } else if (S1 < S2)
+ {
+ opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
+ for (i=0;i<overlap;i++)
+ {
+ opus_val16 tmp_g = Q15ONE
+ - MULT16_16_Q15(window[i], Q15ONE-ratio);
+ buf[DECODE_BUFFER_SIZE-N+i] =
+ MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]);
+ }
+ for (i=overlap;i<extrapolation_len;i++)
+ {
+ buf[DECODE_BUFFER_SIZE-N+i] =
+ MULT16_32_Q15(ratio, buf[DECODE_BUFFER_SIZE-N+i]);
+ }
+ }
+ }
+
+ /* Apply the pre-filter to the MDCT overlap for the next frame because
+ the post-filter will be re-applied in the decoder after the MDCT
+ overlap. */
+ comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
+ st->postfilter_period, st->postfilter_period, overlap,
+ -st->postfilter_gain, -st->postfilter_gain,
+ st->postfilter_tapset, st->postfilter_tapset, NULL, 0);
+
+ /* Simulate TDAC on the concealed audio so that it blends with the
+ MDCT of the next frame. */
+ for (i=0;i<overlap/2;i++)
+ {
+ buf[DECODE_BUFFER_SIZE+i] =
+ MULT16_32_Q15(window[i], etmp[overlap-1-i])
+ + MULT16_32_Q15(window[overlap-i-1], etmp[i]);
+ }
+ } while (++c<C);
+ }
+
+ deemphasis(out_syn, pcm, N, C, downsample,
+ mode->preemph, st->preemph_memD, scratch);
+
+ st->loss_count = loss_count+1;
+
+ RESTORE_STACK;
+}
+/* Decode one CELT frame from `data` (`len` bytes) into `pcm`.
+   - frame_size is multiplied by st->downsample and must then match
+     mode->shortMdctSize<<LM for some LM <= mode->maxLM, else OPUS_BAD_ARG
+     (the CUSTOM_MODES signalling path instead reads LM from the first byte).
+   - len outside 0..1275 or pcm==NULL yields OPUS_BAD_ARG.
+   - data==NULL or len<=1 hands the frame to celt_decode_lost().
+   - dec may be NULL, in which case a local range decoder is set up on data.
+   On success the return value is frame_size/st->downsample; if the range
+   decoder consumed more than 8*len bits OPUS_INTERNAL_ERROR is returned, and
+   a range-decoder error is recorded in st->error. */
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
+{
+ int c, i, N;
+ int spread_decision;
+ opus_int32 bits;
+ ec_dec _dec;
+ VARDECL(celt_sig, freq);
+ VARDECL(celt_norm, X);
+ VARDECL(int, fine_quant);
+ VARDECL(int, pulses);
+ VARDECL(int, cap);
+ VARDECL(int, offsets);
+ VARDECL(int, fine_priority);
+ VARDECL(int, tf_res);
+ VARDECL(unsigned char, collapse_masks);
+ celt_sig *decode_mem[2];
+ celt_sig *out_syn[2];
+ opus_val16 *lpc;
+ opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+
+ int shortBlocks;
+ int isTransient;
+ int intra_ener;
+ const int CC = st->channels; /* Output channel count */
+ int LM, M;
+ int effEnd;
+ int codedBands;
+ int alloc_trim;
+ int postfilter_pitch;
+ opus_val16 postfilter_gain;
+ int intensity=0;
+ int dual_stereo=0;
+ opus_int32 total_bits;
+ opus_int32 balance;
+ opus_int32 tell;
+ int dynalloc_logp;
+ int postfilter_tapset;
+ int anti_collapse_rsv;
+ int anti_collapse_on=0;
+ int silence;
+ int C = st->stream_channels; /* Channel count of the coded stream; may differ from CC */
+ const OpusCustomMode *mode;
+ int nbEBands;
+ int overlap;
+ const opus_int16 *eBands;
+ ALLOC_STACK;
+
+ mode = st->mode;
+ nbEBands = mode->nbEBands;
+ overlap = mode->overlap;
+ eBands = mode->eBands;
+ frame_size *= st->downsample;
+ /* Carve the per-channel history buffers, then the LPC and band-energy arrays that follow them, out of st->_decode_mem. */
+ c=0; do {
+ decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+ } while (++c<CC);
+ lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
+ oldBandE = lpc+CC*LPC_ORDER;
+ oldLogE = oldBandE + 2*nbEBands;
+ oldLogE2 = oldLogE + 2*nbEBands;
+ backgroundLogE = oldLogE2 + 2*nbEBands;
+
+#ifdef CUSTOM_MODES
+ if (st->signalling && data!=NULL)
+ {
+ int data0=data[0];
+ /* Convert "standard mode" to Opus header */
+ if (mode->Fs==48000 && mode->shortMdctSize==120)
+ {
+ data0 = fromOpus(data0);
+ if (data0<0)
+ return OPUS_INVALID_PACKET;
+ }
+ st->end = IMAX(1, mode->effEBands-2*(data0>>5));
+ LM = (data0>>3)&0x3;
+ C = 1 + ((data0>>2)&0x1);
+ data++;
+ len--;
+ if (LM>mode->maxLM)
+ return OPUS_INVALID_PACKET;
+ if (frame_size < mode->shortMdctSize<<LM)
+ return OPUS_BUFFER_TOO_SMALL;
+ else
+ frame_size = mode->shortMdctSize<<LM;
+ } else {
+#else
+ {
+#endif
+ for (LM=0;LM<=mode->maxLM;LM++)
+ if (mode->shortMdctSize<<LM==frame_size)
+ break;
+ if (LM>mode->maxLM)
+ return OPUS_BAD_ARG;
+ }
+ M=1<<LM;
+
+ if (len<0 || len>1275 || pcm==NULL)
+ return OPUS_BAD_ARG;
+
+ N = M*mode->shortMdctSize;
+
+ effEnd = st->end;
+ if (effEnd > mode->effEBands)
+ effEnd = mode->effEBands;
+ /* No payload (NULL data or at most one byte): let celt_decode_lost() produce the frame instead of decoding. */
+ if (data == NULL || len<=1)
+ {
+ celt_decode_lost(st, pcm, N, LM);
+ RESTORE_STACK;
+ return frame_size/st->downsample;
+ }
+ /* Use a local range decoder when the caller did not supply one. */
+ if (dec == NULL)
+ {
+ ec_dec_init(&_dec,(unsigned char*)data,len);
+ dec = &_dec;
+ }
+ /* Mono stream: merge the two stored energy sets into the first by taking the per-band maximum. */
+ if (C==1)
+ {
+ for (i=0;i<nbEBands;i++)
+ oldBandE[i]=MAX16(oldBandE[i],oldBandE[nbEBands+i]);
+ }
+
+ total_bits = len*8;
+ tell = ec_tell(dec);
+ /* Silence flag: forced when the bit budget is already used up, read from the stream only when exactly one bit has been consumed. */
+ if (tell >= total_bits)
+ silence = 1;
+ else if (tell==1)
+ silence = ec_dec_bit_logp(dec, 15);
+ else
+ silence = 0;
+ if (silence)
+ {
+ /* Pretend we've read all the remaining bits */
+ tell = len*8;
+ dec->nbits_total+=tell-ec_tell(dec);
+ }
+ /* Post-filter parameters default to "off"; they are only read when coding starts at band 0 and at least 16 bits remain. */
+ postfilter_gain = 0;
+ postfilter_pitch = 0;
+ postfilter_tapset = 0;
+ if (st->start==0 && tell+16 <= total_bits)
+ {
+ if(ec_dec_bit_logp(dec, 1))
+ {
+ int qg, octave;
+ octave = ec_dec_uint(dec, 6);
+ postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1;
+ qg = ec_dec_bits(dec, 3);
+ if (ec_tell(dec)+2<=total_bits)
+ postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2);
+ postfilter_gain = QCONST16(.09375f,15)*(qg+1);
+ }
+ tell = ec_tell(dec);
+ }
+ /* The transient flag is only present for LM > 0 and when at least 3 bits remain. */
+ if (LM > 0 && tell+3 <= total_bits)
+ {
+ isTransient = ec_dec_bit_logp(dec, 3);
+ tell = ec_tell(dec);
+ }
+ else
+ isTransient = 0;
+
+ if (isTransient)
+ shortBlocks = M;
+ else
+ shortBlocks = 0;
+
+ /* Decode the global flags (first symbols in the stream) */
+ intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
+ /* Get band energies */
+ unquant_coarse_energy(mode, st->start, st->end, oldBandE,
+ intra_ener, dec, C, LM);
+
+ ALLOC(tf_res, nbEBands, int);
+ tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
+
+ tell = ec_tell(dec);
+ spread_decision = SPREAD_NORMAL;
+ if (tell+4 <= total_bits)
+ spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
+
+ ALLOC(cap, nbEBands, int);
+
+ init_caps(mode,cap,LM,C);
+
+ ALLOC(offsets, nbEBands, int);
+ /* Decode the per-band dynalloc boosts: every accepted flag adds `quanta` to the band and removes it from the remaining budget. */
+ dynalloc_logp = 6;
+ total_bits<<=BITRES;
+ tell = ec_tell_frac(dec);
+ for (i=st->start;i<st->end;i++)
+ {
+ int width, quanta;
+ int dynalloc_loop_logp;
+ int boost;
+ width = C*(eBands[i+1]-eBands[i])<<LM;
+ /* quanta is 6 bits, but no more than 1 bit/sample
+ and no less than 1/8 bit/sample */
+ quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+ dynalloc_loop_logp = dynalloc_logp;
+ boost = 0;
+ while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i])
+ {
+ int flag;
+ flag = ec_dec_bit_logp(dec, dynalloc_loop_logp);
+ tell = ec_tell_frac(dec);
+ if (!flag)
+ break;
+ boost += quanta;
+ total_bits -= quanta;
+ dynalloc_loop_logp = 1;
+ }
+ offsets[i] = boost;
+ /* Making dynalloc more likely */
+ if (boost>0)
+ dynalloc_logp = IMAX(2, dynalloc_logp-1);
+ }
+
+ ALLOC(fine_quant, nbEBands, int);
+ alloc_trim = tell+(6<<BITRES) <= total_bits ?
+ ec_dec_icdf(dec, trim_icdf, 7) : 5;
+
+ bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
+ anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+ bits -= anti_collapse_rsv;
+
+ ALLOC(pulses, nbEBands, int);
+ ALLOC(fine_priority, nbEBands, int);
+
+ codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+ alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+ fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
+
+ unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C);
+
+ /* Decode fixed codebook */
+ ALLOC(collapse_masks, C*nbEBands, unsigned char);
+ ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
+
+ quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+ NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
+ len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
+
+ if (anti_collapse_rsv > 0)
+ {
+ anti_collapse_on = ec_dec_bits(dec, 1);
+ }
+
+ unquant_energy_finalise(mode, st->start, st->end, oldBandE,
+ fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
+
+ if (anti_collapse_on)
+ anti_collapse(mode, X, collapse_masks, LM, C, N,
+ st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+
+ ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
+
+ if (silence)
+ {
+ for (i=0;i<C*nbEBands;i++)
+ oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+ for (i=0;i<C*N;i++)
+ freq[i] = 0;
+ } else {
+ /* Synthesis */
+ denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
+ }
+ c=0; do {
+ OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+ } while (++c<CC);
+ /* Zero the spectrum from the end of the last decoded band (and, when downsampling, from N/downsample) up to N. */
+ c=0; do {
+ int bound = M*eBands[effEnd];
+ if (st->downsample!=1)
+ bound = IMIN(bound, N/st->downsample);
+ for (i=bound;i<N;i++)
+ freq[c*N+i] = 0;
+ } while (++c<C);
+
+ c=0; do {
+ out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+ } while (++c<CC);
+ /* Reconcile stream channels (C) with output channels (CC): duplicate a mono spectrum, or average a stereo one. */
+ if (CC==2&&C==1)
+ {
+ for (i=0;i<N;i++)
+ freq[N+i] = freq[i];
+ }
+ if (CC==1&&C==2)
+ {
+ for (i=0;i<N;i++)
+ freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
+ }
+
+ /* Compute inverse MDCTs */
+ compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
+ /* Comb filter: the first short block goes from the old to the current parameters; for LM != 0 the rest goes from the current to the newly decoded ones. */
+ c=0; do {
+ st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
+ st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
+ comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
+ st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
+ mode->window, overlap);
+ if (LM!=0)
+ comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
+ st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
+ mode->window, overlap);
+
+ } while (++c<CC);
+ st->postfilter_period_old = st->postfilter_period;
+ st->postfilter_gain_old = st->postfilter_gain;
+ st->postfilter_tapset_old = st->postfilter_tapset;
+ st->postfilter_period = postfilter_pitch;
+ st->postfilter_gain = postfilter_gain;
+ st->postfilter_tapset = postfilter_tapset;
+ if (LM!=0)
+ {
+ st->postfilter_period_old = st->postfilter_period;
+ st->postfilter_gain_old = st->postfilter_gain;
+ st->postfilter_tapset_old = st->postfilter_tapset;
+ }
+
+ if (C==1) {
+ for (i=0;i<nbEBands;i++)
+ oldBandE[nbEBands+i]=oldBandE[i];
+ }
+
+ /* In case start or end were to change */
+ if (!isTransient)
+ {
+ for (i=0;i<2*nbEBands;i++)
+ oldLogE2[i] = oldLogE[i];
+ for (i=0;i<2*nbEBands;i++)
+ oldLogE[i] = oldBandE[i];
+ for (i=0;i<2*nbEBands;i++)
+ backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
+ } else {
+ for (i=0;i<2*nbEBands;i++)
+ oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+ }
+ c=0; do
+ {
+ for (i=0;i<st->start;i++)
+ {
+ oldBandE[c*nbEBands+i]=0;
+ oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+ }
+ for (i=st->end;i<nbEBands;i++)
+ {
+ oldBandE[c*nbEBands+i]=0;
+ oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+ }
+ } while (++c<2);
+ st->rng = dec->rng;
+
+ /* We reuse freq[] as scratch space for the de-emphasis */
+ deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
+ st->loss_count = 0;
+ RESTORE_STACK;
+ if (ec_tell(dec) > 8*len)
+ return OPUS_INTERNAL_ERROR;
+ if(ec_get_error(dec))
+ st->error = 1;
+ return frame_size/st->downsample;
+}
+
+
+#ifdef CUSTOM_MODES
+
+#ifdef OPUS_FIXED_POINT
+/* Fixed-point build: 16-bit PCM is the decoder's native output type here, so
+   the request is forwarded unchanged with no external range decoder. */
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   const int status = celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+   return status;
+}
+
+#ifndef DISABLE_FLOAT_API
+/* Fixed-point build, float API: decode into a temporary 16-bit buffer and
+   scale every produced sample by 1/32768 into `pcm`.
+   Returns OPUS_BAD_ARG when pcm is NULL, otherwise whatever
+   celt_decode_with_ec() returns. */
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   int status;
+   int nb_channels;
+   VARDECL(opus_int16, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   nb_channels = st->channels;
+   ALLOC(out, nb_channels*frame_size, opus_int16);
+
+   status = celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   if (status>0)
+   {
+      int k;
+      /* status is the per-channel sample count actually decoded. */
+      const int total = nb_channels*status;
+      for (k=0;k<total;k++)
+         pcm[k]=out[k]*(1.f/32768.f);
+   }
+
+   RESTORE_STACK;
+   return status;
+}
+#endif /* DISABLE_FLOAT_API */
+
+#else
+
+/* Float build: float is the decoder's native output type here, so the
+   request is forwarded unchanged with no external range decoder. */
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   const int status = celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+   return status;
+}
+
+/* Float build, 16-bit API: decode into a temporary buffer and convert every
+   produced sample to opus_int16 with FLOAT2INT16.
+   Returns OPUS_BAD_ARG when pcm is NULL, otherwise whatever
+   celt_decode_with_ec() returns. */
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   int status;
+   int nb_channels;
+   VARDECL(celt_sig, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   nb_channels = st->channels;
+   ALLOC(out, nb_channels*frame_size, celt_sig);
+
+   status = celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   if (status>0)
+   {
+      int k;
+      /* status is the per-channel sample count actually decoded. */
+      const int total = nb_channels*status;
+      for (k=0;k<total;k++)
+         pcm[k] = FLOAT2INT16 (out[k]);
+   }
+
+   RESTORE_STACK;
+   return status;
+}
+
+#endif
+#endif /* CUSTOM_MODES */
+
+/* Variadic control entry point of the decoder.
+   Each request takes one extra argument (an opus_int32 for setters, a pointer
+   for getters), except OPUS_RESET_STATE which takes none.
+   Returns OPUS_OK, OPUS_BAD_ARG for an out-of-range value or NULL pointer,
+   or OPUS_UNIMPLEMENTED for an unknown request. */
+int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
+{
+   int status = OPUS_OK;
+   va_list args;
+
+   va_start(args, request);
+   switch (request)
+   {
+      case CELT_SET_START_BAND_REQUEST:
+      {
+         const opus_int32 band = va_arg(args, opus_int32);
+         if (band>=0 && band<st->mode->nbEBands)
+            st->start = band;
+         else
+            status = OPUS_BAD_ARG;
+      }
+      break;
+      case CELT_SET_END_BAND_REQUEST:
+      {
+         const opus_int32 band = va_arg(args, opus_int32);
+         if (band>=1 && band<=st->mode->nbEBands)
+            st->end = band;
+         else
+            status = OPUS_BAD_ARG;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         const opus_int32 nb_channels = va_arg(args, opus_int32);
+         if (nb_channels>=1 && nb_channels<=2)
+            st->stream_channels = nb_channels;
+         else
+            status = OPUS_BAD_ARG;
+      }
+      break;
+      case CELT_GET_AND_CLEAR_ERROR_REQUEST:
+      {
+         opus_int32 *dest = va_arg(args, opus_int32*);
+         if (dest!=NULL)
+         {
+            *dest = st->error;
+            st->error = 0;
+         } else {
+            status = OPUS_BAD_ARG;
+         }
+      }
+      break;
+      case OPUS_GET_LOOKAHEAD_REQUEST:
+      {
+         opus_int32 *dest = va_arg(args, opus_int32*);
+         if (dest!=NULL)
+            *dest = st->overlap/st->downsample;
+         else
+            status = OPUS_BAD_ARG;
+      }
+      break;
+      case OPUS_RESET_STATE:
+      {
+         int k;
+         int nb_entries;
+         opus_val16 *lpc_mem, *band_e, *log_e, *log_e2;
+         /* Locate the energy histories stored after the decode buffers. */
+         lpc_mem = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
+         band_e = lpc_mem+st->channels*LPC_ORDER;
+         log_e = band_e + 2*st->mode->nbEBands;
+         log_e2 = log_e + 2*st->mode->nbEBands;
+         /* Zero everything from DECODER_RESET_START to the end of the state. */
+         OPUS_CLEAR((char*)&st->DECODER_RESET_START,
+               opus_custom_decoder_get_size(st->mode, st->channels)-
+               ((char*)&st->DECODER_RESET_START - (char*)st));
+         nb_entries = 2*st->mode->nbEBands;
+         for (k=0;k<nb_entries;k++)
+            log_e[k]=log_e2[k]=-QCONST16(28.f,DB_SHIFT);
+      }
+      break;
+      case OPUS_GET_PITCH_REQUEST:
+      {
+         opus_int32 *dest = va_arg(args, opus_int32*);
+         if (dest!=NULL)
+            *dest = st->postfilter_period;
+         else
+            status = OPUS_BAD_ARG;
+      }
+      break;
+      case CELT_GET_MODE_REQUEST:
+      {
+         const CELTMode ** dest = va_arg(args, const CELTMode**);
+         if (dest!=0)
+            *dest = st->mode;
+         else
+            status = OPUS_BAD_ARG;
+      }
+      break;
+      case CELT_SET_SIGNALLING_REQUEST:
+      {
+         const opus_int32 flag = va_arg(args, opus_int32);
+         st->signalling = flag;
+      }
+      break;
+      case OPUS_GET_FINAL_RANGE_REQUEST:
+      {
+         opus_uint32 * dest = va_arg(args, opus_uint32 *);
+         if (dest!=0)
+            *dest = st->rng;
+         else
+            status = OPUS_BAD_ARG;
+      }
+      break;
+      default:
+         status = OPUS_UNIMPLEMENTED;
+         break;
+   }
+   va_end(args);
+   return status;
+}
diff --git a/drivers/opus/celt/celt_encoder.c b/drivers/opus/celt/celt_encoder.c
new file mode 100644
index 0000000000..a61e41f42d
--- /dev/null
+++ b/drivers/opus/celt/celt_encoder.c
@@ -0,0 +1,2353 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2010 Xiph.Org Foundation
+ Copyright (c) 2008 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define CELT_ENCODER_C
+
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "opus_modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+
+/** Encoder state
+ @brief State of one CELT encoder instance. The fixed-size fields are
+ followed, in the same allocation, by the variable-size arrays listed in the
+ comments at the end of the struct; opus_custom_encoder_get_size() computes
+ the total size. Fields before ENCODER_RESET_START are configuration,
+ the ones after it are running state.
+ */
+struct OpusCustomEncoder {
+ const OpusCustomMode *mode; /**< Mode used by the encoder */
+ int overlap; /* Copy of mode->overlap, set at init */
+ int channels; /* Channel count given at init */
+ int stream_channels; /* Initialised equal to channels */
+
+ int force_intra; /* Initialised to 0 */
+ int clip; /* Initialised to 1 */
+ int disable_pf;
+ int complexity; /* Initialised to 5 */
+ int upsample; /* 1 by default; celt_encoder_init() sets it from resampling_factor(sampling_rate) */
+ int start, end; /* Band range; initialised to 0 and mode->effEBands */
+
+ opus_int32 bitrate; /* Initialised to OPUS_BITRATE_MAX */
+ int vbr; /* Initialised to 0 */
+ int signalling; /* Initialised to 1 */
+ int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */
+ int loss_rate;
+ int lsb_depth; /* Initialised to 24 */
+ int variable_duration;
+ int lfe;
+ int arch; /* Architecture selector passed to the init function */
+
+ /* Everything beyond this point gets cleared on a reset */
+#define ENCODER_RESET_START rng
+
+ opus_uint32 rng;
+ int spread_decision;
+ opus_val32 delayedIntra;
+ int tonal_average;
+ int lastCodedBands;
+ int hf_average;
+ int tapset_decision;
+
+ int prefilter_period;
+ opus_val16 prefilter_gain;
+ int prefilter_tapset;
+#ifdef RESYNTH
+ int prefilter_period_old;
+ opus_val16 prefilter_gain_old;
+ int prefilter_tapset_old;
+#endif
+ int consec_transient;
+ AnalysisInfo analysis;
+
+ opus_val32 preemph_memE[2];
+ opus_val32 preemph_memD[2];
+
+ /* VBR-related parameters */
+ opus_int32 vbr_reservoir;
+ opus_int32 vbr_drift;
+ opus_int32 vbr_offset;
+ opus_int32 vbr_count;
+ opus_val32 overlap_max;
+ opus_val16 stereo_saving;
+ int intensity;
+ opus_val16 *energy_mask;
+ opus_val16 spec_avg;
+
+#ifdef RESYNTH
+ /* +MAX_PERIOD/2 to make space for overlap */
+ celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2];
+#endif
+
+ celt_sig in_mem[1]; /* Size = channels*mode->overlap */
+ /* celt_sig prefilter_mem[], Size = channels*COMBFILTER_MAXPERIOD */
+ /* opus_val16 oldBandE[], Size = channels*mode->nbEBands */
+ /* opus_val16 oldLogE[], Size = channels*mode->nbEBands */
+ /* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */
+};
+
+/* Size in bytes of an encoder state for the 48 kHz / 960-sample mode
+   with the given number of channels. */
+int celt_encoder_get_size(int channels)
+{
+   return opus_custom_encoder_get_size(
+         opus_custom_mode_create(48000, 960, NULL), channels);
+}
+
+/* Total allocation size of an encoder state: the struct itself plus the
+   variable-length arrays that trail it. */
+OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels)
+{
+   int size = sizeof(struct CELTEncoder);
+   /* celt_sig in_mem[channels*mode->overlap]; one element is already part of the struct */
+   size += (channels*mode->overlap-1)*sizeof(celt_sig);
+   /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */
+   size += channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig);
+   /* opus_val16 oldBandE[], oldLogE[], oldLogE2[]; each channels*mode->nbEBands */
+   size += 3*channels*mode->nbEBands*sizeof(opus_val16);
+   return size;
+}
+
+#ifdef CUSTOM_MODES
+/* Allocate and initialise an encoder for `mode`.
+   Returns the new state, or NULL when initialisation fails; the init status
+   is stored in *error when error is non-NULL. */
+CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error)
+{
+   CELTEncoder *enc;
+   int status;
+
+   enc = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels));
+   /* A failed allocation is not checked here: init reports it for a NULL state. */
+   status = opus_custom_encoder_init(enc, mode, channels);
+   if (status != OPUS_OK)
+   {
+      opus_custom_encoder_destroy(enc);
+      enc = NULL;
+   }
+   if (error != NULL)
+      *error = status;
+   return enc;
+}
+#endif /* CUSTOM_MODES */
+
+/* Initialise an encoder state in caller-provided memory.
+   st       - memory of at least opus_custom_encoder_get_size(mode, channels) bytes
+   mode     - mode to encode with
+   channels - 1 or 2
+   arch     - architecture selector stored in the state
+   Returns OPUS_OK, OPUS_BAD_ARG for an unsupported channel count, or
+   OPUS_ALLOC_FAIL when st or mode is NULL. */
+static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
+      int channels, int arch)
+{
+   /* Zero channels must be rejected too: the state size would come out
+      smaller than the struct and per-channel loops assume at least one. */
+   if (channels < 1 || channels > 2)
+      return OPUS_BAD_ARG;
+
+   if (st==NULL || mode==NULL)
+      return OPUS_ALLOC_FAIL;
+
+   OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
+
+   st->mode = mode;
+   st->overlap = mode->overlap;
+   st->stream_channels = st->channels = channels;
+
+   st->upsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+
+   st->arch = arch;
+
+   st->constrained_vbr = 1;
+   st->clip = 1;
+
+   st->bitrate = OPUS_BITRATE_MAX;
+   st->vbr = 0;
+   st->force_intra = 0;
+   st->complexity = 5;
+   st->lsb_depth=24;
+
+   /* Let the ctl handler set up the running state. */
+   opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
+
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+/* Public init: same as opus_custom_encoder_init_arch() with the architecture
+   chosen by opus_select_arch(). */
+int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
+{
+   const int arch = opus_select_arch();
+   return opus_custom_encoder_init_arch(st, mode, channels, arch);
+}
+#endif
+
+/* Initialise an encoder on the 48 kHz / 960-sample mode and derive the
+   upsampling factor from sampling_rate.
+   Returns the status of opus_custom_encoder_init_arch(). */
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+      int arch)
+{
+   const int status = opus_custom_encoder_init_arch(st,
+         opus_custom_mode_create(48000, 960, NULL), channels, arch);
+   /* upsample is only touched once the state is known to be valid. */
+   if (status == OPUS_OK)
+      st->upsample = resampling_factor(sampling_rate);
+   return status;
+}
+
+#ifdef CUSTOM_MODES
+/* Release an encoder state; the pointer is handed straight to opus_free(). */
+void opus_custom_encoder_destroy(CELTEncoder *st)
+{
+   opus_free(st);
+}
+#endif /* CUSTOM_MODES */
+
+/* Transient detector. For each of the C channels stored back to back in `in`
+   (len samples each): high-pass filter the signal, build a smoothed energy
+   envelope with a forward and a backward pass over sample pairs, then
+   accumulate inv_table[] entries indexed by the envelope normalised by the
+   frame energy. The largest per-channel result is mask_metric.
+   Returns 1 when mask_metric > 200 (a random bit under FUZZING).
+   *tf_estimate receives a value derived from mask_metric; *tf_chan receives
+   the index of the channel with the largest metric and is left untouched if
+   no channel's metric exceeds 0. */
+static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
+ opus_val16 *tf_estimate, int *tf_chan)
+{
+ int i;
+ VARDECL(opus_val16, tmp);
+ opus_val32 mem0,mem1;
+ int is_transient = 0;
+ opus_int32 mask_metric = 0;
+ int c;
+ opus_val16 tf_max;
+ int len2;
+ /* Table of 6*64/x, trained on real data to minimize the average error */
+ static const unsigned char inv_table[128] = {
+ 255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25,
+ 23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12,
+ 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8,
+ 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2,
+ };
+ SAVE_STACK;
+ ALLOC(tmp, len, opus_val16);
+
+ len2=len/2; /* The envelope below has one entry per pair of samples */
+ for (c=0;c<C;c++)
+ {
+ opus_val32 mean;
+ opus_int32 unmask=0;
+ opus_val32 norm;
+ opus_val16 maxE;
+ mem0=0;
+ mem1=0;
+ /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
+ for (i=0;i<len;i++)
+ {
+ opus_val32 x,y;
+ x = SHR32(in[i+c*len],SIG_SHIFT);
+ y = ADD32(mem0, x);
+#ifdef OPUS_FIXED_POINT
+ mem0 = mem1 + y - SHL32(x,1);
+ mem1 = x - SHR32(y,1);
+#else
+ mem0 = mem1 + y - 2*x;
+ mem1 = x - .5f*y;
+#endif
+ tmp[i] = EXTRACT16(SHR32(y,2));
+ /*printf("%f ", tmp[i]);*/
+ }
+ /*printf("\n");*/
+ /* First few samples are bad because we don't propagate the memory */
+ for (i=0;i<12;i++)
+ tmp[i] = 0;
+
+#ifdef OPUS_FIXED_POINT
+ /* Normalize tmp to max range */
+ {
+ int shift=0;
+ shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len));
+ if (shift!=0)
+ {
+ for (i=0;i<len;i++)
+ tmp[i] = SHL16(tmp[i], shift);
+ }
+ }
+#endif
+
+ mean=0;
+ mem0=0;
+ /* Grouping by two to reduce complexity */
+ /* Forward pass to compute the post-echo threshold*/
+ for (i=0;i<len2;i++)
+ {
+ opus_val16 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),16);
+ mean += x2;
+#ifdef OPUS_FIXED_POINT
+ /* FIXME: Use PSHR16() instead */
+ tmp[i] = mem0 + PSHR32(x2-mem0,4);
+#else
+ tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0);
+#endif
+ mem0 = tmp[i];
+ }
+
+ mem0=0;
+ maxE=0;
+ /* Backward pass to compute the pre-echo threshold */
+ for (i=len2-1;i>=0;i--)
+ {
+#ifdef OPUS_FIXED_POINT
+ /* FIXME: Use PSHR16() instead */
+ tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
+#else
+ tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0);
+#endif
+ mem0 = tmp[i];
+ maxE = MAX16(maxE, mem0); /* Track the peak of the smoothed envelope */
+ }
+ /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/
+
+ /* Compute the ratio of the "frame energy" over the harmonic mean of the energy.
+ This essentially corresponds to a bitrate-normalized temporal noise-to-mask
+ ratio */
+
+ /* As a compromise with the old transient detector, frame energy is the
+ geometric mean of the energy and half the max */
+#ifdef OPUS_FIXED_POINT
+ /* Costs two sqrt() to avoid overflows */
+ mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1)));
+#else
+ mean = celt_sqrt(mean * maxE*.5*len2);
+#endif
+ /* Inverse of the mean energy in Q15+6 */
+ norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1));
+ /* Compute harmonic mean discarding the unreliable boundaries
+ The data is smooth, so we only take 1/4th of the samples */
+ unmask=0;
+ for (i=12;i<len2-5;i+=4)
+ {
+ int id;
+#ifdef OPUS_FIXED_POINT
+ id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */
+#else
+ id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */
+#endif
+ unmask += inv_table[id];
+ }
+ /*printf("%d\n", unmask);*/
+ /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */
+ unmask = 64*unmask*4/(6*(len2-17)); /* NOTE(review): divisor is zero when len2 == 17; callers presumably never pass len == 34 or 35 - confirm */
+ if (unmask>mask_metric) /* Keep the channel with the largest metric */
+ {
+ *tf_chan = c;
+ mask_metric = unmask;
+ }
+ }
+ is_transient = mask_metric>200; /* Decision threshold on the largest per-channel metric */
+
+ /* Arbitrary metric for VBR boost */
+ tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
+ /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
+ *tf_estimate = celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28)));
+ /*printf("%d %f\n", tf_max, mask_metric);*/
+ RESTORE_STACK;
+#ifdef FUZZING
+ is_transient = rand()&0x1;
+#endif
+ /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/
+ return is_transient;
+}
+
+/* Looks for sudden increases of energy to decide whether we need to patch
+   the transient decision.
+   newE     - band energies of the current frame, nbEBands entries per channel
+   oldE     - band energies of the previous frame, same layout
+   nbEBands - stride between channels in newE/oldE
+   end      - number of bands to consider
+   C        - number of channels
+   Returns non-zero when the mean per-band increase over bands 2..end-2
+   exceeds 1 (in DB_SHIFT units). */
+int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
+      int end, int C)
+{
+   int i, c;
+   opus_val32 mean_diff=0;
+   opus_val16 spread_old[26];
+   /* The mean below is taken over end-3 bands per channel. With end<=3 there
+      is nothing to average and the divisor would be zero or negative, so
+      report "no patch" instead of dividing. */
+   if (end <= 3)
+      return 0;
+   /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to
+      avoid false detection caused by irrelevant bands */
+   if (C==1)
+   {
+      spread_old[0] = oldE[0];
+      for (i=1;i<end;i++)
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]);
+   } else {
+      spread_old[0] = MAX16(oldE[0],oldE[nbEBands]);
+      for (i=1;i<end;i++)
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT),
+                               MAX16(oldE[i],oldE[i+nbEBands]));
+   }
+   for (i=end-2;i>=0;i--)
+      spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT));
+   /* Compute mean increase */
+   c=0; do {
+      for (i=2;i<end-1;i++)
+      {
+         opus_val16 x1, x2;
+         /* Index the new energies per channel; without the c*nbEBands offset
+            channel 0 is counted C times and the other channel is ignored. */
+         x1 = MAX16(0, newE[i + c*nbEBands]);
+         x2 = MAX16(0, spread_old[i]);
+         mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2))));
+      }
+   } while (++c<C);
+   mean_diff = DIV32(mean_diff, C*(end-3));
+   /*printf("%f %f %d\n", mean_diff, max_diff, count);*/
+   return mean_diff > QCONST16(1.f, DB_SHIFT);
+}
+
+/** Window the input and run the forward MDCT for every sub-frame of every
+    input channel. With shortBlocks set, shortBlocks transforms of
+    mode->shortMdctSize are interleaved in the output; otherwise one transform
+    of mode->shortMdctSize<<LM is done. A 2-channel input is averaged into one
+    channel when C==1, and with upsample != 1 the low part of each spectrum is
+    scaled by upsample while the remainder is zeroed. */
+static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
+      celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample)
+{
+   const int overlap = OVERLAP(mode);
+   const int nb_blocks = shortBlocks ? shortBlocks : 1;
+   const int block_len = shortBlocks ? mode->shortMdctSize : mode->shortMdctSize<<LM;
+   const int shift = shortBlocks ? mode->maxLM : mode->maxLM-LM;
+   int ch, blk, k;
+
+   ch=0; do {
+      /* Interleaving the sub-frames while doing the MDCTs */
+      for (blk=0;blk<nb_blocks;blk++)
+         clt_mdct_forward(&mode->mdct, in+ch*(nb_blocks*block_len+overlap)+blk*block_len,
+               &out[blk+ch*block_len*nb_blocks], mode->window, overlap, shift, nb_blocks);
+   } while (++ch<CC);
+
+   /* Stereo input coded as mono: average the two spectra into the first. */
+   if (CC==2&&C==1)
+   {
+      for (k=0;k<nb_blocks*block_len;k++)
+         out[k] = ADD32(HALF32(out[k]), HALF32(out[nb_blocks*block_len+k]));
+   }
+
+   if (upsample != 1)
+   {
+      ch=0; do
+      {
+         const int bound = nb_blocks*block_len/upsample;
+         k = 0;
+         while (k<bound)
+         {
+            out[ch*nb_blocks*block_len+k] *= upsample;
+            k++;
+         }
+         while (k<nb_blocks*block_len)
+         {
+            out[ch*nb_blocks*block_len+k] = 0;
+            k++;
+         }
+      } while (++ch<C);
+   }
+}
+
+
+/* Copy one channel of interleaved PCM (stride CC) into inp and apply the
+   pre-emphasis filter.
+   N/upsample input samples are read; with upsample != 1 they are written
+   every `upsample` positions of a zeroed inp. In float builds NaNs become 0
+   and, when clip is set, samples are limited to +/-65536. *mem carries the
+   filter memory in and out. */
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+      int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
+{
+   int k;
+   const opus_val16 c0 = coef[0];
+   const int nb_in = N/upsample;
+   celt_sig state;
+
+   /* Positions between the stuffed samples must be zero. */
+   if (upsample!=1)
+   {
+      for (k=0;k<N;k++)
+         inp[k] = 0;
+   }
+   for (k=0;k<nb_in;k++)
+   {
+      celt_sig v = SCALEIN(pcmp[CC*k]);
+#ifndef OPUS_FIXED_POINT
+      /* Replace NaNs with zeros */
+      if (!(v==v))
+         v = 0;
+#endif
+      inp[k*upsample] = v;
+   }
+
+#ifndef OPUS_FIXED_POINT
+   if (clip)
+   {
+      /* Clip input to avoid encoding non-portable files */
+      for (k=0;k<nb_in;k++)
+         inp[k*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[k*upsample]));
+   }
+#else
+   (void)clip; /* Avoids a warning about clip being unused. */
+#endif
+   state = *mem;
+#ifdef CUSTOM_MODES
+   if (coef[1] != 0)
+   {
+      /* Filter variant used when coef[1] is non-zero. */
+      const opus_val16 c1 = coef[1];
+      const opus_val16 c2 = coef[2];
+      for (k=0;k<N;k++)
+      {
+         const celt_sig scaled = MULT16_16(c2, inp[k]);
+         /* Apply pre-emphasis */
+         inp[k] = scaled + state;
+         state = MULT16_32_Q15(c1, inp[k]) - MULT16_32_Q15(c0, scaled);
+      }
+   } else
+#endif
+   {
+      for (k=0;k<N;k++)
+      {
+         const celt_sig shifted = SHL32(inp[k], SIG_SHIFT);
+         /* Apply pre-emphasis */
+         inp[k] = shifted + state;
+         state = - MULT16_32_Q15(c0, shifted);
+      }
+   }
+   *mem = state;
+}
+
+
+
+static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias)
+{ /* Returns the sum of ABS16(tmp[i]) for i in [0, N), adjusted by a term that
+ depends on LM*bias through MAC16_32_Q15 (presumably L1 + LM*bias*L1 in Q15 -
+ confirm against the macro definition). With LM == 0 or bias == 0 the
+ multiplier passed to the macro is zero. */
+ int i;
+ opus_val32 L1;
+ L1 = 0;
+ for (i=0;i<N;i++)
+ L1 += EXTEND32(ABS16(tmp[i]));
+ /* When in doubt, prefer good freq resolution */
+ L1 = MAC16_32_Q15(L1, LM*bias, L1);
+ return L1;
+
+}
+
+static int tf_analysis(const CELTMode *m, int len, int isTransient,
+ int *tf_res, int lambda, celt_norm *X, int N0, int LM,
+ int *tf_sum, opus_val16 tf_estimate, int tf_chan)
+{ /* Chooses a per-band time-frequency resolution flag and the tf_select value.
+ For each of the len bands, the coefficients of channel tf_chan are copied
+ from X, l1_metric() is evaluated before and after successive haar1()
+ passes, and the level with the smallest metric is kept in metric[i]
+ (2*best_level for transients, -2*best_level otherwise).
+ A two-state search with transition penalty lambda then fills
+ tf_res[0..len-1] with 0 or 1.
+ Outputs: tf_res[], and *tf_sum (accumulates (isTransient ? LM : 0) - metric[i]/2).
+ Returns tf_select, which is only set to 1 for transients.
+ tf_estimate is only used to derive the bias passed to l1_metric().
+ NOTE(review): both cost loops start at i=1 with cost0 initialised to 0, so
+ metric[0] never contributes to the path cost - confirm this is intended. */
+ int i;
+ VARDECL(int, metric);
+ int cost0;
+ int cost1;
+ VARDECL(int, path0);
+ VARDECL(int, path1);
+ VARDECL(celt_norm, tmp);
+ VARDECL(celt_norm, tmp_1);
+ int sel;
+ int selcost[2];
+ int tf_select=0;
+ opus_val16 bias;
+
+ SAVE_STACK;
+ bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(.5f,14)-tf_estimate));
+ /*printf("%f ", bias);*/
+
+ ALLOC(metric, len, int);
+ ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); /* scratch sized from the width of the last band */
+ ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
+ ALLOC(path0, len, int);
+ ALLOC(path1, len, int);
+
+ *tf_sum = 0;
+ for (i=0;i<len;i++)
+ {
+ int j, k, N;
+ int narrow;
+ opus_val32 L1, best_L1;
+ int best_level=0;
+ N = (m->eBands[i+1]-m->eBands[i])<<LM;
+ /* band is too narrow to be split down to LM=-1 */
+ narrow = (m->eBands[i+1]-m->eBands[i])==1;
+ for (j=0;j<N;j++)
+ tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
+ /* Just add the right channel if we're in stereo */
+ /*if (C==2)
+ for (j=0;j<N;j++)
+ tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/
+ L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias);
+ best_L1 = L1;
+ /* Check the -1 case for transients */
+ if (isTransient && !narrow)
+ {
+ for (j=0;j<N;j++)
+ tmp_1[j] = tmp[j]; /* work on a copy so tmp stays untouched for the loop below */
+ haar1(tmp_1, N>>LM, 1<<LM);
+ L1 = l1_metric(tmp_1, N, LM+1, bias);
+ if (L1<best_L1)
+ {
+ best_L1 = L1;
+ best_level = -1;
+ }
+ }
+ /*printf ("%f ", L1);*/
+ for (k=0;k<LM+!(isTransient||narrow);k++) /* one extra level is tried for non-transient, non-narrow bands */
+ {
+ int B;
+
+ if (isTransient)
+ B = (LM-k-1);
+ else
+ B = k+1;
+
+ haar1(tmp, N>>k, 1<<k);
+
+ L1 = l1_metric(tmp, N, B, bias);
+
+ if (L1 < best_L1)
+ {
+ best_L1 = L1;
+ best_level = k+1;
+ }
+ }
+ /*printf ("%d ", isTransient ? LM-best_level : best_level);*/
+ /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */
+ if (isTransient)
+ metric[i] = 2*best_level;
+ else
+ metric[i] = -2*best_level;
+ *tf_sum += (isTransient ? LM : 0) - metric[i]/2;
+ /* For bands that can't be split to -1, set the metric to the half-way point to avoid
+ biasing the decision */
+ if (narrow && (metric[i]==0 || metric[i]==-2*LM))
+ metric[i]-=1;
+ /*printf("%d ", metric[i]);*/
+ }
+ /*printf("\n");*/
+ /* Search for the optimal tf resolution, including tf_select */
+ tf_select = 0;
+ for (sel=0;sel<2;sel++) /* cost of the best path for each candidate tf_select value */
+ {
+ cost0 = 0;
+ cost1 = isTransient ? 0 : lambda;
+ for (i=1;i<len;i++)
+ {
+ int curr0, curr1;
+ curr0 = IMIN(cost0, cost1 + lambda);
+ curr1 = IMIN(cost0 + lambda, cost1);
+ cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
+ cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
+ }
+ cost0 = IMIN(cost0, cost1);
+ selcost[sel]=cost0;
+ }
+ /* For now, we're conservative and only allow tf_select=1 for transients.
+ * If tests confirm it's useful for non-transients, we could allow it. */
+ if (selcost[1]<selcost[0] && isTransient)
+ tf_select=1;
+ cost0 = 0;
+ cost1 = isTransient ? 0 : lambda;
+ /* Viterbi forward pass */
+ for (i=1;i<len;i++)
+ {
+ int curr0, curr1;
+ int from0, from1;
+
+ from0 = cost0;
+ from1 = cost1 + lambda;
+ if (from0 < from1)
+ {
+ curr0 = from0;
+ path0[i]= 0;
+ } else {
+ curr0 = from1;
+ path0[i]= 1;
+ }
+
+ from0 = cost0 + lambda;
+ from1 = cost1;
+ if (from0 < from1)
+ {
+ curr1 = from0;
+ path1[i]= 0;
+ } else {
+ curr1 = from1;
+ path1[i]= 1;
+ }
+ cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
+ cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
+ }
+ tf_res[len-1] = cost0 < cost1 ? 0 : 1;
+ /* Viterbi backward pass to check the decisions */
+ for (i=len-2;i>=0;i--)
+ {
+ if (tf_res[i+1] == 1)
+ tf_res[i] = path1[i+1];
+ else
+ tf_res[i] = path0[i+1];
+ }
+ /*printf("%d %f\n", *tf_sum, tf_estimate);*/
+ RESTORE_STACK;
+#ifdef FUZZING
+ tf_select = rand()&0x1; /* FUZZING builds discard the analysis and randomise the decisions */
+ tf_res[0] = rand()&0x1;
+ for (i=1;i<len;i++)
+ tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0);
+#endif
+ return tf_select;
+}
+
+static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
+{ /* Writes the per-band flags tf_res[start..end-1] to enc and then converts them
+ in place to tf_select_table values.
+ Each flag is coded as a change relative to the previous coded flag
+ (tf_res[i] ^ curr) with ec_enc_bit_logp; logp is 2 (transient) or 4 for the
+ first band and 4 (transient) or 5 afterwards.
+ The bit budget is enc->storage*8. When a flag does not fit, it is not coded
+ and tf_res[i] is forced to the last coded value.
+ tf_select is coded only if a bit was reserved for it and the two candidate
+ table entries differ; otherwise it is treated as 0. */
+ int curr, i;
+ int tf_select_rsv;
+ int tf_changed;
+ int logp;
+ opus_uint32 budget;
+ opus_uint32 tell;
+ budget = enc->storage*8;
+ tell = ec_tell(enc);
+ logp = isTransient ? 2 : 4;
+ /* Reserve space to code the tf_select decision. */
+ tf_select_rsv = LM>0 && tell+logp+1 <= budget;
+ budget -= tf_select_rsv;
+ curr = tf_changed = 0;
+ for (i=start;i<end;i++)
+ {
+ if (tell+logp<=budget)
+ {
+ ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp);
+ tell = ec_tell(enc);
+ curr = tf_res[i];
+ tf_changed |= curr; /* becomes 1 once any coded flag is 1 */
+ }
+ else
+ tf_res[i] = curr;
+ logp = isTransient ? 4 : 5;
+ }
+ /* Only code tf_select if it would actually make a difference. */
+ if (tf_select_rsv &&
+ tf_select_table[LM][4*isTransient+0+tf_changed]!=
+ tf_select_table[LM][4*isTransient+2+tf_changed])
+ ec_enc_bit_logp(enc, tf_select, 1);
+ else
+ tf_select = 0;
+ for (i=start;i<end;i++)
+ tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
+ /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/
+}
+
+
+static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
+ const opus_val16 *bandLogE, int end, int LM, int C, int N0,
+ AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
+ int intensity, opus_val16 surround_trim)
+{ /* Computes the allocation trim index, clamped to [0, 10].
+ trim starts at 5 (Q8 in the QCONST16 form) and is adjusted by:
+ - for C==2, a term derived from the inter-channel correlation of the first
+   8 bands; this branch also updates *stereo_saving;
+ - the spectral tilt, a weighted sum of bandLogE over bands 0..end-2 of every
+   channel divided by C*(end-1);
+ - surround_trim and tf_estimate;
+ - analysis->tonality_slope when analysis->valid (unless DISABLE_FLOAT_API).
+ *stereo_saving is only written when C==2.
+ NOTE(review): the trim_index-- / trim_index++ steps below are overwritten by
+ the final assignment computed from trim, so they do not affect the result.
+ NOTE(review): the tilt division uses C*(end-1) - presumably callers
+ guarantee end > 1; confirm. */
+ int i;
+ opus_val32 diff=0;
+ int c;
+ int trim_index = 5;
+ opus_val16 trim = QCONST16(5.f, 8);
+ opus_val16 logXC, logXC2;
+ if (C==2)
+ {
+ opus_val16 sum = 0; /* Q10 */
+ opus_val16 minXC; /* Q10 */
+ /* Compute inter-channel correlation for low frequencies */
+ for (i=0;i<8;i++)
+ {
+ int j;
+ opus_val32 partial = 0;
+ for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+ partial = MAC16_16(partial, X[j], X[N0+j]);
+ sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
+ }
+ sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
+ sum = MIN16(QCONST16(1.f, 10), ABS16(sum));
+ minXC = sum;
+ for (i=8;i<intensity;i++) /* smallest per-band correlation magnitude over bands 8..intensity-1 */
+ {
+ int j;
+ opus_val32 partial = 0;
+ for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+ partial = MAC16_16(partial, X[j], X[N0+j]);
+ minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18))));
+ }
+ minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC));
+ /*printf ("%f\n", sum);*/
+ if (sum > QCONST16(.995f,10))
+ trim_index-=4;
+ else if (sum > QCONST16(.92f,10))
+ trim_index-=3;
+ else if (sum > QCONST16(.85f,10))
+ trim_index-=2;
+ else if (sum > QCONST16(.8f,10))
+ trim_index-=1;
+ /* mid-side savings estimations based on the LF average*/
+ logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
+ /* mid-side savings estimations based on min correlation */
+ logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC)));
+#ifdef OPUS_FIXED_POINT
+ /* Compensate for Q20 vs Q14 input and convert output to Q8 */
+ logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+ logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+#endif
+
+ trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC));
+ *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2));
+ }
+
+ /* Estimate spectral tilt */
+ c=0; do {
+ for (i=0;i<end-1;i++)
+ {
+ diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end); /* weight grows linearly with the band index */
+ }
+ } while (++c<C);
+ diff /= C*(end-1);
+ /*printf("%f\n", diff);*/
+ if (diff > QCONST16(2.f, DB_SHIFT))
+ trim_index--;
+ if (diff > QCONST16(8.f, DB_SHIFT))
+ trim_index--;
+ if (diff < -QCONST16(4.f, DB_SHIFT))
+ trim_index++;
+ if (diff < -QCONST16(10.f, DB_SHIFT))
+ trim_index++;
+ trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+ trim -= SHR16(surround_trim, DB_SHIFT-8);
+ trim -= 2*SHR16(tf_estimate, 14-8);
+#ifndef DISABLE_FLOAT_API
+ if (analysis->valid)
+ {
+ trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8),
+ (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))));
+ }
+#endif
+
+#ifdef OPUS_FIXED_POINT
+ trim_index = PSHR32(trim, 8);
+#else
+ trim_index = (int)floor(.5f+trim);
+#endif
+ if (trim_index<0)
+ trim_index = 0;
+ if (trim_index>10)
+ trim_index = 10;
+ /*printf("%d\n", trim_index);*/
+#ifdef FUZZING
+ trim_index = rand()%11; /* FUZZING builds replace the result with a random value in [0, 10] */
+#endif
+ return trim_index;
+}
+
+static int stereo_analysis(const CELTMode *m, const celt_norm *X,
+ int LM, int N0)
+{ /* Compares the L1 norm of the left/right representation with that of the
+ mid/side representation over the first 13 bands. The second channel is
+ read at X[N0+j].
+ sumMS is scaled by 0.707107 and both sums are weighted before the
+ comparison; the weight on the M/S side is larger by thetas (13, or 5 when
+ LM<=1).
+ Returns non-zero when the weighted M/S sum exceeds the weighted L/R sum
+ (presumably meaning that separate L/R coding is preferred - confirm at the
+ call site). */
+ int i;
+ int thetas;
+ opus_val32 sumLR = EPSILON, sumMS = EPSILON;
+
+ /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */
+ for (i=0;i<13;i++)
+ {
+ int j;
+ for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+ {
+ opus_val32 L, R, M, S;
+ /* We cast to 32-bit first because of the -32768 case */
+ L = EXTEND32(X[j]);
+ R = EXTEND32(X[N0+j]);
+ M = ADD32(L, R);
+ S = SUB32(L, R);
+ sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R)));
+ sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S)));
+ }
+ }
+ sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS);
+ thetas = 13;
+ /* We don't need thetas for lower bands with LM<=1 */
+ if (LM<=1)
+ thetas -= 8;
+ return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS)
+ > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
+}
+
+static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
+ int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
+ int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
+ int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)
+{ /* Computes the per-band dynamic allocation boosts.
+ offsets[0..nbEBands-1] is first cleared. A per-band noise floor is built
+ from logN, lsb_depth and eMeans, and the return value maxDepth is the
+ largest bandLogE - noise_floor over bands 0..end-1 of all C channels.
+ Boosts are only computed when effectiveBytes > 50, LM >= 1 and lfe is 0:
+ a follower curve is derived from bandLogE2, the excess of bandLogE over it
+ (at least surround_dynalloc[i]) becomes the boost for bands start..end-1,
+ offsets[i] receives the boost and the bit cost is accumulated.
+ For CBR and non-transient CVBR the total is capped at effectiveBytes/4
+ bytes and the loop stops at the band that reaches the cap.
+ *tot_boost_ receives the accumulated boost (0 when no boost is computed).
+ noise_floor is allocated with C*nbEBands entries but only indices
+ 0..end-1 are written and read.
+ NOTE(review): last is declared outside the channel loop, so it is not
+ reset between channels - confirm this is intended. */
+ int i, c;
+ opus_int32 tot_boost=0;
+ opus_val16 maxDepth;
+ VARDECL(opus_val16, follower);
+ VARDECL(opus_val16, noise_floor);
+ SAVE_STACK;
+ ALLOC(follower, C*nbEBands, opus_val16);
+ ALLOC(noise_floor, C*nbEBands, opus_val16);
+ for (i=0;i<nbEBands;i++)
+ offsets[i] = 0;
+ /* Dynamic allocation code */
+ maxDepth=-QCONST16(31.9f, DB_SHIFT);
+ for (i=0;i<end;i++)
+ {
+ /* Noise floor must take into account eMeans, the depth, the width of the bands
+ and the preemphasis filter (approx. square of bark band ID) */
+ noise_floor[i] = MULT16_16(QCONST16(0.0625f, DB_SHIFT),logN[i])
+ +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6)
+ +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5));
+ }
+ c=0;do
+ {
+ for (i=0;i<end;i++)
+ maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]);
+ } while (++c<C);
+ /* Make sure that dynamic allocation can't make us bust the budget */
+ if (effectiveBytes > 50 && LM>=1 && !lfe)
+ {
+ int last=0;
+ c=0;do
+ {
+ follower[c*nbEBands] = bandLogE2[c*nbEBands];
+ for (i=1;i<end;i++) /* forward pass: the follower rises by at most 1.5 per band */
+ {
+ /* The last band to be at least 3 dB higher than the previous one
+ is the last we'll consider. Otherwise, we run into problems on
+ bandlimited signals. */
+ if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
+ last=i;
+ follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
+ }
+ for (i=last-1;i>=0;i--) /* backward pass from band last: rise limited to 2 per band */
+ follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
+ for (i=0;i<end;i++)
+ follower[c*nbEBands+i] = MAX16(follower[c*nbEBands+i], noise_floor[i]);
+ } while (++c<C);
+ if (C==2)
+ {
+ for (i=start;i<end;i++)
+ {
+ /* Consider 24 dB "cross-talk" */
+ follower[nbEBands+i] = MAX16(follower[nbEBands+i], follower[ i]-QCONST16(4.f,DB_SHIFT));
+ follower[ i] = MAX16(follower[ i], follower[nbEBands+i]-QCONST16(4.f,DB_SHIFT));
+ follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i]));
+ }
+ } else {
+ for (i=start;i<end;i++)
+ {
+ follower[i] = MAX16(0, bandLogE[i]-follower[i]);
+ }
+ }
+ for (i=start;i<end;i++) /* from here on follower[i] holds the boost for band i, not the envelope */
+ follower[i] = MAX16(follower[i], surround_dynalloc[i]);
+ /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
+ if ((!vbr || constrained_vbr)&&!isTransient)
+ {
+ for (i=start;i<end;i++)
+ follower[i] = HALF16(follower[i]);
+ }
+ for (i=start;i<end;i++)
+ {
+ int width;
+ int boost;
+ int boost_bits;
+
+ if (i<8)
+ follower[i] *= 2;
+ if (i>=12)
+ follower[i] = HALF16(follower[i]);
+ follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
+
+ width = C*(eBands[i+1]-eBands[i])<<LM;
+ if (width<6) /* the boost step size depends on the band width */
+ {
+ boost = (int)SHR32(EXTEND32(follower[i]),DB_SHIFT);
+ boost_bits = boost*width<<BITRES;
+ } else if (width > 48) {
+ boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT);
+ boost_bits = (boost*width<<BITRES)/8;
+ } else {
+ boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
+ boost_bits = boost*6<<BITRES;
+ }
+ /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */
+ if ((!vbr || (constrained_vbr&&!isTransient))
+ && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4)
+ {
+ opus_int32 cap = ((effectiveBytes/4)<<BITRES<<3);
+ offsets[i] = cap-tot_boost;
+ tot_boost = cap;
+ break;
+ } else {
+ offsets[i] = boost;
+ tot_boost += boost_bits;
+ }
+ }
+ }
+ *tot_boost_ = tot_boost;
+ RESTORE_STACK;
+ return maxDepth;
+}
+
+
+static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
+ int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes)
+{ /* Estimates the pitch period and gain and applies the comb pre-filter to in.
+ For each of the CC channels, pre[c] holds COMBFILTER_MAXPERIOD samples of
+ history from prefilter_mem followed by the N new samples taken from in
+ (after the overlap region).
+ When enabled is non-zero, the pitch is searched with pitch_downsample,
+ pitch_search and remove_doubling, and the gain is scaled by 0.7 and reduced
+ further according to st->loss_rate. Otherwise the gain is 0 and the period
+ is COMBFILTER_MINPERIOD.
+ A gain below the adaptive threshold turns the filter off; otherwise the
+ gain is quantised to qg in [0, 7].
+ Side effects: in is filtered in place, st->in_mem and prefilter_mem are
+ updated, and st->prefilter_period is raised to at least
+ COMBFILTER_MINPERIOD. The other st->prefilter_* fields are only read here.
+ Outputs: *gain, *pitch and *qgain. Returns 1 if the filter is on, else 0. */
+ int c;
+ VARDECL(celt_sig, _pre);
+ celt_sig *pre[2];
+ const CELTMode *mode;
+ int pitch_index;
+ opus_val16 gain1;
+ opus_val16 pf_threshold;
+ int pf_on;
+ int qg;
+ SAVE_STACK;
+
+ mode = st->mode;
+ ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
+
+ pre[0] = _pre;
+ pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
+
+
+ c=0; do {
+ OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
+ OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
+ } while (++c<CC);
+
+ if (enabled)
+ {
+ VARDECL(opus_val16, pitch_buf);
+ ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
+
+ pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
+ /* Don't search the last 1.5 octave of the range because
+ there are too many false positives due to short-term correlation */
+ pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
+ COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index,
+ st->arch);
+ pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
+
+ gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
+ N, &pitch_index, st->prefilter_period, st->prefilter_gain);
+ if (pitch_index > COMBFILTER_MAXPERIOD-2)
+ pitch_index = COMBFILTER_MAXPERIOD-2;
+ gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
+ /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
+ if (st->loss_rate>2) /* the gain is halved, quartered, then disabled as loss_rate grows */
+ gain1 = HALF32(gain1);
+ if (st->loss_rate>4)
+ gain1 = HALF32(gain1);
+ if (st->loss_rate>8)
+ gain1 = 0;
+ } else {
+ gain1 = 0;
+ pitch_index = COMBFILTER_MINPERIOD;
+ }
+
+ /* Gain threshold for enabling the prefilter/postfilter */
+ pf_threshold = QCONST16(.2f,15);
+
+ /* Adjusting the threshold based on rate and continuity */
+ if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
+ pf_threshold += QCONST16(.2f,15);
+ if (nbAvailableBytes<25)
+ pf_threshold += QCONST16(.1f,15);
+ if (nbAvailableBytes<35)
+ pf_threshold += QCONST16(.1f,15);
+ if (st->prefilter_gain > QCONST16(.4f,15))
+ pf_threshold -= QCONST16(.1f,15);
+ if (st->prefilter_gain > QCONST16(.55f,15))
+ pf_threshold -= QCONST16(.1f,15);
+
+ /* Hard threshold at 0.2 */
+ pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
+ if (gain1<pf_threshold)
+ {
+ gain1 = 0;
+ pf_on = 0;
+ qg = 0;
+ } else {
+ /*This block is not gated by a total bits check only because
+ of the nbAvailableBytes check above.*/
+ if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15))
+ gain1=st->prefilter_gain;
+
+#ifdef OPUS_FIXED_POINT
+ qg = ((gain1+1536)>>10)/3-1;
+#else
+ qg = (int)floor(.5f+gain1*32/3)-1;
+#endif
+ qg = IMAX(0, IMIN(7, qg));
+ gain1 = QCONST16(0.09375f,15)*(qg+1); /* replace the gain by its quantised value */
+ pf_on = 1;
+ }
+ /*printf("%d %f\n", pitch_index, gain1);*/
+
+ c=0; do {
+ int offset = mode->shortMdctSize-st->overlap;
+ st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+ OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
+ if (offset) /* the first offset samples keep the previous period, gain and tapset */
+ comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
+ st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
+ st->prefilter_tapset, st->prefilter_tapset, NULL, 0);
+
+ comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
+ st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
+ st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap);
+ OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
+
+ if (N>COMBFILTER_MAXPERIOD) /* keep the most recent COMBFILTER_MAXPERIOD unfiltered samples as history */
+ {
+ OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
+ } else {
+ OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
+ OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
+ }
+ } while (++c<CC);
+
+ RESTORE_STACK;
+ *gain = gain1;
+ *pitch = pitch_index;
+ *qgain = qg;
+ return pf_on;
+}
+
+static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 base_target,
+ int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity,
+ int constrained_vbr, opus_val16 stereo_saving, int tot_boost,
+ opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth,
+ int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking,
+ opus_val16 temporal_vbr)
+{ /* Derives the VBR target for the current frame from base_target.
+ Adjustments are applied in this order: low analysis->activity reduction,
+ stereo savings (C==2), dynalloc boost (tot_boost), transient boost
+ (tf_estimate), tonality boost, surround masking, a floor based on
+ maxDepth, a pull back towards base_target for constrained VBR or bitrates
+ below 64000, and temporal VBR.
+ The result is finally limited to at most 2*base_target.
+ The analysis-based steps are compiled out when DISABLE_FLOAT_API is defined
+ and are skipped when analysis->valid is 0.
+ mode is only used for nbEBands and eBands. */
+ /* The target rate in 8th bits per frame */
+ opus_int32 target;
+ int coded_bins;
+ int coded_bands;
+ opus_val16 tf_calibration;
+ int nbEBands;
+ const opus_int16 *eBands;
+
+ nbEBands = mode->nbEBands;
+ eBands = mode->eBands;
+
+ coded_bands = lastCodedBands ? lastCodedBands : nbEBands; /* lastCodedBands==0 falls back to all bands */
+ coded_bins = eBands[coded_bands]<<LM;
+ if (C==2)
+ coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM;
+
+ target = base_target;
+
+ /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
+#ifndef DISABLE_FLOAT_API
+ if (analysis->valid && analysis->activity<.4)
+ target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
+#endif
+ /* Stereo savings */
+ if (C==2)
+ {
+ int coded_stereo_bands;
+ int coded_stereo_dof;
+ opus_val16 max_frac;
+ coded_stereo_bands = IMIN(intensity, coded_bands);
+ coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
+ /* Maximum fraction of the bits we can save if the signal is mono. */
+ max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
+ stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8));
+ /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
+ target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target),
+ SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
+ }
+ /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
+ target += tot_boost-(16<<LM);
+ /* Apply transient boost, compensating for average boost. */
+ tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ?
+ QCONST16(0.02f,14) : QCONST16(0.04f,14);
+ target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
+
+#ifndef DISABLE_FLOAT_API
+ /* Apply tonality boost */
+ if (analysis->valid && !lfe)
+ {
+ opus_int32 tonal_target;
+ float tonal;
+
+ /* Tonality boost (compensating for the average). */
+ tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f;
+ tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
+ if (pitch_change)
+ tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f);
+ /*printf("%f %f ", analysis->tonality, tonal);*/
+ target = tonal_target;
+ }
+#endif
+
+ if (has_surround_mask&&!lfe)
+ {
+ opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT);
+ /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/
+ target = IMAX(target/4, surround_target); /* never drop below a quarter of the current target */
+ }
+
+ {
+ opus_int32 floor_depth;
+ int bins;
+ bins = eBands[nbEBands-2]<<LM;
+ /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/
+ floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
+ floor_depth = IMAX(floor_depth, target>>2);
+ target = IMIN(target, floor_depth); /* limit the target by the depth-based value, but keep at least target>>2 */
+ /*printf("%f %d\n", maxDepth, floor_depth);*/
+ }
+
+ if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
+ {
+ opus_val16 rate_factor;
+#ifdef OPUS_FIXED_POINT
+ rate_factor = MAX16(0,(bitrate-32000));
+#else
+ rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
+#endif
+ if (constrained_vbr)
+ rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
+ target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target); /* scale the deviation from base_target by rate_factor */
+
+ }
+
+ if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
+ {
+ opus_val16 amount;
+ opus_val16 tvbr_factor;
+ amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate)));
+ tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT);
+ target += (opus_int32)MULT16_32_Q15(tvbr_factor, target);
+ }
+
+ /* Don't allow more than doubling the rate */
+ target = IMIN(2*base_target, target);
+
+ return target;
+}
+
+int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
+{
+ int i, c, N;
+ opus_int32 bits;
+ ec_enc _enc;
+ VARDECL(celt_sig, in);
+ VARDECL(celt_sig, freq);
+ VARDECL(celt_norm, X);
+ VARDECL(celt_ener, bandE);
+ VARDECL(opus_val16, bandLogE);
+ VARDECL(opus_val16, bandLogE2);
+ VARDECL(int, fine_quant);
+ VARDECL(opus_val16, error);
+ VARDECL(int, pulses);
+ VARDECL(int, cap);
+ VARDECL(int, offsets);
+ VARDECL(int, fine_priority);
+ VARDECL(int, tf_res);
+ VARDECL(unsigned char, collapse_masks);
+ celt_sig *prefilter_mem;
+ opus_val16 *oldBandE, *oldLogE, *oldLogE2;
+ int shortBlocks=0;
+ int isTransient=0;
+ const int CC = st->channels;
+ const int C = st->stream_channels;
+ int LM, M;
+ int tf_select;
+ int nbFilledBytes, nbAvailableBytes;
+ int effEnd;
+ int codedBands;
+ int tf_sum;
+ int alloc_trim;
+ int pitch_index=COMBFILTER_MINPERIOD;
+ opus_val16 gain1 = 0;
+ int dual_stereo=0;
+ int effectiveBytes;
+ int dynalloc_logp;
+ opus_int32 vbr_rate;
+ opus_int32 total_bits;
+ opus_int32 total_boost;
+ opus_int32 balance;
+ opus_int32 tell;
+ int prefilter_tapset=0;
+ int pf_on;
+ int anti_collapse_rsv;
+ int anti_collapse_on=0;
+ int silence=0;
+ int tf_chan = 0;
+ opus_val16 tf_estimate;
+ int pitch_change=0;
+ opus_int32 tot_boost;
+ opus_val32 sample_max;
+ opus_val16 maxDepth;
+ const OpusCustomMode *mode;
+ int nbEBands;
+ int overlap;
+ const opus_int16 *eBands;
+ int secondMdct;
+ int signalBandwidth;
+ int transient_got_disabled=0;
+ opus_val16 surround_masking=0;
+ opus_val16 temporal_vbr=0;
+ opus_val16 surround_trim = 0;
+ opus_int32 equiv_rate = 510000;
+ VARDECL(opus_val16, surround_dynalloc);
+ ALLOC_STACK;
+
+ mode = st->mode;
+ nbEBands = mode->nbEBands;
+ overlap = mode->overlap;
+ eBands = mode->eBands;
+ tf_estimate = 0;
+ if (nbCompressedBytes<2 || pcm==NULL)
+ {
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+
+ frame_size *= st->upsample;
+ for (LM=0;LM<=mode->maxLM;LM++)
+ if (mode->shortMdctSize<<LM==frame_size)
+ break;
+ if (LM>mode->maxLM)
+ {
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+ M=1<<LM;
+ N = M*mode->shortMdctSize;
+
+ prefilter_mem = st->in_mem+CC*(st->overlap);
+ oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD));
+ oldLogE = oldBandE + CC*nbEBands;
+ oldLogE2 = oldLogE + CC*nbEBands;
+
+ if (enc==NULL)
+ {
+ tell=1;
+ nbFilledBytes=0;
+ } else {
+ tell=ec_tell(enc);
+ nbFilledBytes=(tell+4)>>3;
+ }
+
+#ifdef CUSTOM_MODES
+ if (st->signalling && enc==NULL)
+ {
+ int tmp = (mode->effEBands-st->end)>>1;
+ st->end = IMAX(1, mode->effEBands-tmp);
+ compressed[0] = tmp<<5;
+ compressed[0] |= LM<<3;
+ compressed[0] |= (C==2)<<2;
+ /* Convert "standard mode" to Opus header */
+ if (mode->Fs==48000 && mode->shortMdctSize==120)
+ {
+ int c0 = toOpus(compressed[0]);
+ if (c0<0)
+ {
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+ compressed[0] = c0;
+ }
+ compressed++;
+ nbCompressedBytes--;
+ }
+#else
+ celt_assert(st->signalling==0);
+#endif
+
+ /* Can't produce more than 1275 output bytes */
+ nbCompressedBytes = IMIN(nbCompressedBytes,1275);
+ nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
+
+ if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX)
+ {
+ opus_int32 den=mode->Fs>>BITRES;
+ vbr_rate=(st->bitrate*frame_size+(den>>1))/den;
+#ifdef CUSTOM_MODES
+ if (st->signalling)
+ vbr_rate -= 8<<BITRES;
+#endif
+ effectiveBytes = vbr_rate>>(3+BITRES);
+ } else {
+ opus_int32 tmp;
+ vbr_rate = 0;
+ tmp = st->bitrate*frame_size;
+ if (tell>1)
+ tmp += tell;
+ if (st->bitrate!=OPUS_BITRATE_MAX)
+ nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
+ (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
+ effectiveBytes = nbCompressedBytes;
+ }
+ if (st->bitrate != OPUS_BITRATE_MAX)
+ equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50);
+
+ if (enc==NULL)
+ {
+ ec_enc_init(&_enc, compressed, nbCompressedBytes);
+ enc = &_enc;
+ }
+
+ if (vbr_rate>0)
+ {
+ /* Computes the max bit-rate allowed in VBR mode to avoid violating the
+ target rate and buffering.
+ We must do this up front so that bust-prevention logic triggers
+ correctly if we don't have enough bits. */
+ if (st->constrained_vbr)
+ {
+ opus_int32 vbr_bound;
+ opus_int32 max_allowed;
+ /* We could use any multiple of vbr_rate as bound (depending on the
+ delay).
+ This is clamped to ensure we use at least two bytes if the encoder
+ was entirely empty, but to allow 0 in hybrid mode. */
+ vbr_bound = vbr_rate;
+ max_allowed = IMIN(IMAX(tell==1?2:0,
+ (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)),
+ nbAvailableBytes);
+ if(max_allowed < nbAvailableBytes)
+ {
+ nbCompressedBytes = nbFilledBytes+max_allowed;
+ nbAvailableBytes = max_allowed;
+ ec_enc_shrink(enc, nbCompressedBytes);
+ }
+ }
+ }
+ total_bits = nbCompressedBytes*8;
+
+ effEnd = st->end;
+ if (effEnd > mode->effEBands)
+ effEnd = mode->effEBands;
+
+ ALLOC(in, CC*(N+st->overlap), celt_sig);
+
+ sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample));
+ st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample);
+ sample_max=MAX32(sample_max, st->overlap_max);
+#ifdef OPUS_FIXED_POINT
+ silence = (sample_max==0);
+#else
+ silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth));
+#endif
+#ifdef FUZZING
+ if ((rand()&0x3F)==0)
+ silence = 1;
+#endif
+ if (tell==1)
+ ec_enc_bit_logp(enc, silence, 15);
+ else
+ silence=0;
+ if (silence)
+ {
+ /*In VBR mode there is no need to send more than the minimum. */
+ if (vbr_rate>0)
+ {
+ effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2);
+ total_bits=nbCompressedBytes*8;
+ nbAvailableBytes=2;
+ ec_enc_shrink(enc, nbCompressedBytes);
+ }
+ /* Pretend we've filled all the remaining bits with zeros
+ (that's what the initialiser did anyway) */
+ tell = nbCompressedBytes*8;
+ enc->nbits_total+=tell-ec_tell(enc);
+ }
+ c=0; do {
+ celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
+ mode->preemph, st->preemph_memE+c, st->clip);
+ } while (++c<CC);
+
+
+
+ /* Find pitch period and gain */
+ {
+ int enabled;
+ int qg;
+ enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf
+ && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE);
+
+ prefilter_tapset = st->tapset_decision;
+ pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
+ if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
+ && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
+ pitch_change = 1;
+ if (pf_on==0)
+ {
+ if(st->start==0 && tell+16<=total_bits)
+ ec_enc_bit_logp(enc, 0, 1);
+ } else {
+ /*This block is not gated by a total bits check only because
+ of the nbAvailableBytes check above.*/
+ int octave;
+ ec_enc_bit_logp(enc, 1, 1);
+ pitch_index += 1;
+ octave = EC_ILOG(pitch_index)-5;
+ ec_enc_uint(enc, octave, 6);
+ ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
+ pitch_index -= 1;
+ ec_enc_bits(enc, qg, 3);
+ ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2);
+ }
+ }
+
+ isTransient = 0;
+ shortBlocks = 0;
+ if (st->complexity >= 1 && !st->lfe)
+ {
+ isTransient = transient_analysis(in, N+st->overlap, CC,
+ &tf_estimate, &tf_chan);
+ }
+ if (LM>0 && ec_tell(enc)+3<=total_bits)
+ {
+ if (isTransient)
+ shortBlocks = M;
+ } else {
+ isTransient = 0;
+ transient_got_disabled=1;
+ }
+
+ ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
+ ALLOC(bandE,nbEBands*CC, celt_ener);
+ ALLOC(bandLogE,nbEBands*CC, opus_val16);
+
+ secondMdct = shortBlocks && st->complexity>=8;
+ ALLOC(bandLogE2, C*nbEBands, opus_val16);
+ if (secondMdct)
+ {
+ compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample);
+ compute_band_energies(mode, freq, bandE, effEnd, C, M);
+ amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C);
+ for (i=0;i<C*nbEBands;i++)
+ bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+ }
+
+ compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
+ if (CC==2&&C==1)
+ tf_chan = 0;
+ compute_band_energies(mode, freq, bandE, effEnd, C, M);
+
+ if (st->lfe)
+ {
+ for (i=2;i<st->end;i++)
+ {
+ bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0]));
+ bandE[i] = MAX32(bandE[i], EPSILON);
+ }
+ }
+ amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
+
+ ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
+ for(i=0;i<st->end;i++)
+ surround_dynalloc[i] = 0;
+ /* This computes how much masking takes place between surround channels */
+ if (st->start==0&&st->energy_mask&&!st->lfe)
+ {
+ int mask_end;
+ int midband;
+ int count_dynalloc;
+ opus_val32 mask_avg=0;
+ opus_val32 diff=0;
+ int count=0;
+ mask_end = IMAX(2,st->lastCodedBands);
+ for (c=0;c<C;c++)
+ {
+ for(i=0;i<mask_end;i++)
+ {
+ opus_val16 mask;
+ mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],
+ QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+ if (mask > 0)
+ mask = HALF16(mask);
+ mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);
+ count += eBands[i+1]-eBands[i];
+ diff += MULT16_16(mask, 1+2*i-mask_end);
+ }
+ }
+ mask_avg = DIV32_16(mask_avg,count);
+ mask_avg += QCONST16(.2f, DB_SHIFT);
+ diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
+ /* Again, being conservative */
+ diff = HALF32(diff);
+ diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));
+ /* Find the band that's in the middle of the coded spectrum */
+ for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);
+ count_dynalloc=0;
+ for(i=0;i<mask_end;i++)
+ {
+ opus_val32 lin;
+ opus_val16 unmask;
+ lin = mask_avg + diff*(i-midband);
+ if (C==2)
+ unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);
+ else
+ unmask = st->energy_mask[i];
+ unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));
+ unmask -= lin;
+ if (unmask > QCONST16(.25f, DB_SHIFT))
+ {
+ surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);
+ count_dynalloc++;
+ }
+ }
+ if (count_dynalloc>=3)
+ {
+ /* If we need dynalloc in many bands, it's probably because our
+ initial masking rate was too low. */
+ mask_avg += QCONST16(.25f, DB_SHIFT);
+ if (mask_avg>0)
+ {
+ /* Something went really wrong in the original calculations,
+ disabling masking. */
+ mask_avg = 0;
+ diff = 0;
+ for(i=0;i<mask_end;i++)
+ surround_dynalloc[i] = 0;
+ } else {
+ for(i=0;i<mask_end;i++)
+ surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
+ }
+ }
+ mask_avg += QCONST16(.2f, DB_SHIFT);
+ /* Convert to 1/64th units used for the trim */
+ surround_trim = 64*diff;
+ /*printf("%d %d ", mask_avg, surround_trim);*/
+ surround_masking = mask_avg;
+ }
+ /* Temporal VBR (but not for LFE) */
+ if (!st->lfe)
+ {
+ opus_val16 follow=-QCONST16(10.0f,DB_SHIFT);
+ opus_val32 frame_avg=0;
+ opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+ for(i=st->start;i<st->end;i++)
+ {
+ follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset);
+ if (C==2)
+ follow = MAX16(follow, bandLogE[i+nbEBands]-offset);
+ frame_avg += follow;
+ }
+ frame_avg /= (st->end-st->start);
+ temporal_vbr = SUB16(frame_avg,st->spec_avg);
+ temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr));
+ st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr);
+ }
+ /*for (i=0;i<21;i++)
+ printf("%f ", bandLogE[i]);
+ printf("\n");*/
+
+ if (!secondMdct)
+ {
+ for (i=0;i<C*nbEBands;i++)
+ bandLogE2[i] = bandLogE[i];
+ }
+
+ /* Last chance to catch any transient we might have missed in the
+ time-domain analysis */
+ if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe)
+ {
+ if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C))
+ {
+ isTransient = 1;
+ shortBlocks = M;
+ compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
+ compute_band_energies(mode, freq, bandE, effEnd, C, M);
+ amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
+ /* Compensate for the scaling of short vs long mdcts */
+ for (i=0;i<C*nbEBands;i++)
+ bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+ tf_estimate = QCONST16(.2f,14);
+ }
+ }
+
+ if (LM>0 && ec_tell(enc)+3<=total_bits)
+ ec_enc_bit_logp(enc, isTransient, 3);
+
+ ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
+
+ /* Band normalisation */
+ normalise_bands(mode, freq, X, bandE, effEnd, C, M);
+
+ ALLOC(tf_res, nbEBands, int);
+ /* Disable variable tf resolution for hybrid and at very low bitrate */
+ if (effectiveBytes>=15*C && st->start==0 && st->complexity>=2 && !st->lfe)
+ {
+ int lambda;
+ if (effectiveBytes<40)
+ lambda = 12;
+ else if (effectiveBytes<60)
+ lambda = 6;
+ else if (effectiveBytes<100)
+ lambda = 4;
+ else
+ lambda = 3;
+ lambda*=2;
+ tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan);
+ for (i=effEnd;i<st->end;i++)
+ tf_res[i] = tf_res[effEnd-1];
+ } else {
+ tf_sum = 0;
+ for (i=0;i<st->end;i++)
+ tf_res[i] = isTransient;
+ tf_select=0;
+ }
+
+ ALLOC(error, C*nbEBands, opus_val16);
+ quant_coarse_energy(mode, st->start, st->end, effEnd, bandLogE,
+ oldBandE, total_bits, error, enc,
+ C, LM, nbAvailableBytes, st->force_intra,
+ &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe);
+
+ tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
+
+ if (ec_tell(enc)+4<=total_bits)
+ {
+ if (st->lfe)
+ {
+ st->tapset_decision = 0;
+ st->spread_decision = SPREAD_NORMAL;
+ } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0)
+ {
+ if (st->complexity == 0)
+ st->spread_decision = SPREAD_NONE;
+ else
+ st->spread_decision = SPREAD_NORMAL;
+ } else {
+ /* Disable new spreading+tapset estimator until we can show it works
+ better than the old one. So far it seems like spreading_decision()
+ works best. */
+#if 0
+ if (st->analysis.valid)
+ {
+ static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
+ static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
+ static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)};
+ static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
+ st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
+ st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
+ } else
+#endif
+ {
+ st->spread_decision = spreading_decision(mode, X,
+ &st->tonal_average, st->spread_decision, &st->hf_average,
+ &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
+ }
+ /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
+ /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
+ }
+ ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
+ }
+
+ ALLOC(offsets, nbEBands, int);
+
+ maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
+ st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
+ eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
+ /* For LFE, everything interesting is in the first band */
+ if (st->lfe)
+ offsets[0] = IMIN(8, effectiveBytes/3);
+ ALLOC(cap, nbEBands, int);
+ init_caps(mode,cap,LM,C);
+
+ dynalloc_logp = 6;
+ total_bits<<=BITRES;
+ total_boost = 0;
+ tell = ec_tell_frac(enc);
+ for (i=st->start;i<st->end;i++)
+ {
+ int width, quanta;
+ int dynalloc_loop_logp;
+ int boost;
+ int j;
+ width = C*(eBands[i+1]-eBands[i])<<LM;
+ /* quanta is 6 bits, but no more than 1 bit/sample
+ and no less than 1/8 bit/sample */
+ quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+ dynalloc_loop_logp = dynalloc_logp;
+ boost = 0;
+ for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost
+ && boost < cap[i]; j++)
+ {
+ int flag;
+ flag = j<offsets[i];
+ ec_enc_bit_logp(enc, flag, dynalloc_loop_logp);
+ tell = ec_tell_frac(enc);
+ if (!flag)
+ break;
+ boost += quanta;
+ total_boost += quanta;
+ dynalloc_loop_logp = 1;
+ }
+ /* Making dynalloc more likely */
+ if (j)
+ dynalloc_logp = IMAX(2, dynalloc_logp-1);
+ offsets[i] = boost;
+ }
+
+ if (C==2)
+ {
+ static const opus_val16 intensity_thresholds[21]=
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 off*/
+ { 1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134};
+ static const opus_val16 intensity_histeresis[21]=
+ { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6, 8, 8};
+
+ /* Always use MS for 2.5 ms frames until we can do a better analysis */
+ if (LM!=0)
+ dual_stereo = stereo_analysis(mode, X, LM, N);
+
+ st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000),
+ intensity_thresholds, intensity_histeresis, 21, st->intensity);
+ st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
+ }
+
+ alloc_trim = 5;
+ if (tell+(6<<BITRES) <= total_bits - total_boost)
+ {
+ if (st->lfe)
+ alloc_trim = 5;
+ else
+ alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
+ st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);
+ ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
+ tell = ec_tell_frac(enc);
+ }
+
+ /* Variable bitrate */
+ if (vbr_rate>0)
+ {
+ opus_val16 alpha;
+ opus_int32 delta;
+ /* The target rate in 8th bits per frame */
+ opus_int32 target, base_target;
+ opus_int32 min_allowed;
+ int lm_diff = mode->maxLM - LM;
+
+ /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
+ The CELT allocator will just not be able to use more than that anyway. */
+ nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
+ base_target = vbr_rate - ((40*C+20)<<BITRES);
+
+ if (st->constrained_vbr)
+ base_target += (st->vbr_offset>>lm_diff);
+
+ target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
+ st->lastCodedBands, C, st->intensity, st->constrained_vbr,
+ st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth,
+ st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking,
+ temporal_vbr);
+
+ /* The current offset is removed from the target and the space used
+ so far is added*/
+ target=target+tell;
+ /* In VBR mode the frame size must not be reduced so much that it would
+ result in the encoder running out of bits.
+ The margin of 2 bytes ensures that none of the bust-prevention logic
+ in the decoder will have triggered so far. */
+ min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes;
+
+ nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
+ nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
+ nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes;
+
+ /* By how much did we "miss" the target on that frame */
+ delta = target - vbr_rate;
+
+ target=nbAvailableBytes<<(BITRES+3);
+
+ /*If the frame is silent we don't adjust our drift, otherwise
+ the encoder will shoot to very high rates after hitting a
+ span of silence, but we do allow the bitres to refill.
+ This means that we'll undershoot our target in CVBR/VBR modes
+ on files with lots of silence. */
+ if(silence)
+ {
+ nbAvailableBytes = 2;
+ target = 2*8<<BITRES;
+ delta = 0;
+ }
+
+ if (st->vbr_count < 970)
+ {
+ st->vbr_count++;
+ alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
+ } else
+ alpha = QCONST16(.001f,15);
+ /* How many bits have we used in excess of what we're allowed */
+ if (st->constrained_vbr)
+ st->vbr_reservoir += target - vbr_rate;
+ /*printf ("%d\n", st->vbr_reservoir);*/
+
+ /* Compute the offset we need to apply in order to reach the target */
+ if (st->constrained_vbr)
+ {
+ st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
+ st->vbr_offset = -st->vbr_drift;
+ }
+ /*printf ("%d\n", st->vbr_drift);*/
+
+ if (st->constrained_vbr && st->vbr_reservoir < 0)
+ {
+ /* We're under the min value -- increase rate */
+ int adjust = (-st->vbr_reservoir)/(8<<BITRES);
+ /* Unless we're just coding silence */
+ nbAvailableBytes += silence?0:adjust;
+ st->vbr_reservoir = 0;
+ /*printf ("+%d\n", adjust);*/
+ }
+ nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
+ /*printf("%d\n", nbCompressedBytes*50*8);*/
+ /* This moves the raw bits to take into account the new compressed size */
+ ec_enc_shrink(enc, nbCompressedBytes);
+ }
+
+ /* Bit allocation */
+ ALLOC(fine_quant, nbEBands, int);
+ ALLOC(pulses, nbEBands, int);
+ ALLOC(fine_priority, nbEBands, int);
+
+ /* bits = packet size - where we are - safety*/
+ bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
+ anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+ bits -= anti_collapse_rsv;
+ signalBandwidth = st->end-1;
+#ifndef DISABLE_FLOAT_API
+ if (st->analysis.valid)
+ {
+ int min_bandwidth;
+ if (equiv_rate < (opus_int32)32000*C)
+ min_bandwidth = 13;
+ else if (equiv_rate < (opus_int32)48000*C)
+ min_bandwidth = 16;
+ else if (equiv_rate < (opus_int32)60000*C)
+ min_bandwidth = 18;
+ else if (equiv_rate < (opus_int32)80000*C)
+ min_bandwidth = 19;
+ else
+ min_bandwidth = 20;
+ signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth);
+ }
+#endif
+ if (st->lfe)
+ signalBandwidth = 1;
+ codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+ alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
+ fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
+ if (st->lastCodedBands)
+ st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands));
+ else
+ st->lastCodedBands = codedBands;
+
+ quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
+
+ /* Residual quantisation */
+ ALLOC(collapse_masks, C*nbEBands, unsigned char);
+ quant_all_bands(1, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+ bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res,
+ nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
+
+ if (anti_collapse_rsv > 0)
+ {
+ anti_collapse_on = st->consec_transient<2;
+#ifdef FUZZING
+ anti_collapse_on = rand()&0x1;
+#endif
+ ec_enc_bits(enc, anti_collapse_on, 1);
+ }
+ quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
+
+ if (silence)
+ {
+ for (i=0;i<C*nbEBands;i++)
+ oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+ }
+
+#ifdef RESYNTH
+ /* Re-synthesis of the coded audio if required */
+ {
+ celt_sig *out_mem[2];
+
+ if (anti_collapse_on)
+ {
+ anti_collapse(mode, X, collapse_masks, LM, C, N,
+ st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+ }
+
+ if (silence)
+ {
+ for (i=0;i<C*N;i++)
+ freq[i] = 0;
+ } else {
+ /* Synthesis */
+ denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
+ }
+
+ c=0; do {
+ OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2);
+ } while (++c<CC);
+
+ if (CC==2&&C==1)
+ {
+ for (i=0;i<N;i++)
+ freq[N+i] = freq[i];
+ }
+
+ c=0; do {
+ out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N;
+ } while (++c<CC);
+
+ compute_inv_mdcts(mode, shortBlocks, freq, out_mem, CC, LM);
+
+ c=0; do {
+ st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+ st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
+ comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
+ st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
+ mode->window, st->overlap);
+ if (LM!=0)
+ comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
+ st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
+ mode->window, overlap);
+ } while (++c<CC);
+
+ /* We reuse freq[] as scratch space for the de-emphasis */
+ deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, freq);
+ st->prefilter_period_old = st->prefilter_period;
+ st->prefilter_gain_old = st->prefilter_gain;
+ st->prefilter_tapset_old = st->prefilter_tapset;
+ }
+#endif
+
+ st->prefilter_period = pitch_index;
+ st->prefilter_gain = gain1;
+ st->prefilter_tapset = prefilter_tapset;
+#ifdef RESYNTH
+ if (LM!=0)
+ {
+ st->prefilter_period_old = st->prefilter_period;
+ st->prefilter_gain_old = st->prefilter_gain;
+ st->prefilter_tapset_old = st->prefilter_tapset;
+ }
+#endif
+
+ if (CC==2&&C==1) {
+ for (i=0;i<nbEBands;i++)
+ oldBandE[nbEBands+i]=oldBandE[i];
+ }
+
+ if (!isTransient)
+ {
+ for (i=0;i<CC*nbEBands;i++)
+ oldLogE2[i] = oldLogE[i];
+ for (i=0;i<CC*nbEBands;i++)
+ oldLogE[i] = oldBandE[i];
+ } else {
+ for (i=0;i<CC*nbEBands;i++)
+ oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+ }
+ /* In case start or end were to change */
+ c=0; do
+ {
+ for (i=0;i<st->start;i++)
+ {
+ oldBandE[c*nbEBands+i]=0;
+ oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+ }
+ for (i=st->end;i<nbEBands;i++)
+ {
+ oldBandE[c*nbEBands+i]=0;
+ oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+ }
+ } while (++c<CC);
+
+ if (isTransient || transient_got_disabled)
+ st->consec_transient++;
+ else
+ st->consec_transient=0;
+ st->rng = enc->rng;
+
+ /* If there's any room left (can only happen for very high rates),
+ it's already filled with zeros */
+ ec_enc_done(enc);
+
+#ifdef CUSTOM_MODES
+ if (st->signalling)
+ nbCompressedBytes++;
+#endif
+
+ RESTORE_STACK;
+ if (ec_get_error(enc))
+ return OPUS_INTERNAL_ERROR;
+ else
+ return nbCompressedBytes;
+}
+
+
+#ifdef CUSTOM_MODES
+
+#ifdef OPUS_FIXED_POINT
+int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) /* OPUS_FIXED_POINT build: the 16-bit PCM is handed to celt_encode_with_ec() without conversion. */
+{
+ return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); /* result is returned as-is; NULL final argument is presumably "no external range coder" -- confirm against the celt_encode_with_ec prototype */
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) /* OPUS_FIXED_POINT build with the float API enabled: converts float PCM to opus_int16, then encodes the converted copy. */
+{
+ int j, ret, C, N;
+ VARDECL(opus_int16, in);
+ ALLOC_STACK;
+
+ if (pcm==NULL) /* reject a missing input buffer before anything is allocated with ALLOC() */
+ return OPUS_BAD_ARG;
+
+ C = st->channels;
+ N = frame_size;
+ ALLOC(in, C*N, opus_int16); /* scratch copy: one opus_int16 per input sample, C*N in total */
+
+ for (j=0;j<C*N;j++)
+ in[j] = FLOAT2INT16(pcm[j]); /* float -> 16-bit conversion of every sample */
+
+ ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); /* ret is passed back to the caller unchanged */
+#ifdef RESYNTH
+ for (j=0;j<C*N;j++)
+ ((float*)pcm)[j]=in[j]*(1.f/32768.f); /* RESYNTH only: writes in[] back over the caller's buffer, casting away const */
+#endif
+ RESTORE_STACK;
+ return ret;
+}
+#endif /* DISABLE_FLOAT_API */
+#else
+
+int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) /* Non-fixed-point build: converts 16-bit PCM to celt_sig, then encodes the converted copy. */
+{
+ int j, ret, C, N;
+ VARDECL(celt_sig, in);
+ ALLOC_STACK;
+
+ if (pcm==NULL) /* reject a missing input buffer before anything is allocated with ALLOC() */
+ return OPUS_BAD_ARG;
+
+ C=st->channels;
+ N=frame_size;
+ ALLOC(in, C*N, celt_sig); /* scratch copy: one celt_sig per input sample, C*N in total */
+ for (j=0;j<C*N;j++) {
+ in[j] = SCALEOUT(pcm[j]); /* per-sample conversion through the SCALEOUT() macro (defined outside this chunk) */
+ }
+
+ ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); /* ret is passed back to the caller unchanged */
+#ifdef RESYNTH
+ for (j=0;j<C*N;j++)
+ ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]); /* RESYNTH only: writes in[] back over the caller's buffer, casting away const */
+#endif
+ RESTORE_STACK;
+ return ret;
+}
+
+int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) /* Non-fixed-point build: the float PCM is handed to celt_encode_with_ec() without conversion. */
+{
+ return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); /* result is returned as-is; NULL final argument is presumably "no external range coder" -- confirm against the celt_encode_with_ec prototype */
+}
+
+#endif
+
+#endif /* CUSTOM_MODES */
+
+int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) /* Variadic control call: 'request' selects the setting and fixes the type of the single extra argument. Returns OPUS_OK, OPUS_BAD_ARG (argument rejected) or OPUS_UNIMPLEMENTED (unknown request). */
+{
+ va_list ap;
+
+ va_start(ap, request);
+ switch (request)
+ {
+ case OPUS_SET_COMPLEXITY_REQUEST: /* opus_int32 in 0..10 */
+ {
+ int value = va_arg(ap, opus_int32);
+ if (value<0 || value>10)
+ goto bad_arg;
+ st->complexity = value;
+ }
+ break;
+ case CELT_SET_START_BAND_REQUEST: /* opus_int32 in 0..nbEBands-1 */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value<0 || value>=st->mode->nbEBands)
+ goto bad_arg;
+ st->start = value;
+ }
+ break;
+ case CELT_SET_END_BAND_REQUEST: /* opus_int32 in 1..nbEBands */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value<1 || value>st->mode->nbEBands)
+ goto bad_arg;
+ st->end = value;
+ }
+ break;
+ case CELT_SET_PREDICTION_REQUEST: /* opus_int32 in 0..2 */
+ {
+ int value = va_arg(ap, opus_int32);
+ if (value<0 || value>2)
+ goto bad_arg;
+ st->disable_pf = value<=1; /* 0 and 1 set disable_pf, 2 clears it */
+ st->force_intra = value==0; /* only 0 sets force_intra */
+ }
+ break;
+ case OPUS_SET_PACKET_LOSS_PERC_REQUEST: /* opus_int32 in 0..100 */
+ {
+ int value = va_arg(ap, opus_int32);
+ if (value<0 || value>100)
+ goto bad_arg;
+ st->loss_rate = value;
+ }
+ break;
+ case OPUS_SET_VBR_CONSTRAINT_REQUEST: /* opus_int32, stored unvalidated */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ st->constrained_vbr = value;
+ }
+ break;
+ case OPUS_SET_VBR_REQUEST: /* opus_int32, stored unvalidated */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ st->vbr = value;
+ }
+ break;
+ case OPUS_SET_BITRATE_REQUEST: /* opus_int32 */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value<=500 && value!=OPUS_BITRATE_MAX) /* values of 500 or less are rejected unless they equal OPUS_BITRATE_MAX */
+ goto bad_arg;
+ value = IMIN(value, 260000*st->channels); /* capped at 260000 per channel */
+ st->bitrate = value;
+ }
+ break;
+ case CELT_SET_CHANNELS_REQUEST: /* opus_int32, 1 or 2; sets stream_channels only */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value<1 || value>2)
+ goto bad_arg;
+ st->stream_channels = value;
+ }
+ break;
+ case OPUS_SET_LSB_DEPTH_REQUEST: /* opus_int32 in 8..24 */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value<8 || value>24)
+ goto bad_arg;
+ st->lsb_depth=value;
+ }
+ break;
+ case OPUS_GET_LSB_DEPTH_REQUEST: /* opus_int32* output */
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (value==0) /* reject a NULL output pointer, as the other getters in this switch do */
+ goto bad_arg;
+ *value=st->lsb_depth;
+ }
+ break;
+ case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: /* opus_int32, stored unvalidated */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ st->variable_duration = value;
+ }
+ break;
+ case OPUS_RESET_STATE: /* no extra argument is read */
+ {
+ int i;
+ opus_val16 *oldBandE, *oldLogE, *oldLogE2;
+ oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD)); /* the three band arrays are located after in_mem, each channels*nbEBands long */
+ oldLogE = oldBandE + st->channels*st->mode->nbEBands;
+ oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
+ OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
+ opus_custom_encoder_get_size(st->mode, st->channels)-
+ ((char*)&st->ENCODER_RESET_START - (char*)st)); /* zero everything from ENCODER_RESET_START to the end of the encoder's reported size */
+ for (i=0;i<st->channels*st->mode->nbEBands;i++)
+ oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+ st->vbr_offset = 0; /* the assignments below restore the fields whose reset value is set explicitly */
+ st->delayedIntra = 1;
+ st->spread_decision = SPREAD_NORMAL;
+ st->tonal_average = 256;
+ st->hf_average = 0;
+ st->tapset_decision = 0;
+ }
+ break;
+#ifdef CUSTOM_MODES
+ case CELT_SET_INPUT_CLIPPING_REQUEST: /* opus_int32, stored unvalidated */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ st->clip = value;
+ }
+ break;
+#endif
+ case CELT_SET_SIGNALLING_REQUEST: /* opus_int32, stored unvalidated */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ st->signalling = value;
+ }
+ break;
+ case CELT_SET_ANALYSIS_REQUEST: /* AnalysisInfo* input */
+ {
+ AnalysisInfo *info = va_arg(ap, AnalysisInfo *);
+ if (info) /* a NULL pointer is ignored and still yields OPUS_OK */
+ OPUS_COPY(&st->analysis, info, 1);
+ }
+ break;
+ case CELT_GET_MODE_REQUEST: /* const CELTMode** output */
+ {
+ const CELTMode ** value = va_arg(ap, const CELTMode**);
+ if (value==0)
+ goto bad_arg;
+ *value=st->mode;
+ }
+ break;
+ case OPUS_GET_FINAL_RANGE_REQUEST: /* opus_uint32* output */
+ {
+ opus_uint32 * value = va_arg(ap, opus_uint32 *);
+ if (value==0)
+ goto bad_arg;
+ *value=st->rng;
+ }
+ break;
+ case OPUS_SET_LFE_REQUEST: /* opus_int32, stored unvalidated */
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ st->lfe = value;
+ }
+ break;
+ case OPUS_SET_ENERGY_MASK_REQUEST: /* opus_val16* input */
+ {
+ opus_val16 *value = va_arg(ap, opus_val16*);
+ st->energy_mask = value; /* only the pointer is stored, no copy of the data is made */
+ }
+ break;
+ default:
+ goto bad_request;
+ }
+ va_end(ap); /* each of the three exits below ends the va_list before returning */
+ return OPUS_OK;
+bad_arg:
+ va_end(ap);
+ return OPUS_BAD_ARG;
+bad_request:
+ va_end(ap);
+ return OPUS_UNIMPLEMENTED;
+}
diff --git a/drivers/opus/celt/celt_lpc.c b/drivers/opus/celt/celt_lpc.c
new file mode 100644
index 0000000000..1fa4406bc9
--- /dev/null
+++ b/drivers/opus/celt/celt_lpc.c
@@ -0,0 +1,309 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+void _celt_lpc( /* Derives p LPC coefficients from the autocorrelation values ac[0..p] by an order-by-order reflection-coefficient recursion. */
+ opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */
+const opus_val32 *ac, /* in: [0...p] autocorrelation values */
+int p
+)
+{
+ int i, j;
+ opus_val32 r;
+ opus_val32 error = ac[0]; /* running prediction error, reduced after every order */
+#ifdef OPUS_FIXED_POINT
+ opus_val32 lpc[LPC_ORDER]; /* 32-bit working copy, rounded into _lpc at the end; NOTE(review): p > LPC_ORDER would overrun this array and is not checked here */
+#else
+ float *lpc = _lpc; /* float build works directly in the output array */
+#endif
+
+ for (i = 0; i < p; i++)
+ lpc[i] = 0;
+ if (ac[0] != 0) /* when ac[0] is zero the coefficients stay all-zero */
+ {
+ for (i = 0; i < p; i++) {
+ /* Sum up this iteration's reflection coefficient */
+ opus_val32 rr = 0;
+ for (j = 0; j < i; j++)
+ rr += MULT32_32_Q31(lpc[j],ac[i - j]);
+ rr += SHR32(ac[i + 1],3); /* ac[i+1] is pre-shifted right by 3; the SHL32(rr,3) below undoes it */
+ r = -frac_div32(SHL32(rr,3), error);
+ /* Update LPC coefficients and total error */
+ lpc[i] = SHR32(r,3);
+ for (j = 0; j < (i+1)>>1; j++) /* updates entries j and i-1-j together, each from the other's old value */
+ {
+ opus_val32 tmp1, tmp2;
+ tmp1 = lpc[j];
+ tmp2 = lpc[i-1-j];
+ lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2);
+ lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
+ }
+
+ error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); /* error scaled by (1 - r*r) */
+ /* Bail out once we get 30 dB gain */
+#ifdef OPUS_FIXED_POINT
+ if (error<SHR32(ac[0],10)) /* threshold is ac[0]/1024 in fixed point */
+ break;
+#else
+ if (error<.001f*ac[0])
+ break;
+#endif
+ }
+ }
+#ifdef OPUS_FIXED_POINT
+ for (i=0;i<p;i++)
+ _lpc[i] = ROUND16(lpc[i],16); /* round the 32-bit working values down by 16 bits into the 16-bit output */
+#endif
+}
+
+void celt_fir(const opus_val16 *_x, /* FIR filter: _y[i] = _x[i] + sum over j < ord of num[j] * (input j+1 samples before i). */
+ const opus_val16 *num, /* ord filter taps */
+ opus_val16 *_y, /* N output samples */
+ int N,
+ int ord,
+ opus_val16 *mem) /* in/out: the ord inputs preceding _x[0], most recent first; refreshed from the end of _x */
+{
+ int i,j;
+ VARDECL(opus_val16, rnum);
+ VARDECL(opus_val16, x);
+ SAVE_STACK;
+
+ ALLOC(rnum, ord, opus_val16);
+ ALLOC(x, N+ord, opus_val16);
+ for(i=0;i<ord;i++)
+ rnum[i] = num[ord-i-1]; /* taps reversed so they line up with the oldest-first history in x[] */
+ for(i=0;i<ord;i++)
+ x[i] = mem[ord-i-1]; /* history goes first, oldest sample at x[0] */
+ for (i=0;i<N;i++)
+ x[i+ord]=_x[i];
+ for(i=0;i<ord;i++)
+ mem[i] = _x[N-i-1]; /* state for the next call is saved before any output is written; NOTE(review): reads before _x[0] when N < ord */
+#ifdef SMALL_FOOTPRINT
+ for (i=0;i<N;i++)
+ {
+ opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); /* current input enters the accumulator pre-shifted by SIG_SHIFT */
+ for (j=0;j<ord;j++)
+ {
+ sum = MAC16_16(sum,rnum[j],x[i+j]);
+ }
+ _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
+ }
+#else
+ for (i=0;i<N-3;i+=4) /* four outputs per pass through xcorr_kernel() */
+ {
+ opus_val32 sum[4]={0,0,0,0};
+ xcorr_kernel(rnum, x+i, sum, ord);
+ _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
+ _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
+ _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
+ _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
+ }
+ for (;i<N;i++) /* remaining 0..3 samples, one at a time */
+ {
+ opus_val32 sum = 0;
+ for (j=0;j<ord;j++)
+ sum = MAC16_16(sum,rnum[j],x[i+j]);
+ _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
+ }
+#endif
+ RESTORE_STACK;
+}
+
+void celt_iir(const opus_val32 *_x, /* Recursive filter: _y[i] = _x[i] - sum over j < ord of den[j] * (output j+1 samples before i, rounded by SIG_SHIFT). */
+ const opus_val16 *den, /* ord feedback coefficients */
+ opus_val32 *_y, /* N outputs, kept at 32-bit precision */
+ int N,
+ int ord,
+ opus_val16 *mem) /* in/out: the last ord outputs rounded by SIG_SHIFT, most recent first */
+{
+#ifdef SMALL_FOOTPRINT
+ int i,j;
+ for (i=0;i<N;i++)
+ {
+ opus_val32 sum = _x[i];
+ for (j=0;j<ord;j++)
+ {
+ sum -= MULT16_16(den[j],mem[j]);
+ }
+ for (j=ord-1;j>=1;j--) /* shift the history by one sample */
+ {
+ mem[j]=mem[j-1];
+ }
+ mem[0] = ROUND16(sum,SIG_SHIFT);
+ _y[i] = sum;
+ }
+#else
+ int i,j;
+ VARDECL(opus_val16, rden);
+ VARDECL(opus_val16, y);
+ SAVE_STACK;
+
+ celt_assert((ord&3)==0); /* the unrolled path below requires ord to be a multiple of 4 */
+ ALLOC(rden, ord, opus_val16);
+ ALLOC(y, N+ord, opus_val16);
+ for(i=0;i<ord;i++)
+ rden[i] = den[ord-i-1]; /* coefficients reversed to line up with the oldest-first history in y[] */
+ for(i=0;i<ord;i++)
+ y[i] = -mem[ord-i-1]; /* y[] stores NEGATED rounded outputs so that accumulating rden*y performs the subtraction */
+ for(;i<N+ord;i++)
+ y[i]=0; /* outputs not computed yet contribute nothing to the kernel */
+ for (i=0;i<N-3;i+=4)
+ {
+ /* Unroll by 4 as if it were an FIR filter */
+ opus_val32 sum[4];
+ sum[0]=_x[i];
+ sum[1]=_x[i+1];
+ sum[2]=_x[i+2];
+ sum[3]=_x[i+3];
+ xcorr_kernel(rden, y+i, sum, ord);
+
+ /* Patch up the result to compensate for the fact that this is an IIR */
+ y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT);
+ _y[i ] = sum[0];
+ sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
+ y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
+ _y[i+1] = sum[1];
+ sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
+ sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
+ y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
+ _y[i+2] = sum[2];
+
+ sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
+ sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
+ sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
+ y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
+ _y[i+3] = sum[3];
+ }
+ for (;i<N;i++) /* remaining 0..3 samples when N is not a multiple of 4 */
+ {
+ opus_val32 sum = _x[i];
+ for (j=0;j<ord;j++)
+ sum = MAC16_16(sum,rden[j],y[i+j]); /* y[] is negated, so accumulating subtracts the feedback term (was "sum -=", which added it) */
+ y[i+ord] = -ROUND16(sum,SIG_SHIFT); /* keep the negated-history convention used everywhere else in y[] */
+ _y[i] = sum;
+ }
+ for(i=0;i<ord;i++)
+ mem[i] = ROUND16(_y[N-i-1],SIG_SHIFT); /* mem holds SIG_SHIFT-rounded outputs, as in the SMALL_FOOTPRINT branch; NOTE(review): reads before _y[0] when N < ord */
+ RESTORE_STACK;
+#endif
+}
+
+int _celt_autocorr( /* Computes lag+1 autocorrelation values of x, optionally windowing both ends first; returns the scaling shift applied (fixed point only). */
+ const opus_val16 *x, /* in: [0...n-1] samples x */
+ opus_val32 *ac, /* out: [0...lag] ac values (lag+1 entries are written) */
+ const opus_val16 *window,
+ int overlap,
+ int lag,
+ int n,
+ int arch
+ )
+{
+ opus_val32 d;
+ int i, k;
+ int fastN=n-lag; /* number of products per lag that celt_pitch_xcorr() computes; the rest are added by the loop after it */
+ int shift;
+ const opus_val16 *xptr;
+ VARDECL(opus_val16, xx);
+ SAVE_STACK;
+ ALLOC(xx, n, opus_val16);
+ celt_assert(n>0);
+ celt_assert(overlap>=0);
+ if (overlap == 0)
+ {
+ xptr = x; /* no windowing: read the input in place */
+ } else {
+ for (i=0;i<n;i++)
+ xx[i] = x[i];
+ for (i=0;i<overlap;i++) /* the same window values taper the first and the last 'overlap' samples */
+ {
+ xx[i] = MULT16_16_Q15(x[i],window[i]);
+ xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+ }
+ xptr = xx;
+ }
+ shift=0; /* stays 0 in the float build, which never enters the fixed-point sections */
+#ifdef OPUS_FIXED_POINT
+ {
+ opus_val32 ac0; /* coarse energy estimate: every square is shifted right by 9 before summing */
+ ac0 = 1+(n<<7);
+ if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9); /* odd n: handle sample 0 alone so the loop below can step in pairs */
+ for(i=(n&1);i<n;i+=2)
+ {
+ ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
+ ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
+ }
+
+ shift = celt_ilog2(ac0)-30+10;
+ shift = (shift)/2; /* halved because it is applied to the samples, and each product multiplies two of them */
+ if (shift>0)
+ {
+ for(i=0;i<n;i++)
+ xx[i] = PSHR32(xptr[i], shift); /* scale the input down before the products are formed */
+ xptr = xx;
+ } else
+ shift = 0;
+ }
+#endif
+ celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); /* lag+1 outputs requested, each over fastN samples */
+ for (k=0;k<=lag;k++)
+ {
+ for (i = k+fastN, d = 0; i < n; i++) /* products from index k+fastN up to n-1 that the call above did not cover */
+ d = MAC16_16(d, xptr[i], xptr[i-k]);
+ ac[k] += d;
+ }
+#ifdef OPUS_FIXED_POINT
+ shift = 2*shift; /* back to the scale of the products */
+ if (shift<=0) /* shift cannot be negative here, so this is the shift==0 case */
+ ac[0] += SHL32((opus_int32)1, -shift);
+ if (ac[0] < 268435456) /* below 2^28: shift every value left */
+ {
+ int shift2 = 29 - EC_ILOG(ac[0]);
+ for (i=0;i<=lag;i++)
+ ac[i] = SHL32(ac[i], shift2);
+ shift -= shift2;
+ } else if (ac[0] >= 536870912) /* 2^29 or more: shift every value right by 1, or by 2 when ac[0] >= 2^30 */
+ {
+ int shift2=1;
+ if (ac[0] >= 1073741824)
+ shift2++;
+ for (i=0;i<=lag;i++)
+ ac[i] = SHR32(ac[i], shift2);
+ shift += shift2;
+ }
+#endif
+
+ RESTORE_STACK;
+ return shift;
+}
diff --git a/drivers/opus/celt/celt_lpc.h b/drivers/opus/celt/celt_lpc.h
new file mode 100644
index 0000000000..dc2a0a3d26
--- /dev/null
+++ b/drivers/opus/celt/celt_lpc.h
@@ -0,0 +1,54 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PLC_H /* NOTE(review): guard is named PLC_H although this header is celt_lpc.h -- confirm no other header uses the same guard */
+#define PLC_H
+
+#include "arch.h"
+
+#define LPC_ORDER 24 /* size of the fixed-point work array inside _celt_lpc() */
+
+void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p); /* ac[0..p] autocorrelation in, p LPC coefficients out */
+
+void celt_fir(const opus_val16 *x, /* N input samples */
+ const opus_val16 *num, /* ord filter taps */
+ opus_val16 *y, /* N output samples */
+ int N,
+ int ord,
+ opus_val16 *mem); /* ord-sample filter state, read and updated */
+
+void celt_iir(const opus_val32 *x, /* N input samples */
+ const opus_val16 *den, /* ord feedback coefficients */
+ opus_val32 *y, /* N output samples */
+ int N,
+ int ord,
+ opus_val16 *mem); /* ord-sample filter state, read and updated */
+
+int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, /* writes ac[0..lag] and returns the scaling shift */
+ const opus_val16 *window, int overlap, int lag, int n, int arch);
+
+#endif /* PLC_H */
diff --git a/drivers/opus/celt/cpu_support.h b/drivers/opus/celt/cpu_support.h
new file mode 100644
index 0000000000..d68dbe62c5
--- /dev/null
+++ b/drivers/opus/celt/cpu_support.h
@@ -0,0 +1,54 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM)
+#include "arm/armcpu.h"
+
+/* We currently support 4 ARM variants:
+ * arch[0] -> ARMv4
+ * arch[1] -> ARMv5E
+ * arch[2] -> ARMv6
+ * arch[3] -> NEON
+ * OPUS_ARCHMASK (3) covers exactly these four indices (0...3).
+ * NOTE(review): opus_select_arch() is not defined in this branch; presumably
+ * it is provided through arm/armcpu.h -- confirm.
+ */
+#define OPUS_ARCHMASK 3
+
+#else
+#define OPUS_ARCHMASK 0 /* Only arch index 0 exists in this configuration. */
+
+static OPUS_INLINE int opus_select_arch(void) /* Fallback used when
+ OPUS_HAVE_RTCD and OPUS_ARM_ASM are not both defined: there is nothing to
+ detect, so arch 0 is always selected. */
+{
+ return 0;
+}
+#endif
+
+#endif
diff --git a/drivers/opus/celt/cwrs.c b/drivers/opus/celt/cwrs.c
new file mode 100644
index 0000000000..b866aa9210
--- /dev/null
+++ b/drivers/opus/celt/cwrs.c
@@ -0,0 +1,697 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Copyright (c) 2007-2009 Timothy B. Terriberry
+ Written by Timothy B. Terriberry and Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "os_support.h"
+#include "cwrs.h"
+#include "mathops.h"
+#include "arch.h"
+
+#ifdef CUSTOM_MODES
+
+/*Guaranteed to return a conservatively large estimate of the binary logarithm
+ with frac bits of fractional precision.
+ Tested for all possible 32-bit inputs with frac=4, where the maximum
+ overestimation is 0.06254243 bits.
+ val: The value whose base-2 logarithm is estimated.
+ frac: The number of fractional bits in the result.
+ Return: The estimate as a fixed-point integer scaled by 2**frac.
+ NOTE(review): this relies on EC_ILOG(val) being one more than the index of
+ the highest set bit of val (so that l-1 is the integer part of the
+ logarithm); EC_ILOG is defined elsewhere -- confirm.*/
+int log2_frac(opus_uint32 val, int frac)
+{
+ int l;
+ l=EC_ILOG(val);
+ if(val&(val-1)){ /*Non-zero only when val has more than one bit set.*/
+ /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
+ before the shift would cause overflow (e.g., for 0xFFFFxxxx).
+ Doesn't work for val=0, but that case fails the test above.*/
+ if(l>16)val=((val-1)>>(l-16))+1;
+ else val<<=16-l;
+ l=(l-1)<<frac; /*Integer part of the result, moved above the frac bits.*/
+ /*Note that we always need one iteration, since the rounding up above means
+ that we might need to adjust the integer part of the logarithm.*/
+ do{
+ int b;
+ b=(int)(val>>16); /*Set when val has reached bit 16.*/
+ l+=b<<frac; /*Add b at the bit position selected by the current frac.*/
+ val=(val+b)>>b; /*Halve val, rounding up, when b is set.*/
+ val=(val*val+0x7FFF)>>15; /*Square val for the next bit of the result.*/
+ }
+ while(frac-->0); /*frac counts down, so each pass targets a lower bit.*/
+ /*If val is not exactly 0x8000, then we have to round up the remainder.*/
+ return l+(val>0x8000);
+ }
+ /*Exact powers of two require no rounding.*/
+ else return (l-1)<<frac;
+}
+#endif
+
+/*Although derived separately, the pulse vector coding scheme is equivalent to
+ a Pyramid Vector Quantizer \cite{Fis86}.
+ Some additional notes about an early version appear at
+ http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
+ and the definitions of some terms have evolved since that was written.
+
+ The conversion from a pulse vector to an integer index (encoding) and back
+ (decoding) is governed by two related functions, V(N,K) and U(N,K).
+
+ V(N,K) = the number of combinations, with replacement, of N items, taken K
+ at a time, when a sign bit is added to each item taken at least once (i.e.,
+ the number of N-dimensional unit pulse vectors with K pulses).
+ One way to compute this is via
+ V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
+ where choose() is the binomial function.
+ A table of values for N<10 and K<10 looks like:
+ V[10][10] = {
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ {1, 4, 8, 12, 16, 20, 24, 28, 32, 36},
+ {1, 6, 18, 38, 66, 102, 146, 198, 258, 326},
+ {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992},
+ {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290},
+ {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436},
+ {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598},
+ {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688},
+ {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146}
+ };
+
+ U(N,K) = the number of such combinations wherein N-1 objects are taken at
+ most K-1 at a time.
+ This is given by
+ U(N,K) = sum(k=0...K-1,V(N-1,k))
+ = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
+ The latter expression also makes clear that U(N,K) is half the number of such
+ combinations wherein the first object is taken at least once.
+ Although it may not be clear from either of these definitions, U(N,K) is the
+ natural function to work with when enumerating the pulse vector codebooks,
+ not V(N,K).
+ U(N,K) is not well-defined for N=0, but with the extension
+ U(0,K) = K>0 ? 0 : 1,
+ the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
+ U[10][10] = {
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {0, 1, 3, 5, 7, 9, 11, 13, 15, 17},
+ {0, 1, 5, 13, 25, 41, 61, 85, 113, 145},
+ {0, 1, 7, 25, 63, 129, 231, 377, 575, 833},
+ {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649},
+ {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073},
+ {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081},
+ {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545},
+ {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729}
+ };
+
+ With this extension, V(N,K) may be written in terms of U(N,K):
+ V(N,K) = U(N,K) + U(N,K+1)
+ for all N>=0, K>=0.
+ Thus U(N,K+1) represents the number of combinations where the first element
+ is positive or zero, and U(N,K) represents the number of combinations where
+ it is negative.
+ With a large enough table of U(N,K) values, we could write O(N) encoding
+ and O(min(N*log(K),N+K)) decoding routines, but such a table would be
+ prohibitively large for small embedded devices (K may be as large as 32767
+ for small N, and N may be as large as 200).
+
+ Both functions obey the same recurrence relation:
+ V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
+ U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
+ for all N>0, K>0, with different initial conditions at N=0 or K=0.
+ This allows us to construct a row of one of the tables above given the
+ previous row or the next row.
+ Thus we can derive O(NK) encoding and decoding routines with O(K) memory
+ using only addition and subtraction.
+
+ When encoding, we build up from the U(2,K) row and work our way forwards.
+ When decoding, we need to start at the U(N,K) row and work our way backwards,
+ which requires a means of computing U(N,K).
+ U(N,K) may be computed from two previous values with the same N:
+ U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
+ for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
+ previous values with the same K:
+ U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
+ for all K>1.
+ This allows us to construct an arbitrary row of the U(N,K) table by starting
+ with the first two values, which are constants.
+ This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
+ multiplications.
+ Similar relations can be derived for V(N,K), but are not used here.
+
+ For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
+ polynomial for fixed N.
+ The first few are
+ U(1,K) = 1,
+ U(2,K) = 2*K-1,
+ U(3,K) = (2*K-2)*K+1,
+ U(4,K) = (((4*K-6)*K+8)*K-3)/3,
+ U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
+ and
+ V(1,K) = 2,
+ V(2,K) = 4*K,
+ V(3,K) = 4*K*K+2,
+ V(4,K) = 8*(K*K+2)*K/3,
+ V(5,K) = ((4*K*K+20)*K*K+6)/3,
+ for all K>0.
+ This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
+ small N (and indeed decoding is also O(N) for N<3).
+
+ @ARTICLE{Fis86,
+ author="Thomas R. Fischer",
+ title="A Pyramid Vector Quantizer",
+ journal="IEEE Transactions on Information Theory",
+ volume="IT-32",
+ number=4,
+ pages="568--583",
+ month=Jul,
+ year=1986
+ }*/
+
+#if !defined(SMALL_FOOTPRINT)
+
+/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?0:1
+ The lookup uses the smaller argument as the row and the larger one as the
+ column, relying on the symmetry of U().
+ Each argument appears more than once in the expansion, so do not pass
+ expressions with side effects.*/
+# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)])
+/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N
+ with K pulses allocated to it.*/
+# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1))
+
+/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)).
+ Thus, the number of entries in row I is the larger of the maximum number of
+ pulses we will ever allocate for a given N=I (K=128, or however many fit in
+ 32 bits, whichever is smaller), plus one, and the maximum N for which
+ K=I-1 pulses fit in 32 bits.
+ The largest band size in an Opus Custom mode is 208.
+ Otherwise, we can limit things to the set of N which can be achieved by
+ splitting a band from a standard Opus mode (176, 144, 96, 88, 72, 64, 48,
+ 44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/
+#if defined(CUSTOM_MODES)
+static const opus_uint32 CELT_PVQ_U_DATA[1488]={
+#else
+static const opus_uint32 CELT_PVQ_U_DATA[1272]={
+#endif
+ /*N=0, K=0...176:*/
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+#endif
+ /*N=1, K=1...176:*/
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1,
+#endif
+ /*N=2, K=2...176:*/
+ 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
+ 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+ 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113,
+ 115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143,
+ 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173,
+ 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203,
+ 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233,
+ 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263,
+ 265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293,
+ 295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323,
+ 325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381,
+ 383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411,
+ 413, 415,
+#endif
+ /*N=3, K=3...176:*/
+ 13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613,
+ 685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861,
+ 1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785,
+ 3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385,
+ 6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661,
+ 9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961,
+ 13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745,
+ 17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013,
+ 21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765,
+ 26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001,
+ 31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721,
+ 37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925,
+ 43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613,
+ 50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785,
+ 57461, 58141, 58825, 59513, 60205, 60901, 61601,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565,
+ 70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013,
+ 78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113,
+#endif
+ /*N=4, K=4...176:*/
+ 63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017,
+ 7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775,
+ 30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153,
+ 82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193,
+ 161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575,
+ 267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217,
+ 410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951,
+ 597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609,
+ 833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023,
+ 1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407,
+ 1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759,
+ 1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175,
+ 2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751,
+ 2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583,
+ 3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767,
+ 3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399,
+ 4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575,
+ 5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391,
+ 6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943,
+ 7085049, 7207551,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783,
+ 8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967,
+ 9511425, 9660423, 9810969, 9963071, 10116737, 10271975, 10428793, 10587199,
+ 10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177,
+ 11912575,
+#endif
+ /*N=5, K=5...176:*/
+ 321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041,
+ 50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401,
+ 330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241,
+ 1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241,
+ 2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801,
+ 4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849,
+ 8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849,
+ 13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809,
+ 20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881,
+ 29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641,
+ 40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081,
+ 55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609,
+ 73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049,
+ 95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641,
+ 122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041,
+ 155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321,
+ 193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969,
+ 238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889,
+ 290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401,
+ 351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241,
+ 420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561,
+ 500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929,
+ 590359041, 604167209, 618216201, 632508801,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241,
+ 755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161,
+ 878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329,
+ 1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041,
+ 1143412929, 1166053121, 1189027881, 1212340489, 1235994241,
+#endif
+ /*N=6, K=6...96:*/
+ 1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047,
+ 335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409,
+ 2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793,
+ 11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455,
+ 29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189,
+ 64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651,
+ 128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185,
+ 235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647,
+ 402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229,
+ 655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283,
+ 1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135,
+ 1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187,
+ 2011371957, 2120032959,
+#if defined(CUSTOM_MODES)
+ /*...109:*/
+ 2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U,
+ 3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U,
+ 4012305913U,
+#endif
+ /*N=7, K=7...54:*/
+ 8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777,
+ 1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233,
+ 19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013,
+ 88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805,
+ 292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433,
+ 793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821,
+ 1667010073, 1870535785, 2094367717,
+#if defined(CUSTOM_MODES)
+ /*...60:*/
+ 2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U,
+#endif
+ /*N=8, K=8...37:*/
+ 48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767,
+ 9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017,
+ 104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351,
+ 638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615,
+ 2229491905U,
+#if defined(CUSTOM_MODES)
+ /*...40:*/
+ 2691463695U, 3233240945U, 3866006015U,
+#endif
+ /*N=9, K=9...28:*/
+ 265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777,
+ 39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145,
+ 628496897, 872893441, 1196924561, 1621925137, 2173806145U,
+#if defined(CUSTOM_MODES)
+ /*...29:*/
+ 2883810113U,
+#endif
+ /*N=10, K=10...24:*/
+ 1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073,
+ 254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U,
+ 3375210671U,
+ /*N=11, K=11...19:*/
+ 8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585,
+ 948062325, 1616336765,
+#if defined(CUSTOM_MODES)
+ /*...20:*/
+ 2684641785U,
+#endif
+ /*N=12, K=12...18:*/
+ 45046719, 103274625, 224298231, 464387817, 921406335, 1759885185,
+ 3248227095U,
+ /*N=13, K=13...16:*/
+ 251595969, 579168825, 1267854873, 2653649025U,
+ /*N=14, K=14:*/
+ 1409933619
+};
+
+#if defined(CUSTOM_MODES)
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ /*Row pointers for
+ N=0...14 into CELT_PVQ_U_DATA.
+ Row N of the data only stores the entries with K>=N, so each pointer is
+ moved back by N elements relative to the start of its row's data.
+ CELT_PVQ_U_ROW[N][K] can therefore be indexed directly by K, but is only
+ valid for K>=N (and up to the largest K stored for that row).
+ These offsets match the CUSTOM_MODES layout of the data (1488 entries).*/
+ CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415,
+ CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030,
+ CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389,
+ CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455,
+ CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
+};
+#else
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ /*Row pointers for
+ N=0...14 into CELT_PVQ_U_DATA.
+ Row N of the data only stores the entries with K>=N, so each pointer is
+ moved back by N elements relative to the start of its row's data.
+ CELT_PVQ_U_ROW[N][K] can therefore be indexed directly by K, but is only
+ valid for K>=N (and up to the largest K stored for that row).
+ These offsets match the default layout of the data (1272 entries).*/
+ CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
+ CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
+ CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
+ CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240,
+ CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257
+};
+#endif
+
+#if defined(CUSTOM_MODES)
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ /*Fills
+ _bits[k] with log2_frac(V(_n,k),_frac) for every k in [1..._maxk], reading
+ V(_n,k) from the precomputed table, and sets _bits[0] to 0.
+ _bits: Output array; entries [0..._maxk] are written.
+ _n: The band size (number of dimensions).
+ _maxk: The largest pulse count to evaluate; must be positive.
+ _frac: The number of fractional bits passed on to log2_frac().*/
+ int k;
+ /*_maxk==0 => there's nothing to do.*/
+ celt_assert(_maxk>0);
+ _bits[0]=0;
+ for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac);
+}
+#endif
+
+static opus_uint32 icwrs(int _n,const int *_y){ /*Returns the codebook index
+ of the pulse vector _y, using the precomputed U() table.
+ _n: The number of entries in _y; must be at least 2.
+ _y: The vector of pulses.
+ The vector is scanned from its last entry to its first.*/
+ opus_uint32 i;
+ int j;
+ int k;
+ celt_assert(_n>=2);
+ j=_n-1;
+ i=_y[j]<0; /*The last entry contributes only its sign.*/
+ k=abs(_y[j]); /*k is the number of pulses in the entries scanned so far.*/
+ do{
+ j--;
+ i+=CELT_PVQ_U(_n-j,k); /*_n-j is the number of entries from j to the end.*/
+ k+=abs(_y[j]);
+ if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1); /*Extra offset for a negative entry.*/
+ }
+ while(j>0);
+ return i;
+}
+
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ /*Writes the
+ pulse vector _y to _enc by passing its index from icwrs(), together with the
+ codebook size V(_n,_k), to ec_enc_uint().
+ _y: The vector of pulses, with _n entries.
+ _n: The number of dimensions (icwrs() requires at least 2).
+ _k: The number of pulses; must be positive.
+ _enc: The entropy encoder to write to.*/
+ celt_assert(_k>0);
+ ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
+}
+
+static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ /*Reconstructs the
+ pulse vector with index _i, using the precomputed U() table.
+ _n: The number of dimensions; must be greater than 1.
+ _k: The number of pulses; must be positive.
+ _i: The codebook index to decode.
+ _y: Output; _n entries are written, one per dimension, in order.
+ Each loop iteration decodes one entry and then continues with the remaining
+ index, pulses and dimensions; the last two entries are handled in closed
+ form after the loop.*/
+ opus_uint32 p;
+ int s;
+ int k0;
+ celt_assert(_k>0);
+ celt_assert(_n>1);
+ while(_n>2){
+ opus_uint32 q;
+ /*Lots of pulses case:*/
+ if(_k>=_n){
+ const opus_uint32 *row;
+ row=CELT_PVQ_U_ROW[_n]; /*Valid for columns >=_n, which holds as _k>=_n.*/
+ /*Are the pulses in this dimension negative?*/
+ p=row[_k+1];
+ s=-(_i>=p); /*s is -1 (all bits set) for negative, 0 otherwise.*/
+ _i-=p&s; /*Subtracts p only when s is -1.*/
+ /*Count how many pulses were placed in this dimension.*/
+ k0=_k;
+ q=row[_n]; /*The first entry this row pointer may be indexed at.*/
+ if(q>_i){
+ celt_assert(p>q);
+ _k=_n;
+ /*Below column _n the entries are not reachable through row, so they
+ are looked up through the rows with a smaller index, column _n.*/
+ do p=CELT_PVQ_U_ROW[--_k][_n];
+ while(p>_i);
+ }
+ else for(p=row[_k];p>_i;p=row[_k])_k--;
+ _i-=p;
+ *_y++=(k0-_k+s)^s; /*(x+s)^s is x when s==0 and -x when s==-1.*/
+ }
+ /*Lots of dimensions case:*/
+ else{
+ /*Are there any pulses in this dimension at all?*/
+ p=CELT_PVQ_U_ROW[_k][_n];
+ q=CELT_PVQ_U_ROW[_k+1][_n];
+ if(p<=_i&&_i<q){
+ _i-=p;
+ *_y++=0;
+ }
+ else{
+ /*Are the pulses in this dimension negative?*/
+ s=-(_i>=q);
+ _i-=q&s;
+ /*Count how many pulses were placed in this dimension.*/
+ k0=_k;
+ do p=CELT_PVQ_U_ROW[--_k][_n];
+ while(p>_i);
+ _i-=p;
+ *_y++=(k0-_k+s)^s; /*(x+s)^s is x when s==0 and -x when s==-1.*/
+ }
+ }
+ _n--;
+ }
+ /*_n==2*/
+ p=2*_k+1; /*2*K-1 evaluated at K=_k+1, i.e. the U(2,K) formula.*/
+ s=-(_i>=p);
+ _i-=p&s;
+ k0=_k;
+ _k=(_i+1)>>1;
+ if(_k)_i-=2*_k-1; /*2*K-1 evaluated at the new _k.*/
+ *_y++=(k0-_k+s)^s;
+ /*_n==1*/
+ s=-(int)_i; /*The remaining index is the sign of the last entry.*/
+ *_y=(_k+s)^s;
+}
+
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ /*Reads an index from
+ _dec with ec_dec_uint(), using the codebook size V(_n,_k), and expands it
+ into the pulse vector _y with cwrsi().
+ _y: Output vector of pulses, with _n entries.
+ _n: The number of dimensions (cwrsi() requires more than 1).
+ _k: The number of pulses (cwrsi() requires a positive value).
+ _dec: The entropy decoder to read from.*/
+ cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
+}
+
+#else /* SMALL_FOOTPRINT */
+
+/*Computes the next row/column of any recurrence that obeys the relation
+ u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
+ _ui0 is the base case for the new row/column.
+ _ui: On entry, the current row/column; it is replaced in place by the next
+ one.
+ _len: The number of entries in _ui; must be at least 2.*/
+static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
+ opus_uint32 ui1;
+ unsigned j;
+ /*This do-while will overrun the array if we don't have storage for at least
+ 2 values.*/
+ j=1; do {
+ /*The new entry j is computed from the old entries j and j-1 plus the new
+ entry j-1, which is carried in _ui0.*/
+ ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
+ _ui[j-1]=_ui0; /*The old entry j-1 is no longer needed; overwrite it.*/
+ _ui0=ui1;
+ } while (++j<_len);
+ _ui[j-1]=_ui0; /*Store the last entry that was computed.*/
+}
+
+/*Computes the previous row/column of any recurrence that obeys the relation
+ u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
+ _ui0 is the base case for the new row/column.
+ _ui: On entry, the current row/column; it is replaced in place by the
+ previous one.
+ _n: The number of entries in _ui; must be at least 2.*/
+static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
+ opus_uint32 ui1;
+ unsigned j;
+ /*This do-while will overrun the array if we don't have storage for at least
+ 2 values.*/
+ j=1; do {
+ /*The new entry j is the old entry j minus the old entry j-1 and the new
+ entry j-1, which is carried in _ui0.*/
+ ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
+ _ui[j-1]=_ui0; /*The old entry j-1 is no longer needed; overwrite it.*/
+ _ui0=ui1;
+ } while (++j<_n);
+ _ui[j-1]=_ui0; /*Store the last entry that was computed.*/
+}
+
+/*Compute V(_n,_k), as well as U(_n,0..._k+1).
+ _n: The number of dimensions; must be at least 2.
+ _k: The number of pulses; must be positive.
+ _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].
+ It must have room for _k+2 entries.
+ Return: V(_n,_k), computed as _u[_k]+_u[_k+1].*/
+static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
+ opus_uint32 um2; /*Assigned below but not read anywhere in this function.*/
+ unsigned len;
+ unsigned k;
+ len=_k+2;
+ /*We require storage for at least 3 values (e.g., _k>0).*/
+ celt_assert(len>=3);
+ _u[0]=0;
+ _u[1]=um2=1;
+ /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
+ /*If _n==1, _u[i] should be 1 for i>1.*/
+ celt_assert(_n>=2);
+ /*If _k==0, the following do-while loop will overflow the buffer.*/
+ celt_assert(_k>0);
+ k=2;
+ do _u[k]=(k<<1)-1; /*2*k-1, i.e. the U(2,k) row.*/
+ while(++k<len);
+ /*Advance one row per iteration, from N=2 up to N=_n.
+ _u[0] is left out of the update and stays 0; the base case 1 becomes
+ _u[1].*/
+ for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
+ return _u[_k]+_u[_k+1];
+}
+
+/*Returns the _i'th combination of _k elements chosen from a set of size _n
+ with associated sign bits.
+ _n: The number of dimensions; must be positive.
+ _k: The number of pulses.
+ _i: The index of the combination to return.
+ _y: Returns the vector of pulses.
+ _u: Must contain entries [0..._k+1] of row _n of U() on input.
+ Its contents will be destructively modified.*/
+static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
+ int j;
+ celt_assert(_n>0);
+ j=0;
+ do{
+ opus_uint32 p;
+ int s;
+ int yj;
+ p=_u[_k+1];
+ s=-(_i>=p); /*s is -1 (all bits set) for a negative entry, 0 otherwise.*/
+ _i-=p&s; /*Subtracts p only when s is -1.*/
+ yj=_k;
+ p=_u[_k];
+ while(p>_i)p=_u[--_k]; /*Find the largest remaining _k with _u[_k]<=_i.*/
+ _i-=p;
+ yj-=_k; /*The number of pulses used by entry j.*/
+ _y[j]=(yj+s)^s; /*(x+s)^s is x when s==0 and -x when s==-1.*/
+ uprev(_u,_k+2,0); /*Step _u back one row, for the remaining _k only.*/
+ }
+ while(++j<_n);
+}
+
+/*Returns the index of the given combination of K elements chosen from a set
+ of size 1 with associated sign bits.
+ _y: The vector of pulses, whose sum of absolute values is K.
+ Only _y[0] is read.
+ _k: Returns K.
+ Return: 1 if _y[0] is negative, 0 otherwise.*/
+static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){
+ *_k=abs(_y[0]);
+ return _y[0]<0;
+}
+
+/*Returns the index of the given combination of K elements chosen from a set
+ of size _n with associated sign bits.
+ _n: The number of entries in _y; must be at least 2.
+ _k: The number of pulses.
+ _y: The vector of pulses, whose sum of absolute values must be _k.
+ _nc: Returns V(_n,_k).
+ _u: Scratch space with room for _k+2 entries; its contents on entry are
+ ignored and it is overwritten.*/
+static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
+ opus_uint32 *_u){
+ opus_uint32 i;
+ int j;
+ int k;
+ /*We can't unroll the first two iterations of the loop unless _n>=2.*/
+ celt_assert(_n>=2);
+ _u[0]=0;
+ for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1; /*2*k-1, i.e. the U(2,k) row.*/
+ i=icwrs1(_y+_n-1,&k); /*Start from the last entry; k is its pulse count.*/
+ j=_n-2;
+ i+=_u[k];
+ k+=abs(_y[j]);
+ if(_y[j]<0)i+=_u[k+1];
+ while(j-->0){
+ unext(_u,_k+2,0); /*Advance _u to the next row before using entry j.*/
+ i+=_u[k];
+ k+=abs(_y[j]);
+ if(_y[j]<0)i+=_u[k+1];
+ }
+ *_nc=_u[k]+_u[k+1];
+ return i;
+}
+
+#ifdef CUSTOM_MODES
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ /*Fills
+ _bits[k] with log2_frac(V(_n,k),_frac) for every k in [1..._maxk] and sets
+ _bits[0] to 0, computing the U() row at run time instead of using a table.
+ _bits: Output array; entries [0..._maxk] are written.
+ _n: The band size (number of dimensions).
+ _maxk: The largest pulse count to evaluate; must be positive.
+ _frac: The number of fractional bits in each result.*/
+ int k;
+ /*_maxk==0 => there's nothing to do.*/
+ celt_assert(_maxk>0);
+ _bits[0]=0;
+ if (_n==1)
+ {
+ /*ncwrs_urow() requires _n>=2, so _n==1 is handled directly: every entry is
+ exactly one bit, expressed with _frac fractional bits.*/
+ for (k=1;k<=_maxk;k++)
+ _bits[k] = 1<<_frac;
+ }
+ else {
+ VARDECL(opus_uint32,u);
+ SAVE_STACK;
+ ALLOC(u,_maxk+2U,opus_uint32); /*ncwrs_urow() fills entries [0..._maxk+1].*/
+ ncwrs_urow(_n,_maxk,u); /*Only the row is needed; the return is unused.*/
+ for(k=1;k<=_maxk;k++)
+ _bits[k]=log2_frac(u[k]+u[k+1],_frac); /*u[k]+u[k+1] is V(_n,k).*/
+ RESTORE_STACK;
+ }
+}
+#endif /* CUSTOM_MODES */
+
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ /*Writes the
+ pulse vector _y to _enc by passing its index and the codebook size, both
+ obtained from icwrs(), to ec_enc_uint().
+ _y: The vector of pulses, with _n entries.
+ _n: The number of dimensions (icwrs() requires at least 2).
+ _k: The number of pulses; must be positive.
+ _enc: The entropy encoder to write to.*/
+ opus_uint32 i;
+ VARDECL(opus_uint32,u);
+ opus_uint32 nc;
+ SAVE_STACK;
+ celt_assert(_k>0);
+ ALLOC(u,_k+2U,opus_uint32); /*Scratch row for icwrs(): _k+2 entries.*/
+ i=icwrs(_n,_k,&nc,_y,u);
+ ec_enc_uint(_enc,i,nc);
+ RESTORE_STACK;
+}
+
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ /*Reads an index from
+ _dec with ec_dec_uint() and expands it into the pulse vector _y.
+ ncwrs_urow() supplies both the codebook size passed to ec_dec_uint() and
+ the row of U() in u that cwrsi() then consumes.
+ _y: Output vector of pulses, with _n entries.
+ _n: The number of dimensions (ncwrs_urow() requires at least 2).
+ _k: The number of pulses; must be positive.
+ _dec: The entropy decoder to read from.*/
+ VARDECL(opus_uint32,u);
+ SAVE_STACK;
+ celt_assert(_k>0);
+ ALLOC(u,_k+2U,opus_uint32); /*Row buffer: _k+2 entries.*/
+ cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
+ RESTORE_STACK;
+}
+
+#endif /* SMALL_FOOTPRINT */
diff --git a/drivers/opus/celt/cwrs.h b/drivers/opus/celt/cwrs.h
new file mode 100644
index 0000000000..7dfbd076d1
--- /dev/null
+++ b/drivers/opus/celt/cwrs.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Copyright (c) 2007-2009 Timothy B. Terriberry
+ Written by Timothy B. Terriberry and Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CWRS_H
+#define CWRS_H
+
+#include "arch.h"
+#include "stack_alloc.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef CUSTOM_MODES
+int log2_frac(opus_uint32 val, int frac); /* Rounded-up estimate of the binary
+ logarithm of val, with frac bits of fractional precision. */
+#endif
+
+void get_required_bits(opus_int16 *bits, int N, int K, int frac); /* Fills
+ bits[0...K] with the log2_frac() size of the codebook for N dimensions and
+ each pulse count up to K. Declared unconditionally, but cwrs.c only defines
+ it when CUSTOM_MODES is defined. */
+
+void encode_pulses(const int *_y, int N, int K, ec_enc *enc); /* Writes the
+ pulse vector _y (N entries, K pulses, K>0) to enc. */
+
+void decode_pulses(int *_y, int N, int K, ec_dec *dec); /* Reads a pulse vector
+ of N entries and K pulses from dec into _y. */
+
+#endif /* CWRS_H */
diff --git a/drivers/opus/celt/ecintrin.h b/drivers/opus/celt/ecintrin.h
new file mode 100644
index 0000000000..2263cff6bd
--- /dev/null
+++ b/drivers/opus/celt/ecintrin.h
@@ -0,0 +1,87 @@
+/* Copyright (c) 2003-2008 Timothy B. Terriberry
+ Copyright (c) 2008 Xiph.Org Foundation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*Some common macros for potential platform-specific optimization.*/
+#include "opus_types.h"
+#include <math.h>
+#include <limits.h>
+#include "arch.h"
+#if !defined(_ecintrin_H)
+# define _ecintrin_H (1)
+
+/*Some specific platforms may have optimized intrinsic or OPUS_INLINE assembly
+ versions of these functions which can substantially improve performance.
+ We define macros for them to allow easy incorporation of these non-ANSI
+ features.*/
+
+/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
+ given an appropriate architecture, but the branchless bit-twiddling versions
+ are just as fast, and do not require any special target architecture.
+ Earlier gcc versions (3.x) compiled both code to the same assembly
+ instructions, because of the way they represented ((_b)>(_a)) internally.*/
+# define EC_MINI(_a,_b) ((_a)+(((_b)-(_a))&-((_b)<(_a))))
+
+/*Count leading zeros.
+ This macro should only be used for implementing ec_ilog(), if it is defined.
+ All other code should use EC_ILOG() instead.*/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+# include <intrin.h>
+/*In _DEBUG mode this is not an intrinsic by default.*/
+# pragma intrinsic(_BitScanReverse)
+
+static __inline int ec_bsr(unsigned long _x){ /*Wraps the MSVC _BitScanReverse intrinsic and returns the index it reports.*/
+ unsigned long ret;
+ _BitScanReverse(&ret,_x); /*The intrinsic's own return value is ignored; ret is presumably unspecified for _x==0 (confirm against MSVC docs).*/
+ return (int)ret;
+}
+# define EC_CLZ0 (1)
+# define EC_CLZ(_x) (-ec_bsr(_x))
+#elif defined(ENABLE_TI_DSPLIB)
+# include "dsplib.h"
+# define EC_CLZ0 (31)
+# define EC_CLZ(_x) (_lnorm(_x))
+#elif __GNUC_PREREQ(3,4)
+# if INT_MAX>=2147483647
+# define EC_CLZ0 ((int)sizeof(unsigned)*CHAR_BIT)
+# define EC_CLZ(_x) (__builtin_clz(_x))
+# elif LONG_MAX>=2147483647L
+# define EC_CLZ0 ((int)sizeof(unsigned long)*CHAR_BIT)
+# define EC_CLZ(_x) (__builtin_clzl(_x))
+# endif
+#endif
+
+#if defined(EC_CLZ)
+/*Note that __builtin_clz is not defined when _x==0, according to the gcc
+ documentation (and that of the BSR instruction that implements it on x86).
+ The majority of the time we can never pass it zero.
+ When we need to, it can be special cased.*/
+# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x))
+#else
+int ec_ilog(opus_uint32 _v);
+# define EC_ILOG(_x) (ec_ilog(_x))
+#endif
+#endif
diff --git a/drivers/opus/celt/entcode.c b/drivers/opus/celt/entcode.c
new file mode 100644
index 0000000000..fd817a9db5
--- /dev/null
+++ b/drivers/opus/celt/entcode.c
@@ -0,0 +1,93 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "entcode.h"
+#include "arch.h"
+
+#if !defined(EC_CLZ)
+/*This is a fallback for systems where we don't know how to access
+ a BSR or CLZ instruction (see ecintrin.h).
+ If you are optimizing Opus on a new platform and it has a native CLZ or
+ BSR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be
+ an easy performance win.*/
+int ec_ilog(opus_uint32 _v){ /*Returns the 1-based position of the highest set bit of _v, or 0 when _v==0.*/
+ /*On a Pentium M, this branchless version tested as the fastest on
+ 1,000,000,000 random 32-bit integers, edging out a similar version with
+ branches, and a 256-entry LUT version.*/
+ int ret;
+ int m;
+ ret=!!_v; /*1 if any bit is set, 0 otherwise.*/
+ m=!!(_v&0xFFFF0000)<<4; /*16 if the upper half-word is in use, else 0.*/
+ _v>>=m; /*Shift the half that holds the top bit down into the low 16 bits.*/
+ ret|=m; /*m is a distinct power of two at every step, so OR acts as addition.*/
+ m=!!(_v&0xFF00)<<3; /*8 if the upper byte of the remaining 16 bits is in use.*/
+ _v>>=m;
+ ret|=m;
+ m=!!(_v&0xF0)<<2; /*4 if the upper nibble of the remaining byte is in use.*/
+ _v>>=m;
+ ret|=m;
+ m=!!(_v&0xC)<<1; /*2 if the upper two bits of the remaining nibble are in use.*/
+ _v>>=m;
+ ret|=m;
+ ret+=!!(_v&0x2); /*Final step: add 1 if bit 1 of the remaining two bits is set.*/
+ return ret;
+}
+#endif
+
+opus_uint32 ec_tell_frac(ec_ctx *_this){ /*Bits used so far, in units of 1/(1<<BITRES) bit.*/
+ opus_uint32 nbits;
+ opus_uint32 r;
+ int l;
+ int i;
+ /*To handle the non-integral number of bits still left in the encoder/decoder
+ state, we compute the worst-case number of bits of val that must be
+ encoded to ensure that the value is inside the range for any possible
+ subsequent bits.
+ The computation here is independent of val itself (the decoder does not
+ even track that value), even though the real number of bits used after
+ ec_enc_done() may be 1 smaller if rng is a power of two and the
+ corresponding trailing bits of val are all zeros.
+ If we did try to track that special case, then coding a value with a
+ probability of 1/(1<<n) might sometimes appear to use more than n bits.
+ This may help explain the surprising result that a newly initialized
+ encoder or decoder claims to have used 1 bit.*/
+ nbits=_this->nbits_total<<BITRES; /*Whole-bit count scaled to fractional units.*/
+ l=EC_ILOG(_this->rng); /*Integer part of the bit length of rng.*/
+ r=_this->rng>>(l-16); /*Keep the top 16 bits of rng; assumes l>=16 (rng is kept above EC_CODE_BOT by normalization - confirm).*/
+ for(i=BITRES;i-->0;){ /*One iteration per fractional bit of the logarithm.*/
+ int b;
+ r=r*r>>15; /*Square the 16-bit mantissa and rescale.*/
+ b=(int)(r>>16); /*1 if the squared value spilled into bit 16, else 0.*/
+ l=l<<1|b; /*Append b as the next lower-order bit of l.*/
+ r>>=b; /*Renormalize r back to 16 bits when it spilled over.*/
+ }
+ return nbits-l;
+}
diff --git a/drivers/opus/celt/entcode.h b/drivers/opus/celt/entcode.h
new file mode 100644
index 0000000000..dd13e49e50
--- /dev/null
+++ b/drivers/opus/celt/entcode.h
@@ -0,0 +1,117 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+ Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if !defined(_entcode_H)
+# define _entcode_H (1)
+# include <limits.h>
+# include <stddef.h>
+# include "ecintrin.h"
+
+/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
+ larger type, you can speed up the decoder by using it here.*/
+typedef opus_uint32 ec_window;
+typedef struct ec_ctx ec_ctx;
+typedef struct ec_ctx ec_enc;
+typedef struct ec_ctx ec_dec;
+
+# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT)
+
+/*The number of bits to use for the range-coded part of unsigned integers.*/
+# define EC_UINT_BITS (8)
+
+/*The resolution of fractional-precision bit usage measurements, i.e.,
+ 3 => 1/8th bits.*/
+# define BITRES 3
+
+/*The entropy encoder/decoder context.
+ We use the same structure for both, so that common functions like ec_tell()
+ can be used on either one.*/
+struct ec_ctx{
+ /*Buffered input/output.*/
+ unsigned char *buf;
+ /*The size of the buffer.*/
+ opus_uint32 storage;
+ /*The offset at which the last byte containing raw bits was read/written.*/
+ opus_uint32 end_offs;
+ /*Bits that will be read from/written at the end.*/
+ ec_window end_window;
+ /*Number of valid bits in end_window.*/
+ int nend_bits;
+ /*The total number of whole bits read/written.
+ This does not include partial bits currently in the range coder.*/
+ int nbits_total;
+ /*The offset at which the next range coder byte will be read/written.*/
+ opus_uint32 offs;
+ /*The number of values in the current range.*/
+ opus_uint32 rng;
+ /*In the decoder: the difference between the top of the current range and
+ the input value, minus one.
+ In the encoder: the low end of the current range.*/
+ opus_uint32 val;
+ /*In the decoder: the saved normalization factor from ec_decode().
+ In the encoder: the number of outstanding carry propagating symbols.*/
+ opus_uint32 ext;
+ /*A buffered input/output symbol, awaiting carry propagation.*/
+ int rem;
+ /*Nonzero if an error occurred.*/
+ int error;
+};
+
+static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){ /*Returns offs, the offset of the next range coder byte.*/
+ return _this->offs;
+}
+
+static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){ /*Returns the buffer pointer stored in the context.*/
+ return _this->buf;
+}
+
+static OPUS_INLINE int ec_get_error(ec_ctx *_this){ /*Returns the error field; nonzero means an error occurred.*/
+ return _this->error;
+}
+
+/*Returns the number of bits "used" by the encoded or decoded symbols so far.
+ This same number can be computed in either the encoder or the decoder, and is
+ suitable for making coding decisions.
+ Return: The number of bits.
+ This will always be slightly larger than the exact value (e.g., all
+ rounding error is in the positive direction).*/
+static OPUS_INLINE int ec_tell(ec_ctx *_this){
+ return _this->nbits_total-EC_ILOG(_this->rng); /*Whole bits counted so far minus the bit length of the current range.*/
+}
+
+/*Returns the number of bits "used" by the encoded or decoded symbols so far.
+ This same number can be computed in either the encoder or the decoder, and is
+ suitable for making coding decisions.
+ Return: The number of bits scaled by 2**BITRES.
+ This will always be slightly larger than the exact value (e.g., all
+ rounding error is in the positive direction).*/
+opus_uint32 ec_tell_frac(ec_ctx *_this);
+
+#endif
diff --git a/drivers/opus/celt/entdec.c b/drivers/opus/celt/entdec.c
new file mode 100644
index 0000000000..383da571c9
--- /dev/null
+++ b/drivers/opus/celt/entdec.c
@@ -0,0 +1,245 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+ Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stddef.h>
+#include "os_support.h"
+#include "arch.h"
+#include "entdec.h"
+#include "mfrngcod.h"
+
+/*A range decoder.
+ This is an entropy decoder based upon \cite{Mar79}, which is itself a
+ rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
+ It is very similar to arithmetic encoding, except that encoding is done with
+ digits in any base, instead of with bits, and so it is faster when using
+ larger bases (i.e.: a byte).
+ The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
+ is the base, longer than the theoretical optimum, but to my knowledge there
+ is no published justification for this claim.
+ This only seems true when using near-infinite precision arithmetic so that
+ the process is carried out with no rounding errors.
+
+ An excellent description of implementation details is available at
+ http://www.arturocampos.com/ac_range.html
+ A recent work \cite{MNW98} which proposes several changes to arithmetic
+ encoding for efficiency actually re-discovers many of the principles
+ behind range encoding, and presents a good theoretical analysis of them.
+
+ End of stream is handled by writing out the smallest number of bits that
+ ensures that the stream will be correctly decoded regardless of the value of
+ any subsequent bits.
+ ec_tell() can be used to determine how many bits were needed to decode
+ all the symbols thus far; other data can be packed in the remaining bits of
+ the input buffer.
+ @PHDTHESIS{Pas76,
+ author="Richard Clark Pasco",
+ title="Source coding algorithms for fast data compression",
+ school="Dept. of Electrical Engineering, Stanford University",
+ address="Stanford, CA",
+ month=May,
+ year=1976
+ }
+ @INPROCEEDINGS{Mar79,
+ author="Martin, G.N.N.",
+ title="Range encoding: an algorithm for removing redundancy from a digitised
+ message",
+ booktitle="Video & Data Recording Conference",
+ year=1979,
+ address="Southampton",
+ month=Jul
+ }
+ @ARTICLE{MNW98,
+ author="Alistair Moffat and Radford Neal and Ian H. Witten",
+ title="Arithmetic Coding Revisited",
+ journal="{ACM} Transactions on Information Systems",
+ year=1998,
+ volume=16,
+ number=3,
+ pages="256--294",
+ month=Jul,
+ URL="http://www.stanford.edu/class/ee398a/handouts/papers/Moffat98ArithmCoding.pdf"
+ }*/
+
+static int ec_read_byte(ec_dec *_this){ /*Next byte from the front of buf, advancing offs; yields 0 once offs reaches storage.*/
+ return _this->offs<_this->storage?_this->buf[_this->offs++]:0;
+}
+
+static int ec_read_byte_from_end(ec_dec *_this){ /*Next byte walking backwards from the end of buf; yields 0 once end_offs reaches storage.*/
+ return _this->end_offs<_this->storage?
+ _this->buf[_this->storage-++(_this->end_offs)]:0; /*end_offs is incremented before indexing, so the first read returns buf[storage-1].*/
+}
+
+/*Normalizes the contents of val and rng so that rng lies entirely in the
+ high-order symbol.*/
+static void ec_dec_normalize(ec_dec *_this){
+ /*If the range is too small, rescale it and input some bits.*/
+ while(_this->rng<=EC_CODE_BOT){ /*Each pass widens rng by one symbol and pulls in one input byte.*/
+ int sym;
+ _this->nbits_total+=EC_SYM_BITS; /*Account for the symbol consumed by this pass.*/
+ _this->rng<<=EC_SYM_BITS;
+ /*Use up the remaining bits from our last symbol.*/
+ sym=_this->rem;
+ /*Read the next value from the input.*/
+ _this->rem=ec_read_byte(_this); /*Reads past the end of the buffer return 0.*/
+ /*Take the rest of the bits we need from this new symbol.*/
+ sym=(sym<<EC_SYM_BITS|_this->rem)>>(EC_SYM_BITS-EC_CODE_EXTRA);
+ /*And subtract them from val, capped to be less than EC_CODE_TOP.*/
+ _this->val=((_this->val<<EC_SYM_BITS)+(EC_SYM_MAX&~sym))&(EC_CODE_TOP-1);
+ }
+}
+
+void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ /*Resets the decoder state over the _storage chars at _buf; returns nothing.*/
+ _this->buf=_buf;
+ _this->storage=_storage;
+ _this->end_offs=0; /*No raw bits have been read from the end of the buffer yet.*/
+ _this->end_window=0;
+ _this->nend_bits=0;
+ /*This is the offset from which ec_tell() will subtract partial bits.
+ The final value after the ec_dec_normalize() call will be the same as in
+ the encoder, but we have to compensate for the bits that are added there.*/
+ _this->nbits_total=EC_CODE_BITS+1
+ -((EC_CODE_BITS-EC_CODE_EXTRA)/EC_SYM_BITS)*EC_SYM_BITS;
+ _this->offs=0;
+ _this->rng=1U<<EC_CODE_EXTRA;
+ _this->rem=ec_read_byte(_this); /*Prime rem with the first input byte (0 when the buffer is empty).*/
+ _this->val=_this->rng-1-(_this->rem>>(EC_SYM_BITS-EC_CODE_EXTRA));
+ _this->error=0; /*NOTE(review): ext is not assigned here; it is first written by ec_decode()/ec_decode_bin().*/
+ /*Normalize the interval.*/
+ ec_dec_normalize(_this);
+}
+
+unsigned ec_decode(ec_dec *_this,unsigned _ft){ /*Computes the cumulative frequency of the next symbol for a total of _ft.*/
+ unsigned s;
+ _this->ext=_this->rng/_ft; /*Per-unit scale, saved for the matching ec_dec_update() call.*/
+ s=(unsigned)(_this->val/_this->ext);
+ return _ft-EC_MINI(s+1,_ft); /*Capping s+1 at _ft keeps the result within [0,_ft-1].*/
+}
+
+unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){ /*Same computation as ec_decode() with a total of 1<<_bits.*/
+ unsigned s;
+ _this->ext=_this->rng>>_bits; /*A shift replaces the division because the total is a power of two.*/
+ s=(unsigned)(_this->val/_this->ext);
+ return (1U<<_bits)-EC_MINI(s+1U,1U<<_bits); /*Capping s+1 at the total keeps the result within [0,(1<<_bits)-1].*/
+}
+
+void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){ /*Narrows val/rng to the interval [_fl,_fh) out of _ft, using ext saved by ec_decode().*/
+ opus_uint32 s;
+ s=IMUL32(_this->ext,_ft-_fh); /*Scaled size of everything above the chosen symbol.*/
+ _this->val-=s;
+ _this->rng=_fl>0?IMUL32(_this->ext,_fh-_fl):_this->rng-s; /*When _fl==0 the symbol takes all of the range that remains below s.*/
+ ec_dec_normalize(_this);
+}
+
+/*The probability of having a "one" is 1/(1<<_logp).*/
+int ec_dec_bit_logp(ec_dec *_this,unsigned _logp){ /*Decodes one bit; returns 1 or 0.*/
+ opus_uint32 r;
+ opus_uint32 d;
+ opus_uint32 s;
+ int ret;
+ r=_this->rng;
+ d=_this->val;
+ s=r>>_logp; /*Portion of the range assigned to a "one".*/
+ ret=d<s; /*A one is decoded when val falls below that portion.*/
+ if(!ret)_this->val=d-s; /*For a zero, rebase val past the "one" portion.*/
+ _this->rng=ret?s:r-s;
+ ec_dec_normalize(_this);
+ return ret;
+}
+
+int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){ /*Decodes a symbol index by scanning the inverse CDF table.*/
+ opus_uint32 r;
+ opus_uint32 d;
+ opus_uint32 s;
+ opus_uint32 t;
+ int ret;
+ s=_this->rng; /*Before the loop, s holds the full range.*/
+ d=_this->val;
+ r=s>>_ftb; /*Scale of one table unit.*/
+ ret=-1; /*Pre-incremented in the loop, so the first entry examined is _icdf[0].*/
+ do{
+ t=s; /*Remember the previous threshold.*/
+ s=IMUL32(r,_icdf[++ret]); /*Scaled threshold for the current candidate symbol.*/
+ }
+ while(d<s); /*Stop at the first entry whose threshold is not above val; a final table value of 0 ends the scan.*/
+ _this->val=d-s;
+ _this->rng=t-s; /*Width between the previous and the current threshold.*/
+ ec_dec_normalize(_this);
+ return ret;
+}
+
+opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){ /*Decodes an integer in [0,_ft-1]; _ft must be greater than 1.*/
+ unsigned ft;
+ unsigned s;
+ int ftb;
+ /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/
+ celt_assert(_ft>1);
+ _ft--; /*From here on _ft is the largest decodable value.*/
+ ftb=EC_ILOG(_ft); /*Bit length of that maximum.*/
+ if(ftb>EC_UINT_BITS){
+ opus_uint32 t;
+ ftb-=EC_UINT_BITS; /*The low ftb bits are read via ec_dec_bits(); only the top EC_UINT_BITS go through ec_decode().*/
+ ft=(unsigned)(_ft>>ftb)+1;
+ s=ec_decode(_this,ft);
+ ec_dec_update(_this,s,s+1,ft);
+ t=(opus_uint32)s<<ftb|ec_dec_bits(_this,ftb); /*Recombine the high part with the raw low bits.*/
+ if(t<=_ft)return t;
+ _this->error=1; /*Value exceeds the maximum: record an error and return the maximum instead.*/
+ return _ft;
+ }
+ else{
+ _ft++; /*Restore the original total for a single range-decoded symbol.*/
+ s=ec_decode(_this,(unsigned)_ft);
+ ec_dec_update(_this,s,s+1,(unsigned)_ft);
+ return s;
+ }
+}
+
+opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){ /*Returns the next _bits raw bits, taken from the end of the buffer.*/
+ ec_window window;
+ int available;
+ opus_uint32 ret;
+ window=_this->end_window;
+ available=_this->nend_bits;
+ if((unsigned)available<_bits){ /*Not enough buffered bits: refill the window.*/
+ do{
+ window|=(ec_window)ec_read_byte_from_end(_this)<<available; /*New bytes are placed above the bits already held.*/
+ available+=EC_SYM_BITS;
+ }
+ while(available<=EC_WINDOW_SIZE-EC_SYM_BITS); /*Keep reading while another whole symbol still fits.*/
+ }
+ ret=(opus_uint32)window&(((opus_uint32)1<<_bits)-1U); /*The result is the low _bits bits of the window.*/
+ window>>=_bits;
+ available-=_bits;
+ _this->end_window=window;
+ _this->nend_bits=available;
+ _this->nbits_total+=_bits;
+ return ret;
+}
diff --git a/drivers/opus/celt/entdec.h b/drivers/opus/celt/entdec.h
new file mode 100644
index 0000000000..d8ab318730
--- /dev/null
+++ b/drivers/opus/celt/entdec.h
@@ -0,0 +1,100 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+ Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_entdec_H)
+# define _entdec_H (1)
+# include <limits.h>
+# include "entcode.h"
+
+/*Initializes the decoder.
+ _buf: The input buffer to use.
+ _storage: The size of the input buffer, in chars.*/
+void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage);
+
+/*Calculates the cumulative frequency for the next symbol.
+ This can then be fed into the probability model to determine what that
+ symbol is, and the additional frequency information required to advance to
+ the next symbol.
+ This function cannot be called more than once without a corresponding call to
+ ec_dec_update(), or decoding will not proceed correctly.
+ _ft: The total frequency of the symbols in the alphabet the next symbol was
+ encoded with.
+ Return: A cumulative frequency representing the encoded symbol.
+ If the cumulative frequency of all the symbols before the one that
+ was encoded was fl, and the cumulative frequency of all the symbols
+ up to and including the one encoded is fh, then the returned value
+ will fall in the range [fl,fh).*/
+unsigned ec_decode(ec_dec *_this,unsigned _ft);
+
+/*Equivalent to ec_decode() with _ft==1<<_bits.*/
+unsigned ec_decode_bin(ec_dec *_this,unsigned _bits);
+
+/*Advance the decoder past the next symbol using the frequency information the
+ symbol was encoded with.
+ Exactly one call to ec_decode() must have been made so that all necessary
+ intermediate calculations are performed.
+ _fl: The cumulative frequency of all symbols that come before the symbol
+ decoded.
+ _fh: The cumulative frequency of all symbols up to and including the symbol
+ decoded.
+ Together with _fl, this defines the range [_fl,_fh) in which the value
+ returned above must fall.
+ _ft: The total frequency of the symbols in the alphabet the symbol decoded
+ was encoded in.
+ This must be the same as passed to the preceding call to ec_decode().*/
+void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft);
+
+/* Decode a bit that has a 1/(1<<_logp) probability of being a one */
+int ec_dec_bit_logp(ec_dec *_this,unsigned _logp);
+
+/*Decodes a symbol given an "inverse" CDF table.
+ No call to ec_dec_update() is necessary after this call.
+ _icdf: The "inverse" CDF, such that symbol s falls in the range
+ [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb.
+ The values must be monotonically non-increasing, and the last value
+ must be 0.
+ _ftb: The number of bits of precision in the cumulative distribution.
+ Return: The decoded symbol s.*/
+int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
+
+/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
+ The bits must have been encoded with ec_enc_uint().
+ No call to ec_dec_update() is necessary after this call.
+ _ft: The number of integers that can be decoded (one more than the max).
+ This must be at least 2, and no more than 2**32-1.
+ Return: The decoded bits.*/
+opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft);
+
+/*Extracts a sequence of raw bits from the stream.
+ The bits must have been encoded with ec_enc_bits().
+ No call to ec_dec_update() is necessary after this call.
+ _ftb: The number of bits to extract.
+ This must be between 0 and 25, inclusive.
+ Return: The decoded bits.*/
+opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb);
+
+#endif
diff --git a/drivers/opus/celt/entenc.c b/drivers/opus/celt/entenc.c
new file mode 100644
index 0000000000..299329c63f
--- /dev/null
+++ b/drivers/opus/celt/entenc.c
@@ -0,0 +1,294 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+ Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(OPUS_HAVE_CONFIG_H)
+# include "opus_config.h"
+#endif
+#include "os_support.h"
+#include "arch.h"
+#include "entenc.h"
+#include "mfrngcod.h"
+
+/*A range encoder.
+ See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
+
+ @INPROCEEDINGS{Mar79,
+ author="Martin, G.N.N.",
+ title="Range encoding: an algorithm for removing redundancy from a digitised
+ message",
+ booktitle="Video \& Data Recording Conference",
+ year=1979,
+ address="Southampton",
+ month=Jul
+ }
+ @ARTICLE{MNW98,
+ author="Alistair Moffat and Radford Neal and Ian H. Witten",
+ title="Arithmetic Coding Revisited",
+ journal="{ACM} Transactions on Information Systems",
+ year=1998,
+ volume=16,
+ number=3,
+ pages="256--294",
+ month=Jul,
+ URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
+ }*/
+
+static int ec_write_byte(ec_enc *_this,unsigned _value){ /*Appends one byte at the front of buf; returns 0, or -1 when no room is left.*/
+ if(_this->offs+_this->end_offs>=_this->storage)return -1; /*Front and end regions together already fill the buffer.*/
+ _this->buf[_this->offs++]=(unsigned char)_value;
+ return 0;
+}
+
+static int ec_write_byte_at_end(ec_enc *_this,unsigned _value){ /*Writes one byte growing backwards from the end of buf; returns 0, or -1 when no room is left.*/
+ if(_this->offs+_this->end_offs>=_this->storage)return -1; /*Same capacity check as ec_write_byte().*/
+ _this->buf[_this->storage-++(_this->end_offs)]=(unsigned char)_value; /*end_offs is incremented first, so the first write lands at buf[storage-1].*/
+ return 0;
+}
+
+/*Outputs a symbol, with a carry bit.
+ If there is a potential to propagate a carry over several symbols, they are
+ buffered until it can be determined whether or not an actual carry will
+ occur.
+ If the counter for the buffered symbols overflows, then the stream becomes
+ undecodable.
+ This gives a theoretical limit of a few billion symbols in a single packet on
+ 32-bit systems.
+ The alternative is to truncate the range in order to force a carry, but
+ requires similar carry tracking in the decoder, needlessly slowing it down.*/
+static void ec_enc_carry_out(ec_enc *_this,int _c){
+ if(_c!=EC_SYM_MAX){
+ /*No further carry propagation possible, flush buffer.*/
+ int carry;
+ carry=_c>>EC_SYM_BITS; /*Whatever sits above the low EC_SYM_BITS bits of _c.*/
+ /*Don't output a byte on the first write.
+ This compare should be taken care of by branch-prediction thereafter.*/
+ if(_this->rem>=0)_this->error|=ec_write_byte(_this,_this->rem+carry); /*rem<0 means no symbol has been buffered yet.*/
+ if(_this->ext>0){
+ unsigned sym;
+ sym=(EC_SYM_MAX+carry)&EC_SYM_MAX; /*Deferred symbols wrap to 0 on a carry, else stay EC_SYM_MAX (assumes EC_SYM_MAX is an all-ones mask - confirm in mfrngcod.h).*/
+ do _this->error|=ec_write_byte(_this,sym);
+ while(--(_this->ext)>0);
+ }
+ _this->rem=_c&EC_SYM_MAX; /*Buffer the new symbol until its own carry is resolved.*/
+ }
+ else _this->ext++; /*A symbol equal to EC_SYM_MAX could still propagate a carry, so only count it.*/
+}
+
+static void ec_enc_normalize(ec_enc *_this){ /*Emits symbols from the top of val until rng exceeds EC_CODE_BOT.*/
+ /*If the range is too small, output some bits and rescale it.*/
+ while(_this->rng<=EC_CODE_BOT){
+ ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT)); /*Hand the high-order part of val to the carry buffer.*/
+ /*Move the next-to-high-order symbol into the high-order position.*/
+ _this->val=(_this->val<<EC_SYM_BITS)&(EC_CODE_TOP-1);
+ _this->rng<<=EC_SYM_BITS;
+ _this->nbits_total+=EC_SYM_BITS; /*Account for the symbol just emitted.*/
+ }
+}
+
+void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ /*Resets the encoder state to write into the _size chars at _buf.*/
+ _this->buf=_buf;
+ _this->end_offs=0; /*No raw bits have been written at the end of the buffer yet.*/
+ _this->end_window=0;
+ _this->nend_bits=0;
+ /*This is the offset from which ec_tell() will subtract partial bits.*/
+ _this->nbits_total=EC_CODE_BITS+1;
+ _this->offs=0;
+ _this->rng=EC_CODE_TOP;
+ _this->rem=-1; /*Negative marks "no symbol buffered yet"; ec_enc_carry_out() tests rem>=0.*/
+ _this->val=0;
+ _this->ext=0;
+ _this->storage=_size;
+ _this->error=0;
+}
+
+void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ /*Encodes the symbol occupying [_fl,_fh) out of a total of _ft.*/
+ opus_uint32 r;
+ r=_this->rng/_ft; /*Scale of one frequency unit.*/
+ if(_fl>0){
+ _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); /*Advance the low end past everything below _fl.*/
+ _this->rng=IMUL32(r,(_fh-_fl));
+ }
+ else _this->rng-=IMUL32(r,(_ft-_fh)); /*With _fl==0 only the part above _fh is cut off; val is unchanged.*/
+ ec_enc_normalize(_this);
+}
+
+void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){ /*Same update as ec_encode() with a total of 1<<_bits.*/
+ opus_uint32 r;
+ r=_this->rng>>_bits; /*A shift replaces the division because the total is a power of two.*/
+ if(_fl>0){
+ _this->val+=_this->rng-IMUL32(r,((1U<<_bits)-_fl));
+ _this->rng=IMUL32(r,(_fh-_fl));
+ }
+ else _this->rng-=IMUL32(r,((1U<<_bits)-_fh)); /*With _fl==0 only the part above _fh is cut off; val is unchanged.*/
+ ec_enc_normalize(_this);
+}
+
+/*The probability of having a "one" is 1/(1<<_logp).*/
+void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){ /*Encodes one bit; any nonzero _val is treated as a one.*/
+ opus_uint32 r;
+ opus_uint32 s;
+ opus_uint32 l;
+ r=_this->rng;
+ l=_this->val;
+ s=r>>_logp; /*Portion of the range assigned to a "one".*/
+ r-=s; /*r now holds the portion assigned to a "zero".*/
+ if(_val)_this->val=l+r; /*A one moves the low end past the "zero" portion.*/
+ _this->rng=_val?s:r;
+ ec_enc_normalize(_this);
+}
+
+void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){ /*Encodes symbol index _s using an inverse CDF table with _ftb bits of precision.*/
+ opus_uint32 r;
+ r=_this->rng>>_ftb; /*Scale of one table unit.*/
+ if(_s>0){
+ _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]); /*Advance the low end past the symbols that precede _s.*/
+ _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]);
+ }
+ else _this->rng-=IMUL32(r,_icdf[_s]); /*For symbol 0 only the range is reduced; val is unchanged.*/
+ ec_enc_normalize(_this);
+}
+
+void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){ /*Encodes the integer _fl out of _ft possible values; _ft must be greater than 1.*/
+ unsigned ft;
+ unsigned fl;
+ int ftb;
+ /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/
+ celt_assert(_ft>1);
+ _ft--; /*From here on _ft is the largest encodable value.*/
+ ftb=EC_ILOG(_ft); /*Bit length of that maximum.*/
+ if(ftb>EC_UINT_BITS){
+ ftb-=EC_UINT_BITS; /*The low ftb bits are written via ec_enc_bits(); only the top EC_UINT_BITS go through ec_encode().*/
+ ft=(_ft>>ftb)+1;
+ fl=(unsigned)(_fl>>ftb);
+ ec_encode(_this,fl,fl+1,ft);
+ ec_enc_bits(_this,_fl&(((opus_uint32)1<<ftb)-1U),ftb);
+ }
+ else ec_encode(_this,_fl,_fl+1,_ft+1); /*Small ranges are coded as a single symbol with the original total.*/
+}
+
+void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _bits){ /*Queues the _bits raw bits of _fl for output at the end of the buffer; _bits must be nonzero.*/
+ ec_window window;
+ int used;
+ window=_this->end_window;
+ used=_this->nend_bits;
+ celt_assert(_bits>0);
+ if(used+_bits>EC_WINDOW_SIZE){ /*The new bits would not fit: flush whole symbols first.*/
+ do{
+ _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX); /*A failed write is recorded in error rather than aborting.*/
+ window>>=EC_SYM_BITS;
+ used-=EC_SYM_BITS;
+ }
+ while(used>=EC_SYM_BITS);
+ }
+ window|=(ec_window)_fl<<used; /*New bits are placed above those already pending.*/
+ used+=_bits;
+ _this->end_window=window;
+ _this->nend_bits=used;
+ _this->nbits_total+=_bits;
+}
+
+void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits){ /*Overwrites the first _nbits bits of the stream with _val.*/
+ int shift;
+ unsigned mask;
+ celt_assert(_nbits<=EC_SYM_BITS);
+ shift=EC_SYM_BITS-_nbits; /*Distance from bit 0 up to the patched field at the top of a symbol.*/
+ mask=((1<<_nbits)-1)<<shift; /*Selects the top _nbits bits of a symbol.*/
+ if(_this->offs>0){
+ /*The first byte has been finalized.*/
+ _this->buf[0]=(unsigned char)((_this->buf[0]&~mask)|_val<<shift);
+ }
+ else if(_this->rem>=0){
+ /*The first byte is still awaiting carry propagation.*/
+ _this->rem=(_this->rem&~mask)|_val<<shift;
+ }
+ else if(_this->rng<=(EC_CODE_TOP>>_nbits)){
+ /*The renormalization loop has never been run.*/
+ _this->val=(_this->val&~((opus_uint32)mask<<EC_CODE_SHIFT))|
+ (opus_uint32)_val<<(EC_CODE_SHIFT+shift); /*Patch the same field directly inside val.*/
+ }
+ /*The encoder hasn't even encoded _nbits of data yet.*/
+ else _this->error=-1;
+}
+
+void ec_enc_shrink(ec_enc *_this,opus_uint32 _size){ /*Reduces the buffer size to _size, relocating the bytes already written at the end.*/
+ celt_assert(_this->offs+_this->end_offs<=_size); /*Everything written so far must fit in the new size.*/
+ OPUS_MOVE(_this->buf+_size-_this->end_offs,
+ _this->buf+_this->storage-_this->end_offs,_this->end_offs); /*Destination first, then source; OPUS_MOVE is presumably overlap-safe (see os_support.h).*/
+ _this->storage=_size;
+}
+
+void ec_enc_done(ec_enc *_this){ /*Flushes the range coder state and any pending raw bits into the buffer.*/
+ ec_window window;
+ int used;
+ opus_uint32 msk;
+ opus_uint32 end;
+ int l;
+ /*We output the minimum number of bits that ensures that the symbols encoded
+ thus far will be decoded correctly regardless of the bits that follow.*/
+ l=EC_CODE_BITS-EC_ILOG(_this->rng);
+ msk=(EC_CODE_TOP-1)>>l;
+ end=(_this->val+msk)&~msk; /*Round val up to a multiple of msk+1.*/
+ if((end|msk)>=_this->val+_this->rng){ /*Filling the masked-off bits with ones would leave the range: use one more bit.*/
+ l++;
+ msk>>=1;
+ end=(_this->val+msk)&~msk;
+ }
+ while(l>0){ /*Emit the chosen value one symbol at a time.*/
+ ec_enc_carry_out(_this,(int)(end>>EC_CODE_SHIFT));
+ end=(end<<EC_SYM_BITS)&(EC_CODE_TOP-1);
+ l-=EC_SYM_BITS; /*l ends up <=0 here; -l is read further down.*/
+ }
+ /*If we have a buffered byte flush it into the output buffer.*/
+ if(_this->rem>=0||_this->ext>0)ec_enc_carry_out(_this,0);
+ /*If we have buffered extra bits, flush them as well.*/
+ window=_this->end_window;
+ used=_this->nend_bits;
+ while(used>=EC_SYM_BITS){
+ _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX);
+ window>>=EC_SYM_BITS;
+ used-=EC_SYM_BITS;
+ }
+ /*Clear any excess space and add any remaining extra bits to the last byte.*/
+ if(!_this->error){
+ OPUS_CLEAR(_this->buf+_this->offs,
+ _this->storage-_this->offs-_this->end_offs); /*Covers the gap between the front and end regions.*/
+ if(used>0){
+ /*If there's no range coder data at all, give up.*/
+ if(_this->end_offs>=_this->storage)_this->error=-1;
+ else{
+ l=-l; /*Number of bits by which the symbol loop above overshot.*/
+ /*If we've busted, don't add too many extra bits to the last byte; it
+ would corrupt the range coder data, and that's more important.*/
+ if(_this->offs+_this->end_offs>=_this->storage&&l<used){
+ window&=(1<<l)-1; /*Keep only the low l raw bits.*/
+ _this->error=-1;
+ }
+ _this->buf[_this->storage-_this->end_offs-1]|=(unsigned char)window; /*OR the leftover raw bits into the byte just before the end region.*/
+ }
+ }
+ }
+}
diff --git a/drivers/opus/celt/entenc.h b/drivers/opus/celt/entenc.h
new file mode 100644
index 0000000000..796bc4d572
--- /dev/null
+++ b/drivers/opus/celt/entenc.h
@@ -0,0 +1,110 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+ Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_entenc_H)
+# define _entenc_H (1)
+# include <stddef.h>
+# include "entcode.h"
+
+/*Initializes the encoder.
+ _buf: The buffer to store output bytes in.
+ _size: The size of the buffer, in chars.*/
+void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size);
+/*Encodes a symbol given its frequency information.
+ The frequency information must be discernable by the decoder, assuming it
+ has read only the previous symbols from the stream.
+ It is allowable to change the frequency information, or even the entire
+ source alphabet, so long as the decoder can tell from the context of the
+ previously encoded information that it is supposed to do so as well.
+ _fl: The cumulative frequency of all symbols that come before the one to be
+ encoded.
+ _fh: The cumulative frequency of all symbols up to and including the one to
+ be encoded.
+ Together with _fl, this defines the range [_fl,_fh) in which the
+ decoded value will fall.
+ _ft: The sum of the frequencies of all the symbols*/
+void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft);
+
+/*Equivalent to ec_encode() with _ft==1<<_bits.*/
+void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits);
+
+/* Encode a bit that has a 1/(1<<_logp) probability of being a one */
+void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp);
+
+/*Encodes a symbol given an "inverse" CDF table.
+ _s: The index of the symbol to encode.
+ _icdf: The "inverse" CDF, such that symbol _s falls in the range
+ [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb.
+ The values must be monotonically non-increasing, and the last value
+ must be 0.
+ _ftb: The number of bits of precision in the cumulative distribution.*/
+void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
+
+/*Encodes a raw unsigned integer in the stream.
+ _fl: The integer to encode.
+ _ft: The number of integers that can be encoded (one more than the max).
+ This must be at least one, and no more than 2**32-1.*/
+void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft);
+
+/*Encodes a sequence of raw bits in the stream.
+ _fl: The bits to encode.
+ _ftb: The number of bits to encode.
+ This must be between 1 and 25, inclusive.*/
+void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _ftb);
+
+/*Overwrites a few bits at the very start of an existing stream, after they
+ have already been encoded.
+ This makes it possible to have a few flags up front, where it is easy for
+ decoders to access them without parsing the whole stream, even if their
+ values are not determined until late in the encoding process, without having
+ to buffer all the intermediate symbols in the encoder.
+ In order for this to work, at least _nbits bits must have already been
+ encoded using probabilities that are an exact power of two.
+ The encoder can verify the number of encoded bits is sufficient, but cannot
+ check this latter condition.
+ _val: The bits to encode (in the least _nbits significant bits).
+ They will be decoded in order from most-significant to least.
+ _nbits: The number of bits to overwrite.
+ This must be no more than 8.*/
+void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits);
+
+/*Compacts the data to fit in the target size.
+ This moves up the raw bits at the end of the current buffer so they are at
+ the end of the new buffer size.
+ The caller must ensure that the amount of data that's already been written
+ will fit in the new size.
+ _size: The number of bytes in the new buffer.
+ This must be large enough to contain the bits already written, and
+ must be no larger than the existing size.*/
+void ec_enc_shrink(ec_enc *_this,opus_uint32 _size);
+
+/*Indicates that there are no more symbols to encode.
+ All remaining output bytes are flushed to the output buffer.
+ ec_enc_init() must be called before the encoder can be used again.*/
+void ec_enc_done(ec_enc *_this);
+
+#endif
diff --git a/drivers/opus/celt/fixed_debug.h b/drivers/opus/celt/fixed_debug.h
new file mode 100644
index 0000000000..80bc94910f
--- /dev/null
+++ b/drivers/opus/celt/fixed_debug.h
@@ -0,0 +1,773 @@
+/* Copyright (C) 2003-2008 Jean-Marc Valin
+ Copyright (C) 2007-2012 Xiph.Org Foundation */
+/**
+ @file fixed_debug.h
+ @brief Fixed-point operations with debugging
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_DEBUG_H
+#define FIXED_DEBUG_H
+
+#include <stdio.h>
+#include "opus_defines.h"
+
+/* Running cost counter updated by the checked operators in this header.
+ The translation unit that defines CELT_C provides the definition; every
+ other includer only sees the extern declaration. */
+#ifdef CELT_C
+OPUS_EXPORT opus_int64 celt_mips=0;
+#else
+extern opus_int64 celt_mips;
+#endif
+
+/** Multiply a 16-bit signed value by a 16-bit unsigned value; both are widened to opus_val32 before multiplying */
+#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
+/** 32x32 multiplication followed by a 31-bit shift right, assembled from 16-bit partial products */
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+/** 16x32 multiplication with rounding, followed by a 16-bit shift right (forwards to MULT16_32_PX with Q=16) */
+#define MULT16_32_P16(a,b) MULT16_32_PX(a,b,16)
+
+/** Conversion of a float constant to a 16-bit value with `bits` fractional bits (adds .5 before the cast) */
+#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
+/** Conversion of a float constant to a 32-bit value with `bits` fractional bits (adds .5 before the cast) */
+#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+/* Range predicates used by the checked operators below. */
+#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768)
+#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL)
+/* The upper bound is 2^32-1. The former bound (2147483647LLU<<1) was 2^32-2
+ and therefore rejected the valid value 0xFFFFFFFF. */
+#define VERIFY_UINT(x) ((x)<=((2147483647LLU<<1)|1))
+
+/* Unsuffixed aliases for the 32-bit checked shifts. */
+#define SHR(a,b) SHR32(a,b)
+#define PSHR(a,b) PSHR32(a,b)
+
+/* Checked 16-bit negation: reports on stderr when the operand or the result
+ is outside the short range, then adds one to celt_mips. */
+static OPUS_INLINE short NEG16(int x)
+{
+ int negated;
+ if (!VERIFY_SHORT(x)) {
+ fprintf (stderr, "NEG16: input is not short: %d\n", (int)x);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ negated = -x;
+ if (!VERIFY_SHORT(negated)) {
+ fprintf (stderr, "NEG16: output is not short: %d\n", (int)negated);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 1;
+ return negated;
+}
+/* Checked 32-bit negation: reports on stderr when the operand or the result
+ is outside the 32-bit int range, then adds two to celt_mips.
+ The diagnostics are labelled NEG32 (they used to say NEG16, which pointed
+ at the wrong operator). */
+static OPUS_INLINE int NEG32(opus_int64 x)
+{
+ opus_int64 res;
+ if (!VERIFY_INT(x))
+ {
+ fprintf (stderr, "NEG32: input is not int: %d\n", (int)x);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ res = -x;
+ if (!VERIFY_INT(res))
+ {
+ fprintf (stderr, "NEG32: output is not int: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips+=2;
+ return res;
+}
+
+#define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__)
+/* Checked narrowing to 16 bits: reports on stderr, with the caller's file and
+ line, when x is outside the short range; adds one to celt_mips. */
+static OPUS_INLINE short EXTRACT16_(int x, char *file, int line)
+{
+ int narrowed;
+ if (!VERIFY_SHORT(x)) {
+ fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ narrowed = x;
+ celt_mips += 1;
+ return narrowed;
+}
+
+#define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__)
+/* Checked widening from 16 to 32 bits: reports on stderr, with the caller's
+ file and line, when x is outside the short range; adds one to celt_mips. */
+static OPUS_INLINE int EXTEND32_(int x, char *file, int line)
+{
+ int widened;
+ if (!VERIFY_SHORT(x)) {
+ fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ widened = x;
+ celt_mips += 1;
+ return widened;
+}
+
+#define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__)
+/* Checked 16-bit shift right: both operands and the result are verified to be
+ in the short range; violations are reported on stderr with the caller's file
+ and line. Adds one to celt_mips. */
+static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line)
+{
+ int shifted;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(shift))) {
+ fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ shifted = a>>shift;
+ if (!VERIFY_SHORT(shifted)) {
+ fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", shifted, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 1;
+ return shifted;
+}
+#define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__)
+/* Checked 16-bit shift left: both operands and the result are verified to be
+ in the short range; violations are reported on stderr with the caller's file
+ and line. Adds one to celt_mips. */
+static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line)
+{
+ int res;
+ if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
+ {
+ fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ /* Shift in the unsigned domain: left-shifting a negative signed value is
+ undefined behaviour in C. */
+ res = (int)((unsigned)a<<shift);
+ if (!VERIFY_SHORT(res))
+ {
+ fprintf (stderr, "SHL16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips++;
+ return res;
+}
+
+/* Checked 32-bit shift right: a must be in the int range and shift in the
+ short range; the result must be in the int range. Violations are reported
+ on stderr. Adds two to celt_mips. */
+static OPUS_INLINE int SHR32(opus_int64 a, int shift)
+{
+ opus_int64 shifted;
+ if (!(VERIFY_INT(a) && VERIFY_SHORT(shift))) {
+ fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ shifted = a>>shift;
+ if (!VERIFY_INT(shifted)) {
+ fprintf (stderr, "SHR32: output is not int: %d\n", (int)shifted);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 2;
+ return shifted;
+}
+#define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__)
+/* Checked 32-bit shift left: a must be in the int range and shift in the
+ short range; the result must be in the int range. Violations are reported
+ on stderr with the caller's file and line. Adds two to celt_mips. */
+static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
+{
+ opus_int64 res;
+ if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
+ {
+ fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ /* Shift in the unsigned domain: left-shifting a negative signed value is
+ undefined behaviour in C. */
+ res = (opus_int64)((opus_uint64)a<<shift);
+ if (!VERIFY_INT(res))
+ {
+ fprintf (stderr, "SHL32: output is not int: %lld<<%d = %lld in %s: line %d\n", a, shift, res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips+=2;
+ return res;
+}
+
+/* Rounding shift right: adds half of 1<<shift before shifting. The celt_mips--
+ presumably offsets part of the cost counted by the nested ADD32/SHR32. */
+#define PSHR32(a,shift) (celt_mips--,SHR32(ADD32((a),(((opus_val32)(1)<<((shift))>>1))),shift))
+/* Shift right when shift is positive, otherwise shift left by -shift. */
+#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
+
+/* Rounding shift right by a, followed by a checked narrowing to 16 bits. */
+#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
+/* Halve a 16-bit / 32-bit value with a checked shift right by one. */
+#define HALF16(x) (SHR16(x,1))
+#define HALF32(x) (SHR32(x,1))
+
+//#define SHR(a,shift) ((a) >> (shift))
+//#define SHL(a,shift) ((a) << (shift))
+
+#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
+/* Checked 16-bit addition: both operands and the sum are verified to be in
+ the short range; violations are reported on stderr with the caller's file
+ and line. Adds one to celt_mips. */
+static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
+{
+ int sum;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ sum = a+b;
+ if (!VERIFY_SHORT(sum)) {
+ fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,sum, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 1;
+ return sum;
+}
+
+#define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__)
+/* Checked 16-bit subtraction: both operands and the difference are verified
+ to be in the short range; violations are reported on stderr with the
+ caller's file and line. Adds one to celt_mips. */
+static OPUS_INLINE short SUB16_(int a, int b, char *file, int line)
+{
+ int diff;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ diff = a-b;
+ if (!VERIFY_SHORT(diff)) {
+ fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", diff, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 1;
+ return diff;
+}
+
+#define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__)
+/* Checked 32-bit addition carried out in 64 bits: both operands and the sum
+ are verified to be in the int range; violations are reported on stderr with
+ the caller's file and line. Adds two to celt_mips. */
+static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+ opus_int64 sum;
+ if (!(VERIFY_INT(a) && VERIFY_INT(b))) {
+ fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ sum = a+b;
+ if (!VERIFY_INT(sum)) {
+ fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)sum, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 2;
+ return sum;
+}
+
+#define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__)
+/* Checked 32-bit subtraction carried out in 64 bits: both operands and the
+ difference are verified to be in the int range; violations are reported on
+ stderr with the caller's file and line. Adds two to celt_mips. */
+static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+ opus_int64 diff;
+ if (!(VERIFY_INT(a) && VERIFY_INT(b))) {
+ fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ diff = a-b;
+ if (!VERIFY_INT(diff)) {
+ fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)diff, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 2;
+ return diff;
+}
+
+#undef UADD32
+#define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__)
+/* Checked unsigned 32-bit addition carried out in 64 bits: both operands and
+ the sum are tested with VERIFY_UINT; violations are reported on stderr with
+ the caller's file and line. Adds two to celt_mips. */
+static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+{
+ opus_uint64 sum;
+ if (!(VERIFY_UINT(a) && VERIFY_UINT(b))) {
+ fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ sum = a+b;
+ if (!VERIFY_UINT(sum)) {
+ fprintf (stderr, "UADD32: output is not uint32: %llu in %s: line %d\n", sum, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 2;
+ return sum;
+}
+
+#undef USUB32
+#define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__)
+/* Checked unsigned 32-bit subtraction carried out in 64 bits: operands and
+ result are tested with VERIFY_UINT, and a<b is reported as an underflow.
+ Diagnostics go to stderr with the caller's file and line. Adds two to
+ celt_mips. */
+static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+{
+ opus_uint64 diff;
+ if (!(VERIFY_UINT(a) && VERIFY_UINT(b))) {
+ fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ if (b>a) {
+ fprintf (stderr, "USUB32: inputs underflow: %llu < %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ diff = a-b;
+ if (!VERIFY_UINT(diff)) {
+ fprintf (stderr, "USUB32: output is not uint32: %llu - %llu = %llu in %s: line %d\n", a, b, diff, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 2;
+ return diff;
+}
+
+/* Checked 16x16 multiplication whose product must itself fit in 16 bits.
+ Operands and product are verified to be in the short range; violations are
+ reported on stderr. Adds one to celt_mips. */
+static OPUS_INLINE short MULT16_16_16(int a, int b)
+{
+ int product;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ product = a*b;
+ if (!VERIFY_SHORT(product)) {
+ fprintf (stderr, "MULT16_16_16: output is not short: %d\n", product);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 1;
+ return product;
+}
+
+#define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__)
+/* Checked 16x16 multiplication with a 32-bit result. Operands are verified to
+ be in the short range and the 64-bit product to be in the int range;
+ violations are reported on stderr with the caller's file and line. Adds one
+ to celt_mips. */
+static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line)
+{
+ opus_int64 product;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ product = ((opus_int64)a)*b;
+ if (!VERIFY_INT(product)) {
+ fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)product, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 1;
+ return product;
+}
+
+#define MAC16_16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_16((a),(b))))
+
+#define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__)
+/* Checked 16x32 multiplication followed by a Q-bit shift right.
+ a must be in the short range, b in the int range with |b| < 2^(15+Q), and
+ the shifted product in the int range; violations are reported on stderr
+ with the caller's file and line. Adds 3 to celt_mips when Q is 15, else 4. */
+static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line)
+{
+ opus_int64 res;
+ if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
+ {
+ fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ /* Build the bound in 64 bits: a 32-bit 1<<(15+Q) overflows once Q reaches
+ 16. This matches the bound used by MULT16_32_PX_. */
+ if (ABS32(b)>=((opus_int64)(1)<<(15+Q)))
+ {
+ fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ res = (((opus_int64)a)*(opus_int64)b) >> Q;
+ if (!VERIFY_INT(res))
+ {
+ fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ if (Q==15)
+ celt_mips+=3;
+ else
+ celt_mips+=4;
+ return res;
+}
+
+#define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__)
+/* Checked 16x32 multiplication followed by a rounding Q-bit shift right
+ (half of 1<<Q is added before shifting).
+ a must be in the short range, b in the int range with |b| < 2^(15+Q), and
+ the result in the int range; violations are reported on stderr with the
+ caller's file and line. Adds 4 to celt_mips when Q is 15, else 5. */
+static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line)
+{
+ opus_int64 res;
+ if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
+ {
+ fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ if (ABS32(b)>=((opus_int64)(1)<<(15+Q)))
+ {
+ /* Labelled P (not Q) so the message names the operator that failed. */
+ fprintf (stderr, "MULT16_32_P%d: second operand too large: %d %d in %s: line %d\n\n", Q, (int)a, (int)b,file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ res = ((((opus_int64)a)*(opus_int64)b) + (((opus_val32)(1)<<Q)>>1))>> Q;
+ if (!VERIFY_INT(res))
+ {
+ fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d in %s: line %d\n\n", Q, (int)a, (int)b,(int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ if (Q==15)
+ celt_mips+=4;
+ else
+ celt_mips+=5;
+ return res;
+}
+
+#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
+#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
+
+/* Limits a to at most b, then to at least -b (the two tests run in that
+ order). Adds three to celt_mips. */
+static OPUS_INLINE int SATURATE(int a, int b)
+{
+ int clamped;
+ clamped = (a>b) ? b : a;
+ if (clamped < -b) {
+ clamped = -b;
+ }
+ celt_mips += 3;
+ return clamped;
+}
+
+/* Clamps a 32-bit value to [-32768, 32767] and returns it as 16 bits.
+ Adds three to celt_mips. */
+static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a)
+{
+ celt_mips += 3;
+ if (a > 32767) {
+ return 32767;
+ }
+ if (a < -32768) {
+ return -32768;
+ }
+ return a;
+}
+
+/* Checked 16x16 multiplication followed by an 11-bit shift right, with a
+ 32-bit result. Operands must be in the short range and the result in the
+ int range; violations are reported on stderr. Adds three to celt_mips.
+ The diagnostics carry this operator's full name, and the output message
+ says "int" to match the VERIFY_INT check it reports. */
+static OPUS_INLINE int MULT16_16_Q11_32(int a, int b)
+{
+ opus_int64 res;
+ if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+ {
+ fprintf (stderr, "MULT16_16_Q11_32: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ res = ((opus_int64)a)*b;
+ res >>= 11;
+ if (!VERIFY_INT(res))
+ {
+ fprintf (stderr, "MULT16_16_Q11_32: output is not int: %d*%d=%d\n", (int)a, (int)b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips+=3;
+ return res;
+}
+/* Checked 16x16 multiplication followed by a 13-bit shift right, with a
+ 16-bit result. Operands and result must be in the short range; violations
+ are reported on stderr. Adds three to celt_mips. */
+static OPUS_INLINE short MULT16_16_Q13(int a, int b)
+{
+ opus_int64 scaled;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ scaled = (((opus_int64)a)*b) >> 13;
+ if (!VERIFY_SHORT(scaled)) {
+ fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)scaled);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 3;
+ return scaled;
+}
+/* Checked 16x16 multiplication followed by a 14-bit shift right, with a
+ 16-bit result. Operands and result must be in the short range; violations
+ are reported on stderr. Adds three to celt_mips. */
+static OPUS_INLINE short MULT16_16_Q14(int a, int b)
+{
+ opus_int64 scaled;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ scaled = (((opus_int64)a)*b) >> 14;
+ if (!VERIFY_SHORT(scaled)) {
+ fprintf (stderr, "MULT16_16_Q14: output is not short: %d\n", (int)scaled);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 3;
+ return scaled;
+}
+
+#define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__)
+/* Checked 16x16 multiplication followed by a 15-bit shift right, with a
+ 16-bit result. Operands and result must be in the short range; violations
+ are reported on stderr with the caller's file and line. Adds one to
+ celt_mips. */
+static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line)
+{
+ opus_int64 scaled;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ scaled = (((opus_int64)a)*b) >> 15;
+ if (!VERIFY_SHORT(scaled)) {
+ fprintf (stderr, "MULT16_16_Q15: output is not short: %d in %s: line %d\n", (int)scaled, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 1;
+ return scaled;
+}
+
+/* Checked 16x16 multiplication with a rounding 13-bit shift right (4096 is
+ added before shifting). Operands must be in the short range, the biased
+ product in the int range, and the result in the short range; violations
+ are reported on stderr. Adds four to celt_mips. */
+static OPUS_INLINE short MULT16_16_P13(int a, int b)
+{
+ opus_int64 acc;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ acc = ((opus_int64)a)*b + 4096;
+ if (!VERIFY_INT(acc)) {
+ fprintf (stderr, "MULT16_16_P13: overflow: %d*%d=%d\n", a, b, (int)acc);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ acc = acc >> 13;
+ if (!VERIFY_SHORT(acc)) {
+ fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)acc);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 4;
+ return acc;
+}
+/* Checked 16x16 multiplication with a rounding 14-bit shift right (8192 is
+ added before shifting). Operands must be in the short range, the biased
+ product in the int range, and the result in the short range; violations
+ are reported on stderr. Adds four to celt_mips. */
+static OPUS_INLINE short MULT16_16_P14(int a, int b)
+{
+ opus_int64 acc;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ acc = ((opus_int64)a)*b + 8192;
+ if (!VERIFY_INT(acc)) {
+ fprintf (stderr, "MULT16_16_P14: overflow: %d*%d=%d\n", a, b, (int)acc);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ acc = acc >> 14;
+ if (!VERIFY_SHORT(acc)) {
+ fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)acc);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 4;
+ return acc;
+}
+/* Checked 16x16 multiplication with a rounding 15-bit shift right (16384 is
+ added before shifting). Operands must be in the short range, the biased
+ product in the int range, and the result in the short range; violations
+ are reported on stderr. Adds two to celt_mips. */
+static OPUS_INLINE short MULT16_16_P15(int a, int b)
+{
+ opus_int64 acc;
+ if (!(VERIFY_SHORT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ acc = ((opus_int64)a)*b + 16384;
+ if (!VERIFY_INT(acc)) {
+ fprintf (stderr, "MULT16_16_P15: overflow: %d*%d=%d\n", a, b, (int)acc);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ acc = acc >> 15;
+ if (!VERIFY_SHORT(acc)) {
+ fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)acc);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 2;
+ return acc;
+}
+
+#define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__)
+
+/* Checked division of a 32-bit value by a 16-bit value with a 16-bit result.
+ A zero divisor is reported and yields 0. Otherwise a must be in the int
+ range and b in the short range; a quotient outside the short range is
+ reported and clamped to [-32768, 32767]. Diagnostics go to stderr with the
+ caller's file and line. Adds 35 to celt_mips on the non-zero path. */
+static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+ opus_int64 quot;
+ if (b==0) {
+ fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ return 0;
+ }
+ if (!(VERIFY_INT(a) && VERIFY_SHORT(b))) {
+ fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ quot = a/b;
+ if (!VERIFY_SHORT(quot)) {
+ fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)quot, file, line);
+ /* Clamp before the optional assert, as a non-asserting build continues. */
+ if (quot>32767) {
+ quot = 32767;
+ } else if (quot<-32768) {
+ quot = -32768;
+ }
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips += 35;
+ return quot;
+}
+
+#define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__)
+/* Checked 32-bit by 32-bit division. A zero divisor is reported and yields 0.
+ Otherwise both operands and the quotient must be in the int range;
+ violations are reported on stderr with the caller's file and line.
+ Adds 70 to celt_mips on the non-zero path. */
+static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+ opus_int64 res;
+ if (b==0)
+ {
+ fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ return 0;
+ }
+
+ if (!VERIFY_INT(a) || !VERIFY_INT(b))
+ {
+ /* Both operands are checked as int here, so the message says "int". */
+ fprintf (stderr, "DIV32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ res = a/b;
+ if (!VERIFY_INT(res))
+ {
+ fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+ celt_assert(0);
+#endif
+ }
+ celt_mips+=70;
+ return res;
+}
+
+#undef PRINT_MIPS
+/* Prints the accumulated celt_mips counter to `file`. celt_mips is a signed
+ opus_int64, so it is printed with a signed conversion and an explicit cast
+ to long long. The trailing semicolon is kept so existing call sites are
+ unaffected. */
+#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %lld MIPS\n", (long long)celt_mips);} while (0);
+
+#endif
diff --git a/drivers/opus/celt/fixed_generic.h b/drivers/opus/celt/fixed_generic.h
new file mode 100644
index 0000000000..ecf018a244
--- /dev/null
+++ b/drivers/opus/celt/fixed_generic.h
@@ -0,0 +1,134 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+ Copyright (C) 2003-2008 Jean-Marc Valin
+ Copyright (C) 2007-2008 CSIRO */
+/**
+ @file fixed_generic.h
+ @brief Generic fixed-point operations
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_GENERIC_H
+#define FIXED_GENERIC_H
+
+/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */
+#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
+#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+
+/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+
+/** Compile-time conversion of float constant to 16-bit value */
+#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+/** Compile-time conversion of float constant to 32-bit value */
+#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+/** Negate a 16-bit value */
+#define NEG16(x) (-(x))
+/** Negate a 32-bit value */
+#define NEG32(x) (-(x))
+
+/** Change a 32-bit value into a 16-bit value. The value is assumed to fit in 16-bit, otherwise the result is undefined */
+#define EXTRACT16(x) ((opus_val16)(x))
+/** Change a 16-bit value into a 32-bit value */
+#define EXTEND32(x) ((opus_val32)(x))
+
+/** Arithmetic shift-right of a 16-bit value */
+#define SHR16(a,shift) ((a) >> (shift))
+/** Arithmetic shift-left of a 16-bit value */
+#define SHL16(a,shift) ((opus_int16)((opus_uint16)(a)<<(shift)))
+/** Arithmetic shift-right of a 32-bit value */
+#define SHR32(a,shift) ((a) >> (shift))
+/** Arithmetic shift-left of a 32-bit value */
+#define SHL32(a,shift) ((opus_int32)((opus_uint32)(a)<<(shift)))
+
+/** 32-bit arithmetic shift right with rounding-to-nearest instead of rounding down */
+#define PSHR32(a,shift) (SHR32((a)+((EXTEND32(1)<<((shift))>>1)),shift))
+/** 32-bit arithmetic shift right where the argument can be negative */
+#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
+
+/** "RAW" macros, should not be used outside of this header file */
+#define SHR(a,shift) ((a) >> (shift))
+#define SHL(a,shift) SHL32(a,shift)
+#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
+/** Limit x to at most a and at least -a */
+#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
+
+/** Limit x to [-32768, 32767] and convert it to a 16-bit value */
+#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))
+
+/** Shift right by a with rounding to the nearest value. The 32-bit input yields a 16-bit result */
+#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
+/** Divide by two */
+#define HALF16(x) (SHR16(x,1))
+#define HALF32(x) (SHR32(x,1))
+
+/** Add two 16-bit values */
+#define ADD16(a,b) ((opus_val16)((opus_val16)(a)+(opus_val16)(b)))
+/** Subtract two 16-bit values */
+#define SUB16(a,b) ((opus_val16)(a)-(opus_val16)(b))
+/** Add two 32-bit values */
+#define ADD32(a,b) ((opus_val32)(a)+(opus_val32)(b))
+/** Subtract two 32-bit values */
+#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
+
+/** 16x16 multiplication where the result fits in 16 bits */
+#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b))))
+
+/* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */
+/** 16x16 multiplication where the result fits in 32 bits */
+#define MULT16_16(a,b) (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b)))
+
+/** 16x16 multiply-add where the result fits in 32 bits */
+#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+ b must fit in 31 bits.
+ Result fits in 32 bits. */
+#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+
+/** 16x16 multiplication followed by an 11-bit shift right (same expansion as MULT16_16_Q11) */
+#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
+/** 16x16 multiplication followed by an 11/13/14/15-bit shift right */
+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
+#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
+#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
+
+/** 16x16 multiplication with rounding: half of the divisor (4096/8192/16384) is added before the 13/14/15-bit shift right */
+#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
+#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
+#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
+
+/** Divide a 32-bit value by a 16-bit value. Result fits in 16 bits */
+#define DIV32_16(a,b) ((opus_val16)(((opus_val32)(a))/((opus_val16)(b))))
+
+/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
+#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
+
+#endif
diff --git a/drivers/opus/celt/float_cast.h b/drivers/opus/celt/float_cast.h
new file mode 100644
index 0000000000..ede6574860
--- /dev/null
+++ b/drivers/opus/celt/float_cast.h
@@ -0,0 +1,140 @@
+/* Copyright (C) 2001 Erik de Castro Lopo <erikd AT mega-nerd DOT com> */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Version 1.1 */
+
+#ifndef FLOAT_CAST_H
+#define FLOAT_CAST_H
+
+
+#include "arch.h"
+
+/*============================================================================
+** On Intel Pentium processors (especially PIII and probably P4), converting
+** from float to int is very slow. To meet the C specs, the code produced by
+** most C compilers targeting Pentium needs to change the FPU rounding mode
+** before the float to int conversion is performed.
+**
+** Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
+** is this flushing of the pipeline which is so slow.
+**
+** Fortunately the ISO C99 specifications define the functions lrint, lrintf,
+** llrint and llrintf which fix this problem as a side effect.
+**
+** On Unix-like systems, the configure process should have detected the
+** presence of these functions. If they weren't found we have to replace them
+** here with a standard C cast.
+*/
+
+/*
+** The C99 prototypes for lrint and lrintf are as follows:
+**
+** long int lrintf (float x) ;
+** long int lrint (double x) ;
+*/
+
+/* The presence of the required functions is detected during the configure
+** process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
+** the config.h file.
+*/
+
+#if (HAVE_LRINTF)
+
+/* These defines enable functionality introduced with the 1999 ISO C
+** standard. They must be defined before the inclusion of math.h to
+** engage them. If optimisation is enabled, these functions will be
+** inlined. With optimisation switched off, you have to link in the
+** maths library using -lm.
+*/
+
+#define _ISOC9X_SOURCE 1
+#define _ISOC99_SOURCE 1
+
+#define __USE_ISOC9X 1
+#define __USE_ISOC99 1
+
+#include <math.h>
+#define float2int(x) lrintf(x)
+
+#elif (defined(HAVE_LRINT))
+
+#define _ISOC9X_SOURCE 1
+#define _ISOC99_SOURCE 1
+
+#define __USE_ISOC9X 1
+#define __USE_ISOC99 1
+
+#include <math.h>
+#define float2int(x) lrint(x)
+
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64))
+ #include <xmmintrin.h>
+
+ __inline long int float2int(float value) /* 64-bit MSVC build: float -> integer through SSE intrinsics from <xmmintrin.h> */
+ {
+ return _mm_cvtss_si32(_mm_load_ss(&value)); /* load the scalar into an SSE register, then convert it to a 32-bit integer */
+ }
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32))
+ #include <math.h>
+
+ /* Win32 doesn't seem to have these functions.
+ ** Therefore implement OPUS_INLINE versions of these functions here.
+ */
+
+ __inline long int /* 32-bit MSVC build: float -> integer using x87 inline assembly */
+ float2int (float flt)
+ { int intgr;
+ /* fld pushes flt onto the x87 stack; fistp stores it into intgr as an integer and pops it. */
+ _asm
+ { fld flt
+ fistp intgr
+ } ;
+ /* intgr is an int; the return statement widens it to long int. */
+ return intgr ;
+ }
+
+#else
+
+#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L)
+ /* supported by gcc in C99 mode, but not by all other compilers */
+ #warning "Don't have the functions lrint() and lrintf ()."
+ #warning "Replacing these functions with a standard C cast."
+#endif /* __STDC_VERSION__ >= 199901L */
+ #include <math.h>
+ #define float2int(flt) ((int)(floor(.5+(flt)))) /* fallback: round half up via floor(); flt is parenthesised so any expression argument (e.g. a ?: expression) expands correctly */
+#endif
+
+#ifndef DISABLE_FLOAT_API
+static OPUS_INLINE opus_int16 FLOAT2INT16(float x)
+{ /* Scale by CELT_SIG_SCALE, clamp to [-32768, 32767], then convert with float2int(). */
+    const float scaled = x*CELT_SIG_SCALE;
+    const float floored = MAX32(scaled, -32768);
+    const float capped = MIN32(floored, 32767);
+    return (opus_int16)float2int(capped);
+}
+#endif /* DISABLE_FLOAT_API */
+
+#endif /* FLOAT_CAST_H */
diff --git a/drivers/opus/celt/kiss_fft.c b/drivers/opus/celt/kiss_fft.c
new file mode 100644
index 0000000000..333be975d1
--- /dev/null
+++ b/drivers/opus/celt/kiss_fft.c
@@ -0,0 +1,719 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+ Lots of modifications by Jean-Marc Valin
+ Copyright (c) 2005-2007, Xiph.Org Foundation
+ Copyright (c) 2008, Xiph.Org Foundation, CSIRO
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.*/
+
+/* This code is originally from Mark Borgerding's KISS-FFT but has been
+ heavily modified to better suit Opus */
+
+#ifndef SKIP_CONFIG_H
+# ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+# endif
+#endif
+
+#include "_kiss_fft_guts.h"
+#include "arch.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+/* The guts header contains all the multiplication and addition macros that are defined for
+ complex numbers. It also declares the kf_ internal functions.
+*/
+
+static void kf_bfly2( /* Forward radix-2 butterfly pass over Fout, in place. */
+ kiss_fft_cpx * Fout, /* data buffer; modified in place */
+ const size_t fstride, /* step through st->twiddles per butterfly */
+ const kiss_fft_state *st, /* only st->twiddles is read here */
+ int m, /* butterflies per group, and the distance between the two paired elements */
+ int N, /* number of groups */
+ int mm /* distance between the starts of consecutive groups */
+ )
+{
+ kiss_fft_cpx * Fout2;
+ const kiss_twiddle_cpx * tw1;
+ int i,j;
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ Fout2 = Fout + m;
+ tw1 = st->twiddles; /* the twiddle walk restarts for every group */
+ for(j=0;j<m;j++)
+ {
+ kiss_fft_cpx t;
+ Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); /* both inputs pass through SHR32(..., 1) first (presumably a halving for headroom; SHR32 is defined elsewhere) */
+ Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
+ C_MUL (t, *Fout2 , *tw1); /* t = second element combined with the current twiddle (C_MUL comes from _kiss_fft_guts.h) */
+ tw1 += fstride;
+ C_SUB( *Fout2 , *Fout , t ); /* second output is formed from the first input and t */
+ C_ADDTO( *Fout , t ); /* first output accumulates t; must follow C_SUB, which reads the old *Fout */
+ ++Fout2;
+ ++Fout;
+ }
+ }
+}
+
+static void ki_bfly2( /* Inverse radix-2 butterfly pass over Fout, in place. */
+ kiss_fft_cpx * Fout, /* data buffer; modified in place */
+ const size_t fstride, /* step through st->twiddles per butterfly */
+ const kiss_fft_state *st, /* only st->twiddles is read here */
+ int m, /* butterflies per group, and the distance between the two paired elements */
+ int N, /* number of groups */
+ int mm /* distance between the starts of consecutive groups */
+ )
+{
+ kiss_fft_cpx * Fout2;
+ const kiss_twiddle_cpx * tw1;
+ kiss_fft_cpx t;
+ int i,j;
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ Fout2 = Fout + m;
+ tw1 = st->twiddles; /* the twiddle walk restarts for every group */
+ for(j=0;j<m;j++)
+ {
+ C_MULC (t, *Fout2 , *tw1); /* uses C_MULC where kf_bfly2 uses C_MUL (presumably the conjugate-twiddle product; macro defined elsewhere); no SHR32 scaling here */
+ tw1 += fstride;
+ C_SUB( *Fout2 , *Fout , t ); /* second output is formed from the first input and t */
+ C_ADDTO( *Fout , t ); /* first output accumulates t; must follow C_SUB, which reads the old *Fout */
+ ++Fout2;
+ ++Fout;
+ }
+ }
+}
+
+static void kf_bfly4( /* Forward radix-4 butterfly pass over Fout, in place. */
+ kiss_fft_cpx * Fout, /* data buffer; modified in place */
+ const size_t fstride, /* base step through st->twiddles per butterfly */
+ const kiss_fft_state *st, /* only st->twiddles is read here */
+ int m, /* butterflies per group; the four inputs sit at offsets 0, m, 2m, 3m */
+ int N, /* number of groups */
+ int mm /* distance between the starts of consecutive groups */
+ )
+{
+ const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+ kiss_fft_cpx scratch[6];
+ const size_t m2=2*m;
+ const size_t m3=3*m;
+ int i, j;
+
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ tw3 = tw2 = tw1 = st->twiddles; /* all three twiddle walks restart for every group */
+ for (j=0;j<m;j++)
+ {
+ C_MUL4(scratch[0],Fout[m] , *tw1 ); /* C_MUL4 is defined elsewhere; presumably a twiddle product with a built-in scale matching the PSHR32(..., 2) below -- confirm */
+ C_MUL4(scratch[1],Fout[m2] , *tw2 );
+ C_MUL4(scratch[2],Fout[m3] , *tw3 );
+
+ Fout->r = PSHR32(Fout->r, 2);
+ Fout->i = PSHR32(Fout->i, 2);
+ C_SUB( scratch[5] , *Fout, scratch[1] );
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
+ C_SUB( Fout[m2], *Fout, scratch[3] );
+ tw1 += fstride; /* the three walks advance by 1x, 2x and 3x fstride */
+ tw2 += fstride*2;
+ tw3 += fstride*3;
+ C_ADDTO( *Fout , scratch[3] );
+ /* Outputs m and m3 are scratch[5] -/+ j*scratch[4], written out by component. */
+ Fout[m].r = scratch[5].r + scratch[4].i;
+ Fout[m].i = scratch[5].i - scratch[4].r;
+ Fout[m3].r = scratch[5].r - scratch[4].i;
+ Fout[m3].i = scratch[5].i + scratch[4].r;
+ ++Fout;
+ }
+ }
+}
+
+static void ki_bfly4( /* Inverse radix-4 butterfly pass over Fout, in place. */
+ kiss_fft_cpx * Fout, /* data buffer; modified in place */
+ const size_t fstride, /* base step through st->twiddles per butterfly */
+ const kiss_fft_state *st, /* only st->twiddles is read here */
+ int m, /* butterflies per group; the four inputs sit at offsets 0, m, 2m, 3m */
+ int N, /* number of groups */
+ int mm /* distance between the starts of consecutive groups */
+ )
+{
+ const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+ kiss_fft_cpx scratch[6];
+ const size_t m2=2*m;
+ const size_t m3=3*m;
+ int i, j;
+
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ tw3 = tw2 = tw1 = st->twiddles; /* all three twiddle walks restart for every group */
+ for (j=0;j<m;j++)
+ {
+ C_MULC(scratch[0],Fout[m] , *tw1 ); /* C_MULC (defined elsewhere) replaces the forward pass's C_MUL4; no scaling of Fout[0] is done here */
+ C_MULC(scratch[1],Fout[m2] , *tw2 );
+ C_MULC(scratch[2],Fout[m3] , *tw3 );
+
+ C_SUB( scratch[5] , *Fout, scratch[1] );
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
+ C_SUB( Fout[m2], *Fout, scratch[3] );
+ tw1 += fstride; /* the three walks advance by 1x, 2x and 3x fstride */
+ tw2 += fstride*2;
+ tw3 += fstride*3;
+ C_ADDTO( *Fout , scratch[3] );
+ /* Outputs m and m3 are scratch[5] +/- j*scratch[4]: the signs are the reverse of kf_bfly4. */
+ Fout[m].r = scratch[5].r - scratch[4].i;
+ Fout[m].i = scratch[5].i + scratch[4].r;
+ Fout[m3].r = scratch[5].r + scratch[4].i;
+ Fout[m3].i = scratch[5].i - scratch[4].r;
+ ++Fout;
+ }
+ }
+}
+
+#ifndef RADIX_TWO_ONLY
+
+static void kf_bfly3( /* Forward radix-3 butterfly pass over Fout, in place. */
+ kiss_fft_cpx * Fout, /* data buffer; modified in place */
+ const size_t fstride, /* base step through st->twiddles per butterfly */
+ const kiss_fft_state *st, /* only st->twiddles is read here */
+ int m, /* butterflies per group; inputs sit at offsets 0, m, 2m. Must be >= 1 (do/while below) */
+ int N, /* number of groups */
+ int mm /* distance between the starts of consecutive groups */
+ )
+{
+ int i;
+ size_t k;
+ const size_t m2 = 2*m;
+ const kiss_twiddle_cpx *tw1,*tw2;
+ kiss_fft_cpx scratch[5]; /* only scratch[0..3] are used below */
+ kiss_twiddle_cpx epi3;
+
+ kiss_fft_cpx * Fout_beg = Fout;
+ epi3 = st->twiddles[fstride*m]; /* only its imaginary part is used below */
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ tw1=tw2=st->twiddles;
+ k=m;
+ do {
+ C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); /* C_FIXDIV is defined elsewhere; presumably scales each input by 1/3 in fixed-point builds -- confirm */
+
+ C_MUL(scratch[1],Fout[m] , *tw1);
+ C_MUL(scratch[2],Fout[m2] , *tw2);
+
+ C_ADD(scratch[3],scratch[1],scratch[2]);
+ C_SUB(scratch[0],scratch[1],scratch[2]);
+ tw1 += fstride;
+ tw2 += fstride*2;
+
+ Fout[m].r = Fout->r - HALF_OF(scratch[3].r); /* uses the old *Fout, so this must precede the C_ADDTO below */
+ Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+ C_MULBYSCALAR( scratch[0] , epi3.i );
+
+ C_ADDTO(*Fout,scratch[3]);
+
+ Fout[m2].r = Fout[m].r + scratch[0].i; /* Fout[m2] is derived from the intermediate Fout[m] before Fout[m] is finalised */
+ Fout[m2].i = Fout[m].i - scratch[0].r;
+
+ Fout[m].r -= scratch[0].i;
+ Fout[m].i += scratch[0].r;
+
+ ++Fout;
+ } while(--k); /* body runs before the test: m == 0 would wrap the unsigned counter */
+ }
+}
+
+static void ki_bfly3( /* Inverse radix-3 butterfly pass over Fout, in place. */
+ kiss_fft_cpx * Fout, /* data buffer; modified in place */
+ const size_t fstride, /* base step through st->twiddles per butterfly */
+ const kiss_fft_state *st, /* only st->twiddles is read here */
+ int m, /* butterflies per group; inputs sit at offsets 0, m, 2m. Must be >= 1 (do/while below) */
+ int N, /* number of groups */
+ int mm /* distance between the starts of consecutive groups */
+ )
+{
+ int i, k;
+ const size_t m2 = 2*m;
+ const kiss_twiddle_cpx *tw1,*tw2;
+ kiss_fft_cpx scratch[5]; /* only scratch[0..3] are used below */
+ kiss_twiddle_cpx epi3;
+
+ kiss_fft_cpx * Fout_beg = Fout;
+ epi3 = st->twiddles[fstride*m]; /* only its imaginary part is used below */
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ tw1=tw2=st->twiddles;
+ k=m;
+ do{
+ /* Unlike kf_bfly3, no C_FIXDIV scaling is applied to the inputs. */
+ C_MULC(scratch[1],Fout[m] , *tw1);
+ C_MULC(scratch[2],Fout[m2] , *tw2);
+
+ C_ADD(scratch[3],scratch[1],scratch[2]);
+ C_SUB(scratch[0],scratch[1],scratch[2]);
+ tw1 += fstride;
+ tw2 += fstride*2;
+
+ Fout[m].r = Fout->r - HALF_OF(scratch[3].r); /* uses the old *Fout, so this must precede the C_ADDTO below */
+ Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+ C_MULBYSCALAR( scratch[0] , -epi3.i ); /* negated scalar: the sign is opposite to kf_bfly3 */
+
+ C_ADDTO(*Fout,scratch[3]);
+
+ Fout[m2].r = Fout[m].r + scratch[0].i; /* Fout[m2] is derived from the intermediate Fout[m] before Fout[m] is finalised */
+ Fout[m2].i = Fout[m].i - scratch[0].r;
+
+ Fout[m].r -= scratch[0].i;
+ Fout[m].i += scratch[0].r;
+
+ ++Fout;
+ }while(--k); /* body runs before the test, so m == 0 is not handled */
+ }
+}
+
+static void kf_bfly5( /* Forward radix-5 butterfly pass over Fout, in place. */
+ kiss_fft_cpx * Fout, /* data buffer; modified in place */
+ const size_t fstride, /* base step through st->twiddles per butterfly */
+ const kiss_fft_state *st, /* only st->twiddles is read here */
+ int m, /* butterflies per group; the five inputs sit at offsets 0, m, 2m, 3m, 4m */
+ int N, /* number of groups */
+ int mm /* distance between the starts of consecutive groups */
+ )
+{
+ kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+ int i, u;
+ kiss_fft_cpx scratch[13];
+ const kiss_twiddle_cpx * twiddles = st->twiddles;
+ const kiss_twiddle_cpx *tw;
+ kiss_twiddle_cpx ya,yb;
+ kiss_fft_cpx * Fout_beg = Fout;
+
+ ya = twiddles[fstride*m]; /* two fixed twiddles used as the constant coefficients of the radix-5 combination */
+ yb = twiddles[fstride*2*m];
+ tw=st->twiddles; /* same table as 'twiddles'; indexed per butterfly below */
+
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ Fout0=Fout;
+ Fout1=Fout0+m;
+ Fout2=Fout0+2*m;
+ Fout3=Fout0+3*m;
+ Fout4=Fout0+4*m;
+
+ for ( u=0; u<m; ++u ) {
+ C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); /* C_FIXDIV is defined elsewhere; presumably scales each input by 1/5 in fixed-point builds -- confirm */
+ scratch[0] = *Fout0; /* keep the (scaled) first input; *Fout0 is overwritten below */
+
+ C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+ C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+ C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+ C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+ C_ADD( scratch[7],scratch[1],scratch[4]); /* sums and differences of the pairs (1,4) and (2,3) */
+ C_SUB( scratch[10],scratch[1],scratch[4]);
+ C_ADD( scratch[8],scratch[2],scratch[3]);
+ C_SUB( scratch[9],scratch[2],scratch[3]);
+
+ Fout0->r += scratch[7].r + scratch[8].r;
+ Fout0->i += scratch[7].i + scratch[8].i;
+
+ scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+ scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+ scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+ scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+
+ C_SUB(*Fout1,scratch[5],scratch[6]); /* outputs 1 and 4 are scratch[5] -/+ scratch[6] */
+ C_ADD(*Fout4,scratch[5],scratch[6]);
+
+ scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+ scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+ scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+ scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+
+ C_ADD(*Fout2,scratch[11],scratch[12]); /* outputs 2 and 3 are scratch[11] +/- scratch[12] */
+ C_SUB(*Fout3,scratch[11],scratch[12]);
+
+ ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+ }
+ }
+}
+
+static void ki_bfly5( /* Inverse radix-5 butterfly pass over Fout, in place. */
+ kiss_fft_cpx * Fout, /* data buffer; modified in place */
+ const size_t fstride, /* base step through st->twiddles per butterfly */
+ const kiss_fft_state *st, /* only st->twiddles is read here */
+ int m, /* butterflies per group; the five inputs sit at offsets 0, m, 2m, 3m, 4m */
+ int N, /* number of groups */
+ int mm /* distance between the starts of consecutive groups */
+ )
+{
+ kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+ int i, u;
+ kiss_fft_cpx scratch[13];
+ const kiss_twiddle_cpx * twiddles = st->twiddles;
+ const kiss_twiddle_cpx *tw;
+ kiss_twiddle_cpx ya,yb;
+ kiss_fft_cpx * Fout_beg = Fout;
+
+ ya = twiddles[fstride*m]; /* two fixed twiddles used as the constant coefficients of the radix-5 combination */
+ yb = twiddles[fstride*2*m];
+ tw=st->twiddles; /* same table as 'twiddles'; indexed per butterfly below */
+
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ Fout0=Fout;
+ Fout1=Fout0+m;
+ Fout2=Fout0+2*m;
+ Fout3=Fout0+3*m;
+ Fout4=Fout0+4*m;
+
+ for ( u=0; u<m; ++u ) {
+ scratch[0] = *Fout0; /* keep the first input; *Fout0 is overwritten below. No C_FIXDIV scaling in the inverse pass */
+
+ C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); /* C_MULC (defined elsewhere) replaces the forward pass's C_MUL */
+ C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
+ C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
+ C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+ C_ADD( scratch[7],scratch[1],scratch[4]); /* sums and differences of the pairs (1,4) and (2,3) */
+ C_SUB( scratch[10],scratch[1],scratch[4]);
+ C_ADD( scratch[8],scratch[2],scratch[3]);
+ C_SUB( scratch[9],scratch[2],scratch[3]);
+
+ Fout0->r += scratch[7].r + scratch[8].r;
+ Fout0->i += scratch[7].i + scratch[8].i;
+
+ scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+ scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+ scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); /* signs of scratch[6] and scratch[12] are the negation of those in kf_bfly5 */
+ scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
+
+ C_SUB(*Fout1,scratch[5],scratch[6]); /* outputs 1 and 4 are scratch[5] -/+ scratch[6] */
+ C_ADD(*Fout4,scratch[5],scratch[6]);
+
+ scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+ scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+ scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
+ scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
+
+ C_ADD(*Fout2,scratch[11],scratch[12]); /* outputs 2 and 3 are scratch[11] +/- scratch[12] */
+ C_SUB(*Fout3,scratch[11],scratch[12]);
+
+ ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+ }
+ }
+}
+
+#endif
+
+
+#ifdef CUSTOM_MODES
+
+static
+void compute_bitrev_table(
+    int Fout,
+    opus_int16 *f,
+    const size_t fstride,
+    int in_stride,
+    opus_int16 * factors,
+    const kiss_fft_state *st
+    )
+{
+    /* Recursively fills the index table through f, one factor pair per level.
+       Each level consumes a (radix, remaining length) pair from factors. */
+    const int radix = factors[0];
+    const int sublen = factors[1];
+    const size_t step = fstride*in_stride;
+    opus_int16 *next_factors = factors+2;
+    int branch;
+
+    for (branch=0;branch<radix;branch++)
+    {
+        if (sublen==1)
+        {
+            /* Last level: store the output index itself. */
+            *f = Fout+branch;
+        } else {
+            /* Descend with a radix-times larger stride, then move on to the next sub-block. */
+            compute_bitrev_table( Fout , f, fstride*radix, in_stride, next_factors,st);
+            Fout += sublen;
+        }
+        f += step;
+    }
+}
+
+/* facbuf is populated by p1,m1,p2,m2, ...
+ where
+ p[i] * m[i] = m[i-1]
+ m0 = n */
+static
+int kf_factor(int n,opus_int16 * facbuf)
+{
+    /* Factor n (powers of 4, then 2, then remaining primes) into (radix, remaining length) pairs in facbuf.
+       Returns 1 on success; 0 if n<1, a radix is unsupported, or more than MAXFACTORS pairs would be needed. */
+    int p=4, stages=0;
+    if (n<1) return 0; /* zero or negative lengths have no usable factorisation */
+    do {
+        while (n % p) {
+            switch (p) {
+                case 4: p = 2; break;
+                case 2: p = 3; break;
+                default: p += 2; break;
+            }
+            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
+                p = n; /* no more factors, skip to end */
+        }
+        n /= p;
+#ifdef RADIX_TWO_ONLY
+        if ((p!=2 && p != 4) || stages>=MAXFACTORS)
+#else
+        if (p>5 || stages>=MAXFACTORS)
+#endif
+            return 0; /* unsupported radix, or the factor buffer (kiss_fft_state.factors, 2*MAXFACTORS entries) is full */
+        facbuf[2*stages] = p;
+        facbuf[2*stages+1] = n;
+        stages++;
+    } while (n > 1);
+    return 1;
+}
+
+static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
+{
+    /* Fills twiddles[0..nfft-1]; entry k is built from the phase -2*pi*k/nfft
+       (float build) or from -k shifted left by 17 and divided by nfft (fixed-point build). */
+    int k;
+    for (k=0;k<nfft;++k) {
+#ifdef OPUS_FIXED_POINT
+        opus_val32 turn = -k;
+        kf_cexp2(twiddles+k, DIV32(SHL32(turn,17),nfft));
+#else
+        const double pi=3.14159265358979323846264338327;
+        double angle = ( -2*pi /nfft ) * k;
+        kf_cexp(twiddles+k, angle );
+#endif
+    }
+}
+
+/*
+ *
+ * Allocates all necessary storage space for the fft and ifft.
+ * The returned state struct is a single block, but its bitrev table and
+ * (when not shared with a base state) twiddles are separate: use opus_fft_free().
+ * */
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
+{ /* Builds an nfft-point state in caller 'mem', or in a fresh block when lenmem==NULL; returns NULL on failure. */
+    kiss_fft_state *st=NULL;
+    size_t memneeded = sizeof(struct kiss_fft_state); /* the tables are allocated separately below */
+    if ( lenmem==NULL ) {
+        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
+    }else{
+        if (mem != NULL && *lenmem >= memneeded)
+            st = (kiss_fft_state*)mem;
+        *lenmem = memneeded; /* always report the size required */
+    }
+    if (st) {
+        opus_int16 *bitrev;
+        kiss_twiddle_cpx *twiddles;
+        st->nfft=nfft;
+#ifndef OPUS_FIXED_POINT
+        st->scale = 1.f/nfft;
+#endif
+        if (base != NULL)
+        {
+            st->twiddles = base->twiddles; /* borrowed: shift>=0 tells opus_fft_free() not to free it */
+            st->shift = 0;
+            while (st->shift < 32 && nfft<<st->shift != base->nfft) /* test the bound first so the shift count never reaches 32 */
+                st->shift++;
+            if (st->shift>=32)
+                goto fail;
+        } else {
+            st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
+            st->shift = -1; /* negative shift marks the table as owned by this state */
+            if (twiddles==NULL)
+                goto fail;
+            compute_twiddles(twiddles, nfft);
+        }
+        if (!kf_factor(nfft,st->factors))
+            goto fail;
+        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
+        if (st->bitrev==NULL)
+            goto fail;
+        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
+    }
+    return st;
+fail:
+    /* No bitrev table exists on any failure path, so release only the owned twiddles and a block allocated here. */
+    if (st->shift < 0 && st->twiddles != NULL)
+        opus_free((kiss_twiddle_cpx*)st->twiddles);
+    if (lenmem == NULL)
+        opus_free(st);
+    return NULL;
+}
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem ) /* convenience wrapper around opus_fft_alloc_twiddles() */
+{
+ return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL); /* NULL base: the new state computes its own twiddle table */
+}
+
+void opus_fft_free(const kiss_fft_state *cfg)
+{
+    /* Releases a state and its tables; a NULL cfg is ignored. */
+    if (!cfg)
+        return;
+    opus_free((opus_int16*)cfg->bitrev);
+    if (cfg->shift < 0) /* only then is the twiddle table freed along with the state */
+        opus_free((kiss_twiddle_cpx*)cfg->twiddles);
+    opus_free((kiss_fft_state*)cfg);
+}
+
+#endif /* CUSTOM_MODES */
+
+void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{ /* Forward FFT of st->nfft points from fin into fout; fin and fout must be different buffers. */
+    int m2, m;
+    int p;
+    int L;
+    int fstride[MAXFACTORS+1]; /* the factor walk below writes entry L+1, so a state with MAXFACTORS stages needs one spare slot */
+    int i;
+    int shift;
+
+    /* st->shift can be -1 */
+    shift = st->shift>0 ? st->shift : 0;
+
+    celt_assert2 (fin != fout, "In-place FFT not supported");
+    /* Bit-reverse the input */
+    for (i=0;i<st->nfft;i++)
+    {
+        fout[st->bitrev[i]] = fin[i];
+#ifndef OPUS_FIXED_POINT
+        fout[st->bitrev[i]].r *= st->scale;
+        fout[st->bitrev[i]].i *= st->scale;
+#endif
+    }
+    /* Walk the (radix, remaining length) pairs until the remaining length is 1, accumulating the strides. */
+    fstride[0] = 1;
+    L=0;
+    do {
+        p = st->factors[2*L];
+        m = st->factors[2*L+1];
+        fstride[L+1] = fstride[L]*p;
+        L++;
+    } while(m!=1);
+    m = st->factors[2*L-1];
+    for (i=L-1;i>=0;i--) /* run the stages from the last factor back to the first */
+    {
+        if (i!=0)
+            m2 = st->factors[2*i-1];
+        else
+            m2 = 1;
+        switch (st->factors[2*i])
+        {
+        case 2:
+            kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+            break;
+        case 4:
+            kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+            break;
+ #ifndef RADIX_TWO_ONLY
+        case 3:
+            kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+            break;
+        case 5:
+            kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+            break;
+ #endif
+        }
+        m = m2;
+    }
+}
+
+void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{ /* Inverse FFT of st->nfft points from fin into fout; fin and fout must differ. No st->scale factor is applied here. */
+    int m2, m;
+    int p;
+    int L;
+    int fstride[MAXFACTORS+1]; /* the factor walk below writes entry L+1, so a state with MAXFACTORS stages needs one spare slot */
+    int i;
+    int shift;
+
+    /* st->shift can be -1 */
+    shift = st->shift>0 ? st->shift : 0;
+    celt_assert2 (fin != fout, "In-place FFT not supported");
+    /* Bit-reverse the input */
+    for (i=0;i<st->nfft;i++)
+        fout[st->bitrev[i]] = fin[i];
+    /* Walk the (radix, remaining length) pairs until the remaining length is 1, accumulating the strides. */
+    fstride[0] = 1;
+    L=0;
+    do {
+        p = st->factors[2*L];
+        m = st->factors[2*L+1];
+        fstride[L+1] = fstride[L]*p;
+        L++;
+    } while(m!=1);
+    m = st->factors[2*L-1];
+    for (i=L-1;i>=0;i--) /* run the stages from the last factor back to the first */
+    {
+        if (i!=0)
+            m2 = st->factors[2*i-1];
+        else
+            m2 = 1;
+        switch (st->factors[2*i])
+        {
+        case 2:
+            ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+            break;
+        case 4:
+            ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+            break;
+#ifndef RADIX_TWO_ONLY
+        case 3:
+            ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+            break;
+        case 5:
+            ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+            break;
+#endif
+        }
+        m = m2;
+    }
+}
+
diff --git a/drivers/opus/celt/kiss_fft.h b/drivers/opus/celt/kiss_fft.h
new file mode 100644
index 0000000000..aa22b3a419
--- /dev/null
+++ b/drivers/opus/celt/kiss_fft.h
@@ -0,0 +1,139 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+ Lots of modifications by Jean-Marc Valin
+ Copyright (c) 2005-2007, Xiph.Org Foundation
+ Copyright (c) 2008, Xiph.Org Foundation, CSIRO
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <math.h>
+#include "arch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef USE_SIMD
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
+#else
+#define KISS_FFT_MALLOC opus_alloc
+#endif
+
+#ifdef OPUS_FIXED_POINT
+#include "arch.h"
+
+# define kiss_fft_scalar opus_int32
+# define kiss_twiddle_scalar opus_int16
+
+
+#else
+# ifndef kiss_fft_scalar
+/* default is float */
+# define kiss_fft_scalar float
+# define kiss_twiddle_scalar float
+# define KF_SUFFIX _celt_single
+# endif
+#endif
+
+typedef struct { /* one complex sample of FFT data */
+ kiss_fft_scalar r; /* real part */
+ kiss_fft_scalar i; /* imaginary part */
+}kiss_fft_cpx;
+
+typedef struct { /* one complex twiddle factor; uses kiss_twiddle_scalar, which is opus_int16 in fixed-point builds */
+ kiss_twiddle_scalar r; /* real part */
+ kiss_twiddle_scalar i; /* imaginary part */
+}kiss_twiddle_cpx;
+
+#define MAXFACTORS 8
+/* e.g. an fft of length 128 has 4 factors
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+typedef struct kiss_fft_state{
+ int nfft; /* transform length in complex points */
+#ifndef OPUS_FIXED_POINT
+ kiss_fft_scalar scale; /* float builds only; opus_fft_alloc_twiddles() sets it to 1/nfft */
+#endif
+ int shift; /* -1 when the state owns its twiddles; otherwise nfft<<shift equals the nfft of the state the twiddles are borrowed from */
+ opus_int16 factors[2*MAXFACTORS]; /* (radix, remaining length) pairs, one pair per stage */
+ const opus_int16 *bitrev; /* bitrev[i] is the output slot that input sample i is copied to */
+ const kiss_twiddle_cpx *twiddles; /* twiddle table, owned or borrowed (see shift) */
+} kiss_fft_state;
+
+/*typedef struct kiss_fft_state* kiss_fft_cfg;*/
+
+/**
+ * opus_fft_alloc
+ *
+ * Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ * typical usage: kiss_fft_state *mycfg=opus_fft_alloc(1024,NULL,NULL);
+ *
+ * The return value from fft_alloc is a cfg buffer used internally
+ * by the fft routine or NULL.
+ *
+ * If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc.
+ * The returned value should be released with opus_fft_free() when done to avoid memory leaks.
+ *
+ * The state can be placed in a user supplied buffer 'mem':
+ * If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ * then the function places the cfg in mem and the size used in *lenmem
+ * and returns mem.
+ *
+ * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ * then the function returns NULL and places the minimum cfg
+ * buffer size in *lenmem.
+ * */
+
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base);
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
+
+/**
+ * opus_fft(cfg,fin,fout) -- fin and fout must be different buffers
+ *
+ * Perform an FFT on a complex input buffer.
+ * for a forward FFT,
+ * fin should be f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+ f[k].r and f[k].i
+ * */
+void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+void opus_fft_free(const kiss_fft_state *cfg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/celt/laplace.c b/drivers/opus/celt/laplace.c
new file mode 100644
index 0000000000..c6d293f298
--- /dev/null
+++ b/drivers/opus/celt/laplace.c
@@ -0,0 +1,134 @@
+/* Copyright (c) 2007 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "laplace.h"
+#include "mathops.h"
+
+/* The minimum probability of an energy delta (out of 32768). */
+#define LAPLACE_LOG_MINP (0)
+#define LAPLACE_MINP (1<<LAPLACE_LOG_MINP)
+/* The minimum number of guaranteed representable energy deltas (in one
+ direction). */
+#define LAPLACE_NMIN (16)
+
+/* When called, decay is positive and at most 11456. */
+static unsigned ec_laplace_get_freq1(unsigned fs0, int decay)
+{
+    /* Scales what is left of the 32768 range, after fs0 and the reserved tail, by (16384-decay)/32768. */
+    const unsigned remaining = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN) - fs0;
+    return (remaining*(opus_int32)(16384-decay))>>15;
+}
+
+void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay) /* encodes *value; *value may be rewritten to the value actually coded */
+{
+ unsigned fl; /* cumulative frequency below the symbol being coded */
+ int val = *value;
+ fl = 0;
+ if (val)
+ {
+ int s;
+ int i;
+ s = -(val<0); /* sign mask: 0 for positive, -1 for negative */
+ val = (val+s)^s; /* magnitude of val */
+ fl = fs; /* skip the interval of the value 0 */
+ fs = ec_laplace_get_freq1(fs, decay);
+ /* Search the decaying part of the PDF.*/
+ for (i=1; fs > 0 && i < val; i++)
+ {
+ fs *= 2; /* doubled: each magnitude covers a positive and a negative symbol */
+ fl += fs+2*LAPLACE_MINP;
+ fs = (fs*(opus_int32)decay)>>15;
+ }
+ /* Everything beyond that has probability LAPLACE_MINP. */
+ if (!fs)
+ {
+ int di;
+ int ndi_max;
+ ndi_max = (32768-fl+LAPLACE_MINP-1)>>LAPLACE_LOG_MINP; /* tail symbols that still fit below 32768 */
+ ndi_max = (ndi_max-s)>>1;
+ di = IMIN(val - i, ndi_max - 1); /* clamp the magnitude to what the tail can represent */
+ fl += (2*di+1+s)*LAPLACE_MINP;
+ fs = IMIN(LAPLACE_MINP, 32768-fl);
+ *value = (i+di+s)^s; /* report the (possibly clamped) value back with its original sign */
+ }
+ else
+ {
+ fs += LAPLACE_MINP;
+ fl += fs&~s; /* adds fs when s==0 (positive) and nothing when s==-1 (negative) */
+ }
+ celt_assert(fl+fs<=32768);
+ celt_assert(fs>0);
+ }
+ ec_encode_bin(enc, fl, fl+fs, 15); /* code the interval [fl, fl+fs) with 15 bits of total range */
+}
+
+int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay) /* returns the decoded signed value */
+{
+ int val=0;
+ unsigned fl; /* lower bound of the candidate symbol's interval */
+ unsigned fm; /* value read from the decoder; the symbol whose interval contains it is the one decoded */
+ fm = ec_decode_bin(dec, 15);
+ fl = 0;
+ if (fm >= fs) /* fm below fs means the value 0, so val stays 0 */
+ {
+ val++;
+ fl = fs;
+ fs = ec_laplace_get_freq1(fs, decay)+LAPLACE_MINP;
+ /* Search the decaying part of the PDF.*/
+ while(fs > LAPLACE_MINP && fm >= fl+2*fs)
+ {
+ fs *= 2; /* skip both the negative and the positive symbol of this magnitude */
+ fl += fs;
+ fs = ((fs-2*LAPLACE_MINP)*(opus_int32)decay)>>15;
+ fs += LAPLACE_MINP;
+ val++;
+ }
+ /* Everything beyond that has probability LAPLACE_MINP. */
+ if (fs <= LAPLACE_MINP)
+ {
+ int di;
+ di = (fm-fl)>>(LAPLACE_LOG_MINP+1); /* number of whole magnitude steps inside the flat tail */
+ val += di;
+ fl += 2*di*LAPLACE_MINP;
+ }
+ if (fm < fl+fs) /* first interval of the pair: negative value */
+ val = -val;
+ else
+ fl += fs; /* second interval of the pair: positive value */
+ }
+ celt_assert(fl<32768);
+ celt_assert(fs>0);
+ celt_assert(fl<=fm);
+ celt_assert(fm<IMIN(fl+fs,32768));
+ ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768); /* consume the interval [fl, min(fl+fs, 32768)) out of 32768 */
+ return val;
+}
diff --git a/drivers/opus/celt/laplace.h b/drivers/opus/celt/laplace.h
new file mode 100644
index 0000000000..46c14b5da5
--- /dev/null
+++ b/drivers/opus/celt/laplace.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2007 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "entenc.h"
+#include "entdec.h"
+
+/** Encode a value that is assumed to be the realisation of a
+ Laplace-distributed random process
+ @param enc Entropy encoder state
+ @param value Value to encode
+ @param fs Probability of 0, multiplied by 32768
+ @param decay Probability of the value +/- 1, multiplied by 16384
+*/
+void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay);
+
+/** Decode a value that is assumed to be the realisation of a
+ Laplace-distributed random process
+ @param dec Entropy decoder state
+ @param fs Probability of 0, multiplied by 32768
+ @param decay Probability of the value +/- 1, multiplied by 16384
+ @return Value decoded
+ */
+int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay);
diff --git a/drivers/opus/celt/mathops.c b/drivers/opus/celt/mathops.c
new file mode 100644
index 0000000000..49be746d8c
--- /dev/null
+++ b/drivers/opus/celt/mathops.c
@@ -0,0 +1,208 @@
+/* Copyright (c) 2002-2008 Jean-Marc Valin
+ Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/**
+ @file mathops.c
+ @brief Various math functions
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "mathops.h"
+
+/*Compute floor(sqrt(_val)) with exact arithmetic.
+ This has been tested on all possible 32-bit inputs.*/
+unsigned isqrt32(opus_uint32 _val){
+ unsigned b;
+ unsigned g;
+ int bshift;
+ /*Uses the second method from
+ http://www.azillionmonkeys.com/qed/sqroot.html
+ The main idea is to search for the largest binary digit b such that
+ (g+b)*(g+b) <= _val, and add it to the solution g.*/
+ g=0;
+ bshift=(EC_ILOG(_val)-1)>>1;
+ b=1U<<bshift;
+ do{
+ opus_uint32 t;
+ t=(((opus_uint32)g<<1)+b)<<bshift;
+ if(t<=_val){
+ g+=b;
+ _val-=t;
+ }
+ b>>=1;
+ bshift--;
+ }
+ while(bshift>=0);
+ return g;
+}
+
+#ifdef OPUS_FIXED_POINT
+
+opus_val32 frac_div32(opus_val32 a, opus_val32 b)
+{
+ opus_val16 rcp;
+ opus_val32 result, rem;
+ int shift = celt_ilog2(b)-29;
+ a = VSHR32(a,shift);
+ b = VSHR32(b,shift);
+ /* 16-bit reciprocal */
+ rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
+ result = MULT16_32_Q15(rcp, a);
+ rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
+ result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
+ if (result >= 536870912) /* 2^29 */
+ return 2147483647; /* 2^31 - 1 */
+ else if (result <= -536870912) /* -2^29 */
+ return -2147483647; /* -(2^31 - 1) */
+ else
+ return SHL32(result, 2);
+}
+
+/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
+opus_val16 celt_rsqrt_norm(opus_val32 x)
+{
+ opus_val16 n;
+ opus_val16 r;
+ opus_val16 r2;
+ opus_val16 y;
+ /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
+ n = x-32768;
+ /* Get a rough initial guess for the root.
+ The optimal minimax quadratic approximation (using relative error) is
+ r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
+ Coefficients here, and the final result r, are Q14.*/
+ r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
+ /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
+ We can compute the result from n and r using Q15 multiplies with some
+ adjustment, carefully done to avoid overflow.
+ Range of y is [-1564,1594]. */
+ r2 = MULT16_16_Q15(r, r);
+ y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
+ /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
+ This yields the Q14 reciprocal square root of the Q16 x, with a maximum
+ relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
+ peak absolute error of 2.26591/16384. */
+ return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
+ SUB16(MULT16_16_Q15(y, 12288), 16384))));
+}
+
+/** Sqrt approximation (QX input, QX/2 output) */
+opus_val32 celt_sqrt(opus_val32 x)
+{
+ int k;
+ opus_val16 n;
+ opus_val32 rt;
+ static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
+ if (x==0)
+ return 0;
+ else if (x>=1073741824)
+ return 32767;
+ k = (celt_ilog2(x)>>1)-7;
+ x = VSHR32(x, 2*k);
+ n = x-32768;
+ rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
+ MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
+ rt = VSHR32(rt,7-k);
+ return rt;
+}
+
+#define L1 32767
+#define L2 -7651
+#define L3 8277
+#define L4 -626
+
+static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
+{
+ opus_val16 x2;
+
+ x2 = MULT16_16_P15(x,x);
+ return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
+ ))))))));
+}
+
+#undef L1
+#undef L2
+#undef L3
+#undef L4
+
+opus_val16 celt_cos_norm(opus_val32 x)
+{
+ x = x&0x0001ffff;
+ if (x>SHL32(EXTEND32(1), 16))
+ x = SUB32(SHL32(EXTEND32(1), 17),x);
+ if (x&0x00007fff)
+ {
+ if (x<SHL32(EXTEND32(1), 15))
+ {
+ return _celt_cos_pi_2(EXTRACT16(x));
+ } else {
+ return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
+ }
+ } else {
+ if (x&0x0000ffff)
+ return 0;
+ else if (x&0x0001ffff)
+ return -32767;
+ else
+ return 32767;
+ }
+}
+
+/** Reciprocal approximation (Q15 input, Q16 output) */
+opus_val32 celt_rcp(opus_val32 x)
+{
+ int i;
+ opus_val16 n;
+ opus_val16 r;
+ celt_assert2(x>0, "celt_rcp() only defined for positive values");
+ i = celt_ilog2(x);
+ /* n is Q15 with range [0,1). */
+ n = VSHR32(x,i-15)-32768;
+ /* Start with a linear approximation:
+ r = 1.8823529411764706-0.9411764705882353*n.
+ The coefficients and the result are Q14 in the range [15420,30840].*/
+ r = ADD16(30840, MULT16_16_Q15(-15420, n));
+ /* Perform two Newton iterations:
+ r -= r*((r*n)-1.Q15)
+ = r*((r*n)+(r-1.Q15)). */
+ r = SUB16(r, MULT16_16_Q15(r,
+ ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
+ /* We subtract an extra 1 in the second iteration to avoid overflow; it also
+ neatly compensates for truncation error in the rest of the process. */
+ r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
+ ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
+ /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
+ of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
+ error of 1.24665/32768. */
+ return VSHR32(EXTEND32(r),i-16);
+}
+
+#endif
diff --git a/drivers/opus/celt/mathops.h b/drivers/opus/celt/mathops.h
new file mode 100644
index 0000000000..4a6bc539bc
--- /dev/null
+++ b/drivers/opus/celt/mathops.h
@@ -0,0 +1,258 @@
+/* Copyright (c) 2002-2008 Jean-Marc Valin
+ Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/**
+ @file mathops.h
+ @brief Various math functions
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef MATHOPS_H
+#define MATHOPS_H
+
+#include "arch.h"
+#include "entcode.h"
+#include "os_support.h"
+
+/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
+#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
+
+unsigned isqrt32(opus_uint32 _val);
+
+#ifndef OVERRIDE_CELT_MAXABS16
+static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
+{
+ int i;
+ opus_val16 maxval = 0;
+ opus_val16 minval = 0;
+ for (i=0;i<len;i++)
+ {
+ maxval = MAX16(maxval, x[i]);
+ minval = MIN16(minval, x[i]);
+ }
+ return MAX32(EXTEND32(maxval),-EXTEND32(minval));
+}
+#endif
+
+#ifndef OVERRIDE_CELT_MAXABS32
+#ifdef OPUS_FIXED_POINT
+static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+{
+ int i;
+ opus_val32 maxval = 0;
+ opus_val32 minval = 0;
+ for (i=0;i<len;i++)
+ {
+ maxval = MAX32(maxval, x[i]);
+ minval = MIN32(minval, x[i]);
+ }
+ return MAX32(maxval, -minval);
+}
+#else
+#define celt_maxabs32(x,len) celt_maxabs16(x,len)
+#endif
+#endif
+
+
+#ifndef OPUS_FIXED_POINT
+
+#define PI 3.141592653f
+#define celt_sqrt(x) ((float)sqrt(x))
+#define celt_rsqrt(x) (1.f/celt_sqrt(x))
+#define celt_rsqrt_norm(x) (celt_rsqrt(x))
+#define celt_cos_norm(x) ((float)cos((.5f*PI)*(x)))
+#define celt_rcp(x) (1.f/(x))
+#define celt_div(a,b) ((a)/(b))
+#define frac_div32(a,b) ((float)(a)/(b))
+
+#ifdef FLOAT_APPROX
+
+/* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127.
+ Denorm, +/- inf and NaN are *not* handled */
+
+/** Base-2 log approximation (log2(x)). */
+static OPUS_INLINE float celt_log2(float x)
+{
+ int integer;
+ float frac;
+ union {
+ float f;
+ opus_uint32 i;
+ } in;
+ in.f = x;
+ integer = (in.i>>23)-127;
+ in.i -= integer<<23;
+ frac = in.f - 1.5f;
+ frac = -0.41445418f + frac*(0.95909232f
+ + frac*(-0.33951290f + frac*0.16541097f));
+ return 1+integer+frac;
+}
+
+/** Base-2 exponential approximation (2^x). */
+static OPUS_INLINE float celt_exp2(float x)
+{
+ int integer;
+ float frac;
+ union {
+ float f;
+ opus_uint32 i;
+ } res;
+ integer = floor(x);
+ if (integer < -50)
+ return 0;
+ frac = x-integer;
+ /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
+ res.f = 0.99992522f + frac * (0.69583354f
+ + frac * (0.22606716f + 0.078024523f*frac));
+ res.i = (res.i + (integer<<23)) & 0x7fffffff;
+ return res.f;
+}
+
+#else
+#define celt_log2(x) ((float)(1.442695040888963387*log(x)))
+#define celt_exp2(x) ((float)exp(0.6931471805599453094*(x)))
+#endif
+
+#endif
+
+#ifdef OPUS_FIXED_POINT
+
+#include "os_support.h"
+
+#ifndef OVERRIDE_CELT_ILOG2
+/** Integer log in base2. Undefined for zero and negative numbers */
+static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
+{
+ celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers");
+ return EC_ILOG(x)-1;
+}
+#endif
+
+
+/** Integer log in base2. Defined for zero, but not for negative numbers */
+static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x)
+{
+ return x <= 0 ? 0 : celt_ilog2(x);
+}
+
+opus_val16 celt_rsqrt_norm(opus_val32 x);
+
+opus_val32 celt_sqrt(opus_val32 x);
+
+opus_val16 celt_cos_norm(opus_val32 x);
+
+/** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */
+static OPUS_INLINE opus_val16 celt_log2(opus_val32 x)
+{
+ int i;
+ opus_val16 n, frac;
+ /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605,
+ 0.15530808010959576, -0.08556153059057618 */
+ static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401};
+ if (x==0)
+ return -32767;
+ i = celt_ilog2(x);
+ n = VSHR32(x,i-15)-32768-16384;
+ frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4]))))))));
+ return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT);
+}
+
+/*
+ K0 = 1
+ K1 = log(2)
+ K2 = 3-4*log(2)
+ K3 = 3*log(2) - 2
+*/
+#define D0 16383
+#define D1 22804
+#define D2 14819
+#define D3 10204
+
+static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
+{
+ opus_val16 frac;
+ frac = SHL16(x, 4);
+ return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
+}
+/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
+static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
+{
+ int integer;
+ opus_val16 frac;
+ integer = SHR16(x,10);
+ if (integer>14)
+ return 0x7f000000;
+ else if (integer < -15)
+ return 0;
+ frac = celt_exp2_frac(x-SHL16(integer,10));
+ return VSHR32(EXTEND32(frac), -integer-2);
+}
+
+opus_val32 celt_rcp(opus_val32 x);
+
+#define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b))
+
+opus_val32 frac_div32(opus_val32 a, opus_val32 b);
+
+#define M1 32767
+#define M2 -21
+#define M3 -11943
+#define M4 4936
+
+/* Atan approximation using a 4th order polynomial. Input is in Q15 format
+ and normalized by pi/4. Output is in Q15 format */
+static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x)
+{
+ return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x)))))));
+}
+
+#undef M1
+#undef M2
+#undef M3
+#undef M4
+
+/* atan2() approximation valid for positive input values */
+static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
+{
+ if (y < x)
+ {
+ opus_val32 arg;
+ arg = celt_div(SHL32(EXTEND32(y),15),x);
+ if (arg >= 32767)
+ arg = 32767;
+ return SHR16(celt_atan01(EXTRACT16(arg)),1);
+ } else {
+ opus_val32 arg;
+ arg = celt_div(SHL32(EXTEND32(x),15),y);
+ if (arg >= 32767)
+ arg = 32767;
+ return 25736-SHR16(celt_atan01(EXTRACT16(arg)),1);
+ }
+}
+
+#endif /* OPUS_FIXED_POINT */
+#endif /* MATHOPS_H */
diff --git a/drivers/opus/celt/mdct.c b/drivers/opus/celt/mdct.c
new file mode 100644
index 0000000000..d08d026fac
--- /dev/null
+++ b/drivers/opus/celt/mdct.c
@@ -0,0 +1,311 @@
+ /* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2008 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+ to do most of the work. It should be relatively straightforward to
+ plug in pretty much any FFT here.
+
+ This replaces the Vorbis FFT (and uses the exact same API), which
+ was a bit too messy and that was ending up duplicating code
+ (might as well use the same FFT everywhere).
+
+ The algorithm is similar to (and inspired from) Fabrice Bellard's
+ MDCT implementation in FFMPEG, but has differences in signs, ordering
+ and scaling in many places.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#endif
+
+#include "mdct.h"
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include <math.h>
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+#ifdef CUSTOM_MODES
+
+int clt_mdct_init(celt_mdct_lookup *l,int N, int maxshift)
+{
+ int i;
+ int N4;
+ kiss_twiddle_scalar *trig;
+#if defined(OPUS_FIXED_POINT)
+ int N2=N>>1;
+#endif
+ l->n = N;
+ N4 = N>>2;
+ l->maxshift = maxshift;
+ for (i=0;i<=maxshift;i++)
+ {
+ if (i==0)
+ l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
+ else
+ l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
+#ifndef ENABLE_TI_DSPLIB55
+ if (l->kfft[i]==NULL)
+ return 0;
+#endif
+ }
+ l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
+ if (l->trig==NULL)
+ return 0;
+ /* We have enough points that sine isn't necessary */
+#if defined(OPUS_FIXED_POINT)
+ for (i=0;i<=N4;i++)
+ trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
+#else
+ for (i=0;i<=N4;i++)
+ trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
+#endif
+ return 1;
+}
+
+void clt_mdct_clear(celt_mdct_lookup *l)
+{
+ int i;
+ for (i=0;i<=l->maxshift;i++)
+ opus_fft_free(l->kfft[i]);
+ opus_free((kiss_twiddle_scalar*)l->trig);
+}
+
+#endif /* CUSTOM_MODES */
+
+/* Forward MDCT trashes the input array */
+void clt_mdct_forward(const celt_mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window, int overlap, int shift, int stride)
+{
+ int i;
+ int N, N2, N4;
+ kiss_twiddle_scalar sine;
+ VARDECL(kiss_fft_scalar, f);
+ VARDECL(kiss_fft_scalar, f2);
+ SAVE_STACK;
+ N = l->n;
+ N >>= shift;
+ N2 = N>>1;
+ N4 = N>>2;
+ ALLOC(f, N2, kiss_fft_scalar);
+ ALLOC(f2, N2, kiss_fft_scalar);
+ /* sin(x) ~= x here */
+#ifdef OPUS_FIXED_POINT
+ sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
+#else
+ sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
+#endif
+
+ /* Consider the input to be composed of four blocks: [a, b, c, d] */
+ /* Window, shuffle, fold */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+ const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+ kiss_fft_scalar * OPUS_RESTRICT yp = f;
+ const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+ const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+ for(i=0;i<((overlap+3)>>2);i++)
+ {
+ /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+ *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+ *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
+ xp1+=2;
+ xp2-=2;
+ wp1+=2;
+ wp2-=2;
+ }
+ wp1 = window;
+ wp2 = window+overlap-1;
+ for(;i<N4-((overlap+3)>>2);i++)
+ {
+ /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+ *yp++ = *xp2;
+ *yp++ = *xp1;
+ xp1+=2;
+ xp2-=2;
+ }
+ for(;i<N4;i++)
+ {
+ /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+ *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+ *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
+ xp1+=2;
+ xp2-=2;
+ wp1+=2;
+ wp2-=2;
+ }
+ }
+ /* Pre-rotation */
+ {
+ kiss_fft_scalar * OPUS_RESTRICT yp = f;
+ const kiss_twiddle_scalar *t = &l->trig[0];
+ for(i=0;i<N4;i++)
+ {
+ kiss_fft_scalar re, im, yr, yi;
+ re = yp[0];
+ im = yp[1];
+ yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
+ yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
+ /* works because the cos is nearly one */
+ *yp++ = yr + S_MUL(yi,sine);
+ *yp++ = yi - S_MUL(yr,sine);
+ }
+ }
+
+ /* N/4 complex FFT, down-scales by 4/N */
+ opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
+
+ /* Post-rotate */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
+ kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+ kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+ const kiss_twiddle_scalar *t = &l->trig[0];
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ for(i=0;i<N4;i++)
+ {
+ kiss_fft_scalar yr, yi;
+ yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
+ yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
+ /* works because the cos is nearly one */
+ *yp1 = yr - S_MUL(yi,sine);
+ *yp2 = yi + S_MUL(yr,sine);;
+ fp += 2;
+ yp1 += 2*stride;
+ yp2 -= 2*stride;
+ }
+ }
+ RESTORE_STACK;
+}
+
+void clt_mdct_backward(const celt_mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
+{
+ int i;
+ int N, N2, N4;
+ kiss_twiddle_scalar sine;
+ VARDECL(kiss_fft_scalar, f2);
+ SAVE_STACK;
+ N = l->n;
+ N >>= shift;
+ N2 = N>>1;
+ N4 = N>>2;
+ ALLOC(f2, N2, kiss_fft_scalar);
+ /* sin(x) ~= x here */
+#ifdef OPUS_FIXED_POINT
+ sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
+#else
+ sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
+#endif
+
+ /* Pre-rotate */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+ const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+ kiss_fft_scalar * OPUS_RESTRICT yp = f2;
+ const kiss_twiddle_scalar *t = &l->trig[0];
+ for(i=0;i<N4;i++)
+ {
+ kiss_fft_scalar yr, yi;
+ yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
+ yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
+ /* works because the cos is nearly one */
+ *yp++ = yr - S_MUL(yi,sine);
+ *yp++ = yi + S_MUL(yr,sine);
+ xp1+=2*stride;
+ xp2-=2*stride;
+ }
+ }
+
+ /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
+ opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
+
+ /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+ it in-place. */
+ {
+ kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
+ kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
+ const kiss_twiddle_scalar *t = &l->trig[0];
+ /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+ middle pair will be computed twice. */
+ for(i=0;i<(N4+1)>>1;i++)
+ {
+ kiss_fft_scalar re, im, yr, yi;
+ kiss_twiddle_scalar t0, t1;
+ re = yp0[0];
+ im = yp0[1];
+ t0 = t[i<<shift];
+ t1 = t[(N4-i)<<shift];
+ /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+ yr = S_MUL(re,t0) - S_MUL(im,t1);
+ yi = S_MUL(im,t0) + S_MUL(re,t1);
+ re = yp1[0];
+ im = yp1[1];
+ /* works because the cos is nearly one */
+ yp0[0] = -(yr - S_MUL(yi,sine));
+ yp1[1] = yi + S_MUL(yr,sine);
+
+ t0 = t[(N4-i-1)<<shift];
+ t1 = t[(i+1)<<shift];
+ /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+ yr = S_MUL(re,t0) - S_MUL(im,t1);
+ yi = S_MUL(im,t0) + S_MUL(re,t1);
+ /* works because the cos is nearly one */
+ yp1[0] = -(yr - S_MUL(yi,sine));
+ yp0[1] = yi + S_MUL(yr,sine);
+ yp0 += 2;
+ yp1 -= 2;
+ }
+ }
+
+ /* Mirror on both sides for TDAC */
+ {
+ kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+ kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+ const opus_val16 * OPUS_RESTRICT wp1 = window;
+ const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+ for(i = 0; i < overlap/2; i++)
+ {
+ kiss_fft_scalar x1, x2;
+ x1 = *xp1;
+ x2 = *yp1;
+ *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+ *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+ wp1++;
+ wp2--;
+ }
+ }
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/celt/mdct.h b/drivers/opus/celt/mdct.h
new file mode 100644
index 0000000000..4e7a199246
--- /dev/null
+++ b/drivers/opus/celt/mdct.h
@@ -0,0 +1,70 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2008 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+ to do most of the work. It should be relatively straightforward to
+ plug in pretty much any FFT here.
+
+ This replaces the Vorbis FFT (and uses the exact same API), which
+ was a bit too messy and that was ending up duplicating code
+ (might as well use the same FFT everywhere).
+
+ The algorithm is similar to (and inspired from) Fabrice Bellard's
+ MDCT implementation in FFMPEG, but has differences in signs, ordering
+ and scaling in many places.
+*/
+
+#ifndef MDCT_H
+#define MDCT_H
+
+#include "opus_defines.h"
+#include "kiss_fft.h"
+#include "arch.h"
+
+typedef struct {
+ int n;
+ int maxshift;
+ const kiss_fft_state *kfft[4];
+ const kiss_twiddle_scalar * OPUS_RESTRICT trig;
+} celt_mdct_lookup;
+
+int clt_mdct_init(celt_mdct_lookup *l,int N, int maxshift);
+void clt_mdct_clear(celt_mdct_lookup *l);
+
+/** Compute a forward MDCT and scale by 4/N, trashes the input array */
+void clt_mdct_forward(const celt_mdct_lookup *l, kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window, int overlap, int shift, int stride);
+
+/** Compute a backward MDCT (no scaling) and perform weighted overlap-add
+ (scales implicitly by 1/2) */
+void clt_mdct_backward(const celt_mdct_lookup *l, kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
+
+#endif
diff --git a/drivers/opus/celt/mfrngcod.h b/drivers/opus/celt/mfrngcod.h
new file mode 100644
index 0000000000..809152a59a
--- /dev/null
+++ b/drivers/opus/celt/mfrngcod.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2001-2008 Timothy B. Terriberry
+ Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_mfrngcode_H)
+# define _mfrngcode_H (1)
+# include "entcode.h"
+
+/*Constants used by the entropy encoder/decoder.*/
+
+/*The number of bits to output at a time.*/
+# define EC_SYM_BITS (8)
+/*The total number of bits in each of the state registers.*/
+# define EC_CODE_BITS (32)
+/*The maximum symbol value.*/
+# define EC_SYM_MAX ((1U<<EC_SYM_BITS)-1)
+/*Bits to shift by to move a symbol into the high-order position.*/
+# define EC_CODE_SHIFT (EC_CODE_BITS-EC_SYM_BITS-1)
+/*Carry bit of the high-order range symbol.*/
+# define EC_CODE_TOP (((opus_uint32)1U)<<(EC_CODE_BITS-1))
+/*Low-order bit of the high-order range symbol.*/
+# define EC_CODE_BOT (EC_CODE_TOP>>EC_SYM_BITS)
+/*The number of bits available for the last, partial symbol in the code field.*/
+# define EC_CODE_EXTRA ((EC_CODE_BITS-2)%EC_SYM_BITS+1)
+#endif
diff --git a/drivers/opus/celt/modes.c b/drivers/opus/celt/modes.c
new file mode 100644
index 0000000000..3794074aaa
--- /dev/null
+++ b/drivers/opus/celt/modes.c
@@ -0,0 +1,438 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Copyright (c) 2008 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "celt.h"
+#include "opus_modes.h"
+#include "rate.h"
+#include "os_support.h"
+#include "stack_alloc.h"
+#include "quant_bands.h"
+
+/* Band edges of the standard mode: 22 edges delimiting 21 bands. Going by the
+ frequency labels on the row below, one unit corresponds to 200 Hz (5 -> 1k,
+ 40 -> 8k). compute_ebands() copies this table verbatim when
+ Fs == 400*frame_size. */
+static const opus_int16 eband5ms[] = {
+/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
+};
+
+/* Alternate tuning (partially derived from Vorbis) */
+#define BITALLOC_SIZE 11
+/* Bit allocation table in units of 1/32 bit/sample (0.1875 dB SNR).
+ Stored row-major: BITALLOC_SIZE rows, each with one entry per band of
+ eband5ms (21 entries). compute_allocation_table() reads it as
+ band_allocation[row*maxBands + band]. */
+static const unsigned char band_allocation[] = {
+/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 90, 80, 75, 69, 63, 56, 49, 40, 34, 29, 20, 18, 10, 0, 0, 0, 0, 0, 0, 0, 0,
+110,100, 90, 84, 78, 71, 65, 58, 51, 45, 39, 32, 26, 20, 12, 0, 0, 0, 0, 0, 0,
+118,110,103, 93, 86, 80, 75, 70, 65, 59, 53, 47, 40, 31, 23, 15, 4, 0, 0, 0, 0,
+126,119,112,104, 95, 89, 83, 78, 72, 66, 60, 54, 47, 39, 32, 25, 17, 12, 1, 0, 0,
+134,127,120,114,103, 97, 91, 85, 78, 72, 66, 60, 54, 47, 41, 35, 29, 23, 16, 10, 1,
+144,137,130,124,113,107,101, 95, 88, 82, 76, 70, 64, 57, 51, 45, 39, 33, 26, 15, 1,
+152,145,138,132,123,117,111,105, 98, 92, 86, 80, 74, 67, 61, 55, 49, 43, 36, 20, 1,
+162,155,148,142,133,127,121,115,108,102, 96, 90, 84, 77, 71, 65, 59, 53, 46, 30, 1,
+172,165,158,152,143,137,131,125,118,112,106,100, 94, 87, 81, 75, 69, 63, 56, 45, 20,
+200,200,200,200,200,200,200,200,198,193,188,183,178,173,168,163,158,153,148,129,104,
+};
+
+#ifndef CUSTOM_MODES_ONLY
+ #ifdef OPUS_FIXED_POINT
+ #include "static_modes_fixed.h"
+ #else
+ #include "static_modes_float.h"
+ #endif
+#endif /* CUSTOM_MODES_ONLY */
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+#ifdef CUSTOM_MODES
+
+/* Defining 25 critical bands for the full 0-20 kHz audio bandwidth
+ Taken from http://ccrma.stanford.edu/~jos/bbt/Bark_Frequency_Scale.html */
+#define BARK_BANDS 25
+static const opus_int16 bark_freq[BARK_BANDS+1] = {
+ 0, 100, 200, 300, 400,
+ 510, 630, 770, 920, 1080,
+ 1270, 1480, 1720, 2000, 2320,
+ 2700, 3150, 3700, 4400, 5300,
+ 6400, 7700, 9500, 12000, 15500,
+ 20000};
+
+/* Build the table of band edges for a custom mode.
+
+   Fs          sampling rate
+   frame_size  size of one short MDCT (the caller passes mode->shortMdctSize)
+   res         width of one MDCT bin, in the same units as bark_freq[]
+   nbEBands    out: number of bands; the returned table holds *nbEBands+1 edges
+
+   Returns a table allocated with opus_alloc() (the caller owns it), or NULL
+   when the allocation fails. */
+static opus_int16 *compute_ebands(opus_int32 Fs, int frame_size, int res, int *nbEBands)
+{
+   opus_int16 *eBands;
+   int i, j, lin, low, high, nBark, offset=0;
+
+   /* All modes that have 2.5 ms short blocks use the same definition */
+   if (Fs == 400*(opus_int32)frame_size)
+   {
+      *nbEBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1;
+      eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+1));
+      /* Report the failure instead of writing through a NULL pointer. */
+      if (eBands==NULL)
+         return NULL;
+      for (i=0;i<*nbEBands+1;i++)
+         eBands[i] = eband5ms[i];
+      return eBands;
+   }
+   /* Find the number of critical bands supported by our sampling rate */
+   for (nBark=1;nBark<BARK_BANDS;nBark++)
+      if (bark_freq[nBark+1]*2 >= Fs)
+         break;
+
+   /* Find where the linear part ends (i.e. where the spacing is more than min_width) */
+   for (lin=0;lin<nBark;lin++)
+      if (bark_freq[lin+1]-bark_freq[lin] >= res)
+         break;
+
+   low = (bark_freq[lin]+res/2)/res;
+   high = nBark-lin;
+   *nbEBands = low+high;
+   eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+2));
+
+   if (eBands==NULL)
+      return NULL;
+
+   /* Linear spacing (min_width) */
+   for (i=0;i<low;i++)
+      eBands[i] = i;
+   /* low>0 implies lin>0 (bark_freq[0] is 0), so lin-1 is a valid index. */
+   if (low>0)
+      offset = eBands[low-1]*res - bark_freq[lin-1];
+   /* Spacing follows critical bands */
+   for (i=0;i<high;i++)
+   {
+      int target = bark_freq[lin+i];
+      /* Round to an even value */
+      eBands[i+low] = (target+offset/2+res)/(2*res)*2;
+      offset = eBands[i+low]*res - target;
+   }
+   /* Enforce the minimum spacing at the boundary */
+   for (i=0;i<*nbEBands;i++)
+      if (eBands[i] < i)
+         eBands[i] = i;
+   /* Round to an even value */
+   eBands[*nbEBands] = (bark_freq[nBark]+res)/(2*res)*2;
+   if (eBands[*nbEBands] > frame_size)
+      eBands[*nbEBands] = frame_size;
+   /* Smooth the edges so that a band is never narrower than the one before it */
+   for (i=1;i<*nbEBands-1;i++)
+   {
+      if (eBands[i+1]-eBands[i] < eBands[i]-eBands[i-1])
+      {
+         eBands[i] -= (2*eBands[i]-eBands[i-1]-eBands[i+1])/2;
+      }
+   }
+   /* Remove any empty bands. */
+   for (i=j=0;i<*nbEBands;i++)
+      if(eBands[i+1]>eBands[j])
+         eBands[++j]=eBands[i+1];
+   *nbEBands=j;
+
+   for (i=1;i<*nbEBands;i++)
+   {
+      /* Every band must be smaller than the last band. */
+      celt_assert(eBands[i]-eBands[i-1]<=eBands[*nbEBands]-eBands[*nbEBands-1]);
+      /* Each band must be no larger than twice the size of the previous one. */
+      celt_assert(eBands[i+1]-eBands[i]<=2*(eBands[i]-eBands[i-1]));
+   }
+
+   return eBands;
+}
+
+/* Fill in mode->nbAllocVectors and mode->allocVectors.
+
+   For the standard layout (Fs == 400*shortMdctSize) band_allocation[] is
+   copied as is; otherwise each entry is linearly interpolated between the two
+   standard bands surrounding the custom band's lower edge.
+
+   On allocation failure mode->allocVectors is left NULL, which is how the
+   caller detects the error. */
+static void compute_allocation_table(CELTMode *mode)
+{
+   int i, j;
+   unsigned char *allocVectors;
+   int maxBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1;
+
+   mode->nbAllocVectors = BITALLOC_SIZE;
+   /* The mode may come straight from opus_alloc(); make the failure signal
+      explicit rather than relying on whatever the field held before. */
+   mode->allocVectors = NULL;
+   allocVectors = opus_alloc(sizeof(unsigned char)*(BITALLOC_SIZE*mode->nbEBands));
+   if (allocVectors==NULL)
+      return;
+
+   /* Check for standard mode */
+   if (mode->Fs == 400*(opus_int32)mode->shortMdctSize)
+   {
+      for (i=0;i<BITALLOC_SIZE*mode->nbEBands;i++)
+         allocVectors[i] = band_allocation[i];
+      mode->allocVectors = allocVectors;
+      return;
+   }
+   /* If not the standard mode, interpolate */
+   /* Compute per-codec-band allocation from per-critical-band matrix */
+   for (i=0;i<BITALLOC_SIZE;i++)
+   {
+      for (j=0;j<mode->nbEBands;j++)
+      {
+         int k;
+         /* First standard edge strictly above this band's lower edge.
+            k is never 0 here because eband5ms[0] is 0. */
+         for (k=0;k<maxBands;k++)
+         {
+            if (400*(opus_int32)eband5ms[k] > mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize)
+               break;
+         }
+         if (k>maxBands-1)
+            allocVectors[i*mode->nbEBands+j] = band_allocation[i*maxBands + maxBands-1];
+         else {
+            opus_int32 a0, a1;
+            a1 = mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize - 400*(opus_int32)eband5ms[k-1];
+            a0 = 400*(opus_int32)eband5ms[k] - mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize;
+            allocVectors[i*mode->nbEBands+j] = (a0*band_allocation[i*maxBands+k-1]
+                                             + a1*band_allocation[i*maxBands+k])/(a0+a1);
+         }
+      }
+   }
+
+   /*printf ("\n");
+   for (i=0;i<BITALLOC_SIZE;i++)
+   {
+      for (j=0;j<mode->nbEBands;j++)
+         printf ("%d ", allocVectors[i*mode->nbEBands+j]);
+      printf ("\n");
+   }
+   exit(0);*/
+
+   mode->allocVectors = allocVectors;
+}
+
+#endif /* CUSTOM_MODES */
+
+/* Return a mode for sampling rate Fs and frame size frame_size.
+
+   Unless CUSTOM_MODES_ONLY is defined, a matching built-in static mode is
+   returned when one exists. Otherwise, in CUSTOM_MODES builds, a new mode is
+   allocated; release it with opus_custom_mode_destroy().
+
+   error may be NULL; when non-NULL it receives OPUS_OK, OPUS_BAD_ARG or
+   OPUS_ALLOC_FAIL. Returns NULL on failure. */
+CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
+{
+   int i;
+#ifdef CUSTOM_MODES
+   CELTMode *mode=NULL;
+   int res;
+   opus_val16 *window;
+   opus_int16 *logN;
+   int LM;
+   ALLOC_STACK;
+#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
+   if (global_stack==NULL)
+      goto failure;
+#endif
+#endif
+
+#ifndef CUSTOM_MODES_ONLY
+   for (i=0;i<TOTAL_MODES;i++)
+   {
+      int j;
+      /* Accept frame sizes that are 1, 1/2, 1/4 or 1/8 of a static mode's frame */
+      for (j=0;j<4;j++)
+      {
+         if (Fs == static_mode_list[i]->Fs &&
+               (frame_size<<j) == static_mode_list[i]->shortMdctSize*static_mode_list[i]->nbShortMdcts)
+         {
+            if (error)
+               *error = OPUS_OK;
+            return (CELTMode*)static_mode_list[i];
+         }
+      }
+   }
+#endif /* CUSTOM_MODES_ONLY */
+
+#ifndef CUSTOM_MODES
+   if (error)
+      *error = OPUS_BAD_ARG;
+   return NULL;
+#else
+
+   /* The good thing here is that permutation of the arguments will automatically be invalid */
+
+   if (Fs < 8000 || Fs > 96000)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   if (frame_size < 40 || frame_size > 1024 || frame_size%2!=0)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   /* Frames of less than 1ms are not supported. */
+   if ((opus_int32)frame_size*1000 < Fs)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   /* Pick the largest number of short blocks (1<<LM) the frame can be split into */
+   if ((opus_int32)frame_size*75 >= Fs && (frame_size%16)==0)
+   {
+      LM = 3;
+   } else if ((opus_int32)frame_size*150 >= Fs && (frame_size%8)==0)
+   {
+      LM = 2;
+   } else if ((opus_int32)frame_size*300 >= Fs && (frame_size%4)==0)
+   {
+      LM = 1;
+   } else
+   {
+      LM = 0;
+   }
+
+   /* Shorts longer than 3.3ms are not supported. */
+   if ((opus_int32)(frame_size>>LM)*300 > Fs)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   mode = opus_alloc(sizeof(CELTMode));
+   if (mode==NULL)
+      goto failure;
+   /* Zero every field first: the failure path hands the mode to
+      opus_custom_mode_destroy(), which frees each pointer member, so none of
+      them may hold uninitialised garbage. */
+   OPUS_CLEAR(mode, 1);
+   mode->Fs = Fs;
+
+   /* Pre/de-emphasis depends on sampling rate. The "standard" pre-emphasis
+      is defined as A(z) = 1 - 0.85*z^-1 at 48 kHz. Other rates should
+      approximate that. */
+   if(Fs < 12000) /* 8 kHz */
+   {
+      mode->preemph[0] = QCONST16(0.3500061035f, 15);
+      mode->preemph[1] = -QCONST16(0.1799926758f, 15);
+      mode->preemph[2] = QCONST16(0.2719968125f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] = QCONST16(3.6765136719f, 13);
+   } else if(Fs < 24000) /* 16 kHz */
+   {
+      mode->preemph[0] = QCONST16(0.6000061035f, 15);
+      mode->preemph[1] = -QCONST16(0.1799926758f, 15);
+      mode->preemph[2] = QCONST16(0.4424998650f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] = QCONST16(2.2598876953f, 13);
+   } else if(Fs < 40000) /* 32 kHz */
+   {
+      mode->preemph[0] = QCONST16(0.7799987793f, 15);
+      mode->preemph[1] = -QCONST16(0.1000061035f, 15);
+      mode->preemph[2] = QCONST16(0.7499771125f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] = QCONST16(1.3333740234f, 13);
+   } else /* 48 kHz */
+   {
+      mode->preemph[0] = QCONST16(0.8500061035f, 15);
+      mode->preemph[1] = QCONST16(0.0f, 15);
+      mode->preemph[2] = QCONST16(1.f, SIG_SHIFT);
+      mode->preemph[3] = QCONST16(1.f, 13);
+   }
+
+   mode->maxLM = LM;
+   mode->nbShortMdcts = 1<<LM;
+   mode->shortMdctSize = frame_size/mode->nbShortMdcts;
+   /* Width of one MDCT bin, rounded to nearest */
+   res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize);
+
+   mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands);
+   if (mode->eBands==NULL)
+      goto failure;
+#if !defined(SMALL_FOOTPRINT)
+   /* Make sure we don't allocate a band larger than our PVQ table.
+      208 should be enough, but let's be paranoid. */
+   if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])<<LM >
+    208) {
+       goto failure;
+   }
+#endif
+
+   /* Ignore trailing bands that end beyond the short MDCT size */
+   mode->effEBands = mode->nbEBands;
+   while (mode->eBands[mode->effEBands] > mode->shortMdctSize)
+      mode->effEBands--;
+
+   /* Overlap must be divisible by 4 */
+   mode->overlap = ((mode->shortMdctSize>>2)<<2);
+
+   compute_allocation_table(mode);
+   if (mode->allocVectors==NULL)
+      goto failure;
+
+   window = (opus_val16*)opus_alloc(mode->overlap*sizeof(opus_val16));
+   if (window==NULL)
+      goto failure;
+
+#ifndef OPUS_FIXED_POINT
+   for (i=0;i<mode->overlap;i++)
+      window[i] = Q15ONE*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap));
+#else
+   for (i=0;i<mode->overlap;i++)
+      window[i] = MIN32(32767,floor(.5+32768.*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap))));
+#endif
+   mode->window = window;
+
+   logN = (opus_int16*)opus_alloc(mode->nbEBands*sizeof(opus_int16));
+   if (logN==NULL)
+      goto failure;
+
+   for (i=0;i<mode->nbEBands;i++)
+      logN[i] = log2_frac(mode->eBands[i+1]-mode->eBands[i], BITRES);
+   mode->logN = logN;
+
+   compute_pulse_cache(mode, mode->maxLM);
+
+   if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
+         mode->maxLM) == 0)
+      goto failure;
+
+   if (error)
+      *error = OPUS_OK;
+
+   return mode;
+failure:
+   if (error)
+      *error = OPUS_ALLOC_FAIL;
+   if (mode!=NULL)
+      opus_custom_mode_destroy(mode);
+   return NULL;
+#endif /* !CUSTOM_MODES */
+}
+
+#ifdef CUSTOM_MODES
+/* Free a mode and every table it owns. Passing NULL is a no-op, and so is
+   passing one of the built-in modes from static_mode_list[]. */
+void opus_custom_mode_destroy(CELTMode *mode)
+{
+#ifndef CUSTOM_MODES_ONLY
+   int idx;
+#endif
+
+   if (mode == NULL)
+      return;
+
+#ifndef CUSTOM_MODES_ONLY
+   /* Built-in modes are handed out by opus_custom_mode_create() without being
+      allocated there, so they are left alone. */
+   idx = 0;
+   while (idx < TOTAL_MODES)
+   {
+      if (static_mode_list[idx] == mode)
+         return;
+      idx++;
+   }
+#endif /* CUSTOM_MODES_ONLY */
+
+   /* The casts only strip the const qualifier of the members. */
+   opus_free((opus_int16*)mode->eBands);
+   opus_free((opus_int16*)mode->allocVectors);
+
+   opus_free((opus_val16*)mode->window);
+   opus_free((opus_int16*)mode->logN);
+
+   opus_free((opus_int16*)mode->cache.index);
+   opus_free((unsigned char*)mode->cache.bits);
+   opus_free((unsigned char*)mode->cache.caps);
+   clt_mdct_clear(&mode->mdct);
+
+   opus_free((CELTMode *)mode);
+}
+#endif
diff --git a/drivers/opus/celt/opus_custom_demo.c b/drivers/opus/celt/opus_custom_demo.c
new file mode 100644
index 0000000000..8c7f58b6e6
--- /dev/null
+++ b/drivers/opus/celt/opus_custom_demo.c
@@ -0,0 +1,210 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_custom.h"
+#include "arch.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define MAX_PACKET 1275
+
+/* Round-trip test driver for the Opus Custom API.
+
+   Usage: <rate> <channels> <frame size> <bytes per packet>
+          [<complexity> [packet loss rate]] <input> <output>
+
+   Reads frames of opus_int16 samples from <input>, encodes each one into a
+   packet of <bytes per packet> bytes, decodes it again (dropping packets at
+   random when a loss rate is given) and writes the decoded samples to
+   <output>.
+
+   Returns 0 on success and 1 on a usage or set-up error; RESYNTH builds also
+   return 1 when the decoded signal differs from the input. */
+int main(int argc, char *argv[])
+{
+   int err;
+   char *inFile, *outFile;
+   FILE *fin, *fout;
+   OpusCustomMode *mode=NULL;
+   OpusCustomEncoder *enc;
+   OpusCustomDecoder *dec;
+   int len;
+   opus_int32 frame_size, channels, rate;
+   int bytes_per_packet;
+   unsigned char data[MAX_PACKET];
+   int complexity;
+#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+   int i;
+   double rmsd = 0;
+#endif
+   int count = 0;
+   opus_int32 skip;
+   opus_int16 *in, *out;
+   if (argc != 9 && argc != 8 && argc != 7)
+   {
+      fprintf (stderr, "Usage: test_opus_custom <rate> <channels> <frame size> "
+         " <bytes per packet> [<complexity> [packet loss rate]] "
+         "<input> <output>\n");
+      return 1;
+   }
+
+   rate = (opus_int32)atol(argv[1]);
+   channels = atoi(argv[2]);
+   frame_size = atoi(argv[3]);
+   mode = opus_custom_mode_create(rate, frame_size, NULL);
+   if (mode == NULL)
+   {
+      fprintf(stderr, "failed to create a mode\n");
+      return 1;
+   }
+
+   bytes_per_packet = atoi(argv[4]);
+   if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET)
+   {
+      fprintf (stderr, "bytes per packet must be between 0 and %d\n",
+         MAX_PACKET);
+      opus_custom_mode_destroy(mode);
+      return 1;
+   }
+
+   inFile = argv[argc-2];
+   fin = fopen(inFile, "rb");
+   if (!fin)
+   {
+      fprintf (stderr, "Could not open input file %s\n", argv[argc-2]);
+      opus_custom_mode_destroy(mode);
+      return 1;
+   }
+   outFile = argv[argc-1];
+   fout = fopen(outFile, "wb+");
+   if (!fout)
+   {
+      fprintf (stderr, "Could not open output file %s\n", argv[argc-1]);
+      fclose(fin);
+      opus_custom_mode_destroy(mode);
+      return 1;
+   }
+
+   enc = opus_custom_encoder_create(mode, channels, &err);
+   if (err != 0)
+   {
+      fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err));
+      fclose(fin);
+      fclose(fout);
+      opus_custom_mode_destroy(mode);
+      return 1;
+   }
+   dec = opus_custom_decoder_create(mode, channels, &err);
+   if (err != 0)
+   {
+      fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err));
+      fclose(fin);
+      fclose(fout);
+      opus_custom_encoder_destroy(enc);
+      opus_custom_mode_destroy(mode);
+      return 1;
+   }
+   /* Number of leading decoded samples (per channel) to drop from the output */
+   opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip));
+
+   if (argc>7)
+   {
+      complexity=atoi(argv[5]);
+      opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity));
+   }
+
+   in = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
+   out = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
+   if (in == NULL || out == NULL)
+   {
+      fprintf(stderr, "Failed to allocate the sample buffers\n");
+      free(in);
+      free(out);
+      opus_custom_encoder_destroy(enc);
+      opus_custom_decoder_destroy(dec);
+      fclose(fin);
+      fclose(fout);
+      opus_custom_mode_destroy(mode);
+      return 1;
+   }
+
+   while (!feof(fin))
+   {
+      int ret;
+      err = fread(in, sizeof(short), frame_size*channels, fin);
+      /* Stop on end of file (a trailing partial frame is dropped) and also on
+         a read error, which would otherwise never end the loop. */
+      if (feof(fin) || ferror(fin))
+         break;
+      len = opus_custom_encode(enc, in, frame_size, data, bytes_per_packet);
+      if (len <= 0)
+         fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len));
+
+      /* This is for simulating bit errors */
+#if 0
+      int errors = 0;
+      int eid = 0;
+      /* This simulates random bit error */
+      for (i=0;i<len*8;i++)
+      {
+         if (rand()%atoi(argv[8])==0)
+         {
+            if (i<64)
+            {
+               errors++;
+               eid = i;
+            }
+            data[i/8] ^= 1<<(7-(i%8));
+         }
+      }
+      if (errors == 1)
+         data[eid/8] ^= 1<<(7-(eid%8));
+      else if (errors%2 == 1)
+         data[rand()%8] ^= 1<<rand()%8;
+#endif
+
+#if 1 /* Set to zero to use the encoder's output instead */
+      /* This is to simulate packet loss */
+      if (argc==9 && rand()%1000<atoi(argv[argc-3]))
+      /*if (errors && (errors%2==0))*/
+         ret = opus_custom_decode(dec, NULL, len, out, frame_size);
+      else
+         ret = opus_custom_decode(dec, data, len, out, frame_size);
+      if (ret < 0)
+         fprintf(stderr, "opus_custom_decode() failed: %s\n", opus_strerror(ret));
+#else
+      for (i=0;i<ret*channels;i++)
+         out[i] = in[i];
+#endif
+#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+      for (i=0;i<ret*channels;i++)
+      {
+         rmsd += (in[i]-out[i])*1.0*(in[i]-out[i]);
+         /*out[i] -= in[i];*/
+      }
+#endif
+      count++;
+      /* Only write when the decoder produced more samples than are still to
+         be skipped; a failed decode (ret < 0) must not reach fwrite(), where
+         the negative count would turn into a huge size_t. */
+      if (ret > skip)
+      {
+         fwrite(out+skip*channels, sizeof(short), (ret-skip)*channels, fout);
+         skip = 0;
+      } else if (ret > 0)
+      {
+         skip -= ret;
+      }
+   }
+   PRINT_MIPS(stderr);
+
+   opus_custom_encoder_destroy(enc);
+   opus_custom_decoder_destroy(dec);
+   fclose(fin);
+   fclose(fout);
+   opus_custom_mode_destroy(mode);
+   free(in);
+   free(out);
+#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+   if (rmsd > 0)
+   {
+      rmsd = sqrt(rmsd/(1.0*frame_size*channels*count));
+      fprintf (stderr, "Error: encoder doesn't match decoder\n");
+      fprintf (stderr, "RMS mismatch is %f\n", rmsd);
+      return 1;
+   } else {
+      fprintf (stderr, "Encoder matches decoder!!\n");
+   }
+#endif
+   return 0;
+}
+
diff --git a/drivers/opus/celt/opus_modes.h b/drivers/opus/celt/opus_modes.h
new file mode 100644
index 0000000000..a1df46265e
--- /dev/null
+++ b/drivers/opus/celt/opus_modes.h
@@ -0,0 +1,83 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Copyright (c) 2008 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OPUS_MODES_H
+#define OPUS_MODES_H
+
+#include "opus_types.h"
+#include "celt.h"
+#include "arch.h"
+#include "mdct.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#define MAX_PERIOD 1024
+
+/* Accessor for the mode's overlap field; can be overridden by defining
+ OVERLAP before including this header. */
+#ifndef OVERLAP
+#define OVERLAP(mode) ((mode)->overlap)
+#endif
+
+/* NOTE(review): struct OpusCustomMode as declared below has no `mdctSize`
+ member, so this default definition cannot compile if it is ever expanded.
+ Confirm whether it is still used before relying on it. */
+#ifndef FRAMESIZE
+#define FRAMESIZE(mode) ((mode)->mdctSize)
+#endif
+
+/* Lookup tables stored in OpusCustomMode::cache. For custom modes the three
+ pointers are released by opus_custom_mode_destroy(); they are presumably
+ filled in by compute_pulse_cache() -- confirm in rate.c. */
+typedef struct {
+ int size;
+ const opus_int16 *index;
+ const unsigned char *bits;
+ const unsigned char *caps;
+} PulseCache;
+
+/** Mode definition (opaque)
+ @brief Mode definition
+
+ The per-field notes describe how opus_custom_mode_create() fills in a
+ custom mode.
+ */
+struct OpusCustomMode {
+ opus_int32 Fs; /**< Sampling rate the mode was created for */
+ int overlap; /**< Number of entries in window; shortMdctSize rounded down to a multiple of 4 */
+
+ int nbEBands; /**< Number of bands; eBands holds nbEBands+1 edges */
+ int effEBands; /**< Largest band index whose edge eBands[effEBands] does not exceed shortMdctSize */
+ opus_val16 preemph[4]; /**< Pre/de-emphasis coefficients, selected from Fs */
+ const opus_int16 *eBands; /**< Definition for each "pseudo-critical band" */
+
+ int maxLM; /**< log2 of nbShortMdcts */
+ int nbShortMdcts; /**< Number of short MDCTs per frame (1<<maxLM) */
+ int shortMdctSize; /**< Frame size divided by nbShortMdcts */
+
+ int nbAllocVectors; /**< Number of lines in the matrix below */
+ const unsigned char *allocVectors; /**< Number of bits in each band for several rates */
+ const opus_int16 *logN; /**< Per band: log2_frac() of the band width, at BITRES resolution */
+
+ const opus_val16 *window; /**< Overlap window, overlap entries */
+ celt_mdct_lookup mdct; /**< MDCT state, set up with clt_mdct_init() */
+ PulseCache cache; /**< Tables set up by compute_pulse_cache() */
+};
+
+
+#endif
diff --git a/drivers/opus/celt/os_support.h b/drivers/opus/celt/os_support.h
new file mode 100644
index 0000000000..5e47e3cff9
--- /dev/null
+++ b/drivers/opus/celt/os_support.h
@@ -0,0 +1,92 @@
+/* Copyright (C) 2007 Jean-Marc Valin
+
+ File: os_support.h
+ This is the (tiny) OS abstraction layer. Aside from math.h, this is the
+ only place where system headers are allowed.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OS_SUPPORT_H
+#define OS_SUPPORT_H
+
+#ifdef CUSTOM_SUPPORT
+# include "custom_support.h"
+#endif
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free.
+ The result of malloc() is returned unchanged, so the memory is not cleared
+ and callers must check for NULL. Define OVERRIDE_OPUS_ALLOC to supply a
+ replacement. */
+#ifndef OVERRIDE_OPUS_ALLOC
+static OPUS_INLINE void *opus_alloc (size_t size)
+{
+ return malloc(size);
+}
+#endif
+
+/** Same as opus_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though).
+ This default simply forwards to opus_alloc(); define
+ OVERRIDE_OPUS_ALLOC_SCRATCH to supply a replacement. */
+#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
+static OPUS_INLINE void *opus_alloc_scratch (size_t size)
+{
+ /* Scratch space doesn't need to be cleared */
+ return opus_alloc(size);
+}
+#endif
+
+/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc.
+ The pointer is passed straight to free(). Define OVERRIDE_OPUS_FREE to
+ supply a replacement. */
+#ifndef OVERRIDE_OPUS_FREE
+static OPUS_INLINE void opus_free (void *ptr)
+{
+ free(ptr);
+}
+#endif
+
+/** Copy n elements (n*sizeof(*dst) bytes) from src to dst; the regions must
+ not overlap. The 0* term adds nothing to the size but makes the compiler
+ reject dst and src pointers of incompatible types */
+#ifndef OVERRIDE_OPUS_COPY
+#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Copy n elements (n*sizeof(*dst) bytes) from src to dst, allowing overlapping regions. The 0* term
+ provides compile-time type checking */
+#ifndef OVERRIDE_OPUS_MOVE
+#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Set n elements of dst to zero, starting at address dst */
+#ifndef OVERRIDE_OPUS_CLEAR
+#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
+#endif
+
+/*#ifdef __GNUC__
+#pragma GCC poison printf sprintf
+#pragma GCC poison malloc free realloc calloc
+#endif*/
+
+#endif /* OS_SUPPORT_H */
+
diff --git a/drivers/opus/celt/pitch.c b/drivers/opus/celt/pitch.c
new file mode 100644
index 0000000000..48cd02fb2b
--- /dev/null
+++ b/drivers/opus/celt/pitch.c
@@ -0,0 +1,537 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/**
+ @file pitch.c
+ @brief Pitch analysis
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "pitch.h"
+#include "os_support.h"
+#include "opus_modes.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "celt_lpc.h"
+
+/* Find the two best pitch candidates among the lags 0..max_pitch-1.
+
+ For each lag i with xcorr[i] > 0 the score is xcorr[i]^2 / Syy, where Syy is
+ the running energy of the len samples of y starting at i. Scores are compared
+ by cross-multiplication (num*best_den > best_num*Syy) so that no division is
+ needed. The best lag ends up in best_pitch[0] and the runner-up in
+ best_pitch[1].
+
+ y must hold at least len+max_pitch samples (y[i+len] is read for every lag).
+ Fixed-point builds also take yshift, the shift applied to the energy terms,
+ and maxcorr, from which the normalisation shift of xcorr is derived. */
+static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
+ int max_pitch, int *best_pitch
+#ifdef OPUS_FIXED_POINT
+ , int yshift, opus_val32 maxcorr
+#endif
+ )
+{
+ int i, j;
+ /* Starts at 1, presumably so that the energy is never zero */
+ opus_val32 Syy=1;
+ opus_val16 best_num[2];
+ opus_val32 best_den[2];
+#ifdef OPUS_FIXED_POINT
+ int xshift;
+
+ xshift = celt_ilog2(maxcorr)-14;
+#endif
+
+ /* A numerator of -1 over a denominator of 0 loses against any real candidate */
+ best_num[0] = -1;
+ best_num[1] = -1;
+ best_den[0] = 0;
+ best_den[1] = 0;
+ best_pitch[0] = 0;
+ best_pitch[1] = 1;
+ /* Energy of the first window, y[0..len-1] */
+ for (j=0;j<len;j++)
+ Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
+ for (i=0;i<max_pitch;i++)
+ {
+ if (xcorr[i]>0)
+ {
+ opus_val16 num;
+ opus_val32 xcorr16;
+ xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
+#ifndef OPUS_FIXED_POINT
+ /* Considering the range of xcorr16, this should avoid both underflows
+ and overflows (inf) when squaring xcorr16 */
+ xcorr16 *= 1e-12f;
+#endif
+ num = MULT16_16_Q15(xcorr16,xcorr16);
+ /* Better than the current runner-up? */
+ if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
+ {
+ /* Better than the current best too: demote the old best to runner-up */
+ if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
+ {
+ best_num[1] = best_num[0];
+ best_den[1] = best_den[0];
+ best_pitch[1] = best_pitch[0];
+ best_num[0] = num;
+ best_den[0] = Syy;
+ best_pitch[0] = i;
+ } else {
+ best_num[1] = num;
+ best_den[1] = Syy;
+ best_pitch[1] = i;
+ }
+ }
+ }
+ /* Slide the energy window by one sample: add y[i+len], drop y[i] */
+ Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
+ Syy = MAX32(1, Syy);
+ }
+}
+
+/* Five-tap FIR filter: every output is the input sample plus the tap-weighted
+   sum of the five previous input samples.
+
+   x    input, N samples
+   num  the five taps; num[0] weights the most recent previous sample
+   y    output, N samples; may be the same buffer as x, since x[i] is read
+        before y[i] is written
+   mem  the five previous inputs on entry (mem[0] newest), updated on return */
+static void celt_fir5(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         opus_val16 *mem)
+{
+   /* Keep the taps and the delay line in locals for the duration of the loop */
+   const opus_val16 tap0=num[0], tap1=num[1], tap2=num[2], tap3=num[3], tap4=num[4];
+   opus_val32 z0=mem[0], z1=mem[1], z2=mem[2], z3=mem[3], z4=mem[4];
+   int n;
+
+   for (n=0;n<N;n++)
+   {
+      opus_val32 acc = SHL32(EXTEND32(x[n]), SIG_SHIFT);
+      acc = MAC16_16(acc,tap0,z0);
+      acc = MAC16_16(acc,tap1,z1);
+      acc = MAC16_16(acc,tap2,z2);
+      acc = MAC16_16(acc,tap3,z3);
+      acc = MAC16_16(acc,tap4,z4);
+      /* Advance the delay line, oldest sample first */
+      z4 = z3;
+      z3 = z2;
+      z2 = z1;
+      z1 = z0;
+      z0 = x[n];
+      y[n] = ROUND16(acc, SIG_SHIFT);
+   }
+
+   /* Hand the delay line back to the caller */
+   mem[0]=z0;
+   mem[1]=z1;
+   mem[2]=z2;
+   mem[3]=z3;
+   mem[4]=z4;
+}
+
+
+/* Produce the half-rate signal used by the pitch search.
+
+ x     one pointer per channel, len samples each
+ x_lp  output, len>>1 samples
+ len   number of input samples per channel
+ C     number of channels; when C==2 the second channel is added to the first
+ arch  passed through to _celt_autocorr()
+
+ Each output sample combines x[2i-1], x[2i] and x[2i+1] (the two neighbours
+ are averaged, then averaged again with the centre sample). A 4th-order LPC
+ model of the result is then computed, damped by 0.9^k, extended with one
+ extra zero and applied in place with celt_fir5(). */
+void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
+ int len, int C, int arch)
+{
+ int i;
+ opus_val32 ac[5];
+ opus_val16 tmp=Q15ONE;
+ opus_val16 lpc[4], mem[5]={0,0,0,0,0};
+ opus_val16 lpc2[5];
+ opus_val16 c1 = QCONST16(.8f,15);
+#ifdef OPUS_FIXED_POINT
+ /* Choose a down-shift from the largest input magnitude; one extra bit is
+ added when two channels are summed */
+ int shift;
+ opus_val32 maxabs = celt_maxabs32(x[0], len);
+ if (C==2)
+ {
+ opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
+ maxabs = MAX32(maxabs, maxabs_1);
+ }
+ if (maxabs<1)
+ maxabs=1;
+ shift = celt_ilog2(maxabs)-10;
+ if (shift<0)
+ shift=0;
+ if (C==2)
+ shift++;
+#endif
+ /* Sample 0 has no left neighbour, so it is handled separately after the loop */
+ for (i=1;i<len>>1;i++)
+ x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
+ x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
+ if (C==2)
+ {
+ for (i=1;i<len>>1;i++)
+ x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
+ x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
+ }
+
+ _celt_autocorr(x_lp, ac, NULL, 0,
+ 4, len>>1, arch);
+
+ /* Noise floor -40 dB */
+#ifdef OPUS_FIXED_POINT
+ ac[0] += SHR32(ac[0],13);
+#else
+ ac[0] *= 1.0001f;
+#endif
+ /* Lag windowing */
+ for (i=1;i<=4;i++)
+ {
+ /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef OPUS_FIXED_POINT
+ ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+ ac[i] -= ac[i]*(.008f*i)*(.008f*i);
+#endif
+ }
+
+ _celt_lpc(lpc, ac, 4);
+ /* Scale coefficient k by 0.9^(k+1) */
+ for (i=0;i<4;i++)
+ {
+ tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
+ lpc[i] = MULT16_16_Q15(lpc[i], tmp);
+ }
+ /* Add a zero */
+ lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
+ lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
+ lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
+ lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
+ lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
+ /* In-place filtering: celt_fir5() is called with x_lp as both input and output */
+ celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
+}
+
+#if 0 /* This is a simple version of the pitch correlation that should work
+ well on DSPs like Blackfin and TI C5x/C6x */
+
+/* Reference (non-unrolled) pitch correlation; compiled out by the enclosing
+ #if 0. Computes xcorr[i] = sum over j in [0,len) of x[j]*y[i+j] for every i
+ in [0,max_pitch). Fixed-point builds return the largest sum found, with a
+ floor of 1; other builds return nothing. */
+#ifdef OPUS_FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
+{
+ int i, j;
+#ifdef OPUS_FIXED_POINT
+ opus_val32 maxcorr=1;
+#endif
+ for (i=0;i<max_pitch;i++)
+ {
+ opus_val32 sum = 0;
+ for (j=0;j<len;j++)
+ sum = MAC16_16(sum, x[j],y[i+j]);
+ xcorr[i] = sum;
+#ifdef OPUS_FIXED_POINT
+ maxcorr = MAX32(maxcorr, sum);
+#endif
+ }
+#ifdef OPUS_FIXED_POINT
+ return maxcorr;
+#endif
+}
+
+#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
+
+/* Pitch correlation, four lags at a time.
+
+   Computes xcorr[i] = sum over j in [0,len) of _x[j]*_y[i+j] for every i in
+   [0,max_pitch). Groups of four lags go through xcorr_kernel(); any leftover
+   lags use a plain loop. _y must hold at least len+max_pitch-1 samples.
+
+   Fixed-point builds return the largest correlation found, with a floor of 1;
+   other builds return nothing. */
+#ifdef OPUS_FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i,j;
+   /* Declared ahead of the asserts below: they expand to statements when
+      assertions are enabled, and C89 does not allow a declaration after a
+      statement. */
+#ifdef OPUS_FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   /*The EDSP version requires that max_pitch is at least 1, and that _x is
+      32-bit aligned.
+     Since it's hard to put asserts in assembly, put them here.*/
+   celt_assert(max_pitch>0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+   for (i=0;i<max_pitch-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(_x, _y+i, sum, len);
+      xcorr[i]=sum[0];
+      xcorr[i+1]=sum[1];
+      xcorr[i+2]=sum[2];
+      xcorr[i+3]=sum[3];
+#ifdef OPUS_FIXED_POINT
+      /* Reduce the four sums to their maximum; sum[] is not needed afterwards */
+      sum[0] = MAX32(sum[0], sum[1]);
+      sum[2] = MAX32(sum[2], sum[3]);
+      sum[0] = MAX32(sum[0], sum[2]);
+      maxcorr = MAX32(maxcorr, sum[0]);
+#endif
+   }
+   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   for (;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, _x[j],_y[i+j]);
+      xcorr[i] = sum;
+#ifdef OPUS_FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef OPUS_FIXED_POINT
+   return maxcorr;
+#endif
+}
+
+#endif
+void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
+ int len, int max_pitch, int *pitch, int arch)
+{ /* Coarse-to-fine pitch lag search of x_lp against y.
+     1. x_lp and y are decimated by 2 and cross-correlated over max_pitch>>2
+        lags; find_best_pitch() (defined outside this view) fills best_pitch[].
+     2. The correlation is recomputed on x_lp and y as passed in, but only for
+        lags within 2 of twice either candidate; every other lag gets 0.
+     3. best_pitch[0] is adjusted by -1, 0 or +1 from the correlation values
+        on either side of it.
+     The value written to *pitch is 2*best_pitch[0]-offset.
+     arch is only forwarded to celt_pitch_xcorr(). */
+ int i, j;
+ int lag;
+ int best_pitch[2]={0,0};
+ VARDECL(opus_val16, x_lp4);
+ VARDECL(opus_val16, y_lp4);
+ VARDECL(opus_val32, xcorr);
+#ifdef OPUS_FIXED_POINT
+ opus_val32 maxcorr;
+ opus_val32 xmax, ymax;
+ int shift=0;
+#endif
+ int offset;
+
+ SAVE_STACK;
+
+ celt_assert(len>0);
+ celt_assert(max_pitch>0);
+ lag = len+max_pitch;
+
+ ALLOC(x_lp4, len>>2, opus_val16);
+ ALLOC(y_lp4, lag>>2, opus_val16);
+ ALLOC(xcorr, max_pitch>>1, opus_val32); /* sized for the finer search; the coarse search fills only max_pitch>>2 entries */
+
+ /* Downsample by 2 again */
+ for (j=0;j<len>>2;j++)
+ x_lp4[j] = x_lp[2*j];
+ for (j=0;j<lag>>2;j++)
+ y_lp4[j] = y[2*j];
+
+#ifdef OPUS_FIXED_POINT
+ xmax = celt_maxabs16(x_lp4, len>>2);
+ ymax = celt_maxabs16(y_lp4, lag>>2);
+ shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11; /* positive only when the peak magnitude is large; then both decimated signals are scaled down */
+ if (shift>0)
+ {
+ for (j=0;j<len>>2;j++)
+ x_lp4[j] = SHR16(x_lp4[j], shift);
+ for (j=0;j<lag>>2;j++)
+ y_lp4[j] = SHR16(y_lp4[j], shift);
+ /* Use double the shift for a MAC */
+ shift *= 2;
+ } else {
+ shift = 0;
+ }
+#endif
+
+ /* Coarse search with 4x decimation */
+
+#ifdef OPUS_FIXED_POINT
+ maxcorr =
+#endif
+ celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);
+
+ find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
+#ifdef OPUS_FIXED_POINT
+ , 0, maxcorr
+#endif
+ );
+
+ /* Finer search with 2x decimation */
+#ifdef OPUS_FIXED_POINT
+ maxcorr=1;
+#endif
+ for (i=0;i<max_pitch>>1;i++)
+ {
+ opus_val32 sum=0;
+ xcorr[i] = 0;
+ if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) /* skip lags that are not within 2 of either doubled coarse candidate */
+ continue;
+ for (j=0;j<len>>1;j++)
+ sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); /* each product is shifted down by the doubled shift before accumulation */
+ xcorr[i] = MAX32(-1, sum); /* stored value is floored at -1 */
+#ifdef OPUS_FIXED_POINT
+ maxcorr = MAX32(maxcorr, sum);
+#endif
+ }
+ find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
+#ifdef OPUS_FIXED_POINT
+ , shift+1, maxcorr
+#endif
+ );
+
+ /* Refine by pseudo-interpolation: compare the correlation at the two lags
+    adjacent to best_pitch[0]; skipped (offset 0) when best_pitch[0] is the
+    first or last lag because one neighbour would be missing. */
+ if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
+ {
+ opus_val32 a, b, c;
+ a = xcorr[best_pitch[0]-1];
+ b = xcorr[best_pitch[0]];
+ c = xcorr[best_pitch[0]+1];
+ if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
+ offset = 1;
+ else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
+ offset = -1;
+ else
+ offset = 0;
+ } else {
+ offset = 0;
+ }
+ *pitch = 2*best_pitch[0]-offset;
+
+ RESTORE_STACK;
+}
+
+static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; /* indexed by the divisor k; multiplier used to form the secondary lag T1b (only read for k>=3) */
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+ int N, int *T0_, int prev_period, opus_val16 prev_gain)
+{ /* Tests whether the period *T0_ should be replaced by a shorter period
+     T0/k (k=2..15) and returns a pitch gain.
+     maxperiod, minperiod, *T0_, prev_period and N are all halved on entry and
+     x is advanced by the halved maxperiod, so samples x[-maxperiod..N-1]
+     (relative to the advanced pointer) are read; this assumes the caller's
+     buffer holds that history -- TODO confirm against the caller.
+     A candidate T1 replaces the current choice when its normalised
+     correlation g1 exceeds a threshold built from the initial gain g0 and
+     lowered by prev_gain when T1 is close to prev_period.
+     On return *T0_ is 2*T+offset (offset in {-1,0,1}), raised to the original
+     minperiod if it falls below it. The return value is Q15ONE or the
+     frac_div32() ratio of best_xy to best_yy+1, limited to at most g. */
+ int k, i, T, T0;
+ opus_val16 g, g0;
+ opus_val16 pg;
+ opus_val32 xy,xx,yy,xy2;
+ opus_val32 xcorr[3];
+ opus_val32 best_xy, best_yy;
+ int offset;
+ int minperiod0;
+ VARDECL(opus_val32, yy_lookup);
+ SAVE_STACK;
+
+ minperiod0 = minperiod;
+ maxperiod /= 2;
+ minperiod /= 2;
+ *T0_ /= 2;
+ prev_period /= 2;
+ N /= 2;
+ x += maxperiod;
+ if (*T0_>=maxperiod)
+ *T0_=maxperiod-1;
+
+ T = T0 = *T0_;
+ ALLOC(yy_lookup, maxperiod+1, opus_val32);
+ dual_inner_prod(x, x, x-T0, N, &xx, &xy); /* xx = energy of x[0..N-1]; xy = correlation of x with itself delayed by T0 */
+ yy_lookup[0] = xx;
+ yy=xx;
+ for (i=1;i<=maxperiod;i++)
+ {
+ yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]); /* slide the N-sample energy window one sample back */
+ yy_lookup[i] = MAX32(0, yy); /* yy_lookup[i]: energy of x[-i..N-1-i], never negative */
+ }
+ yy = yy_lookup[T0];
+ best_xy = xy;
+ best_yy = yy;
+#ifdef OPUS_FIXED_POINT
+ {
+ opus_val32 x2y2;
+ int sh, t;
+ x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
+ sh = celt_ilog2(x2y2)>>1;
+ t = VSHR32(x2y2, 2*(sh-7));
+ g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
+ }
+#else
+ g = g0 = xy/celt_sqrt(1+xx*yy);
+#endif
+ /* Look for any pitch at T/k */
+ for (k=2;k<=15;k++)
+ {
+ int T1, T1b;
+ opus_val16 g1;
+ opus_val16 cont=0;
+ opus_val16 thresh;
+ T1 = (2*T0+k)/(2*k); /* T0/k rounded to nearest */
+ if (T1 < minperiod)
+ break;
+ /* Look for another strong correlation at T1b */
+ if (k==2)
+ {
+ if (T1+T0>maxperiod)
+ T1b = T0;
+ else
+ T1b = T0+T1;
+ } else
+ {
+ T1b = (2*second_check[k]*T0+k)/(2*k); /* second_check[k]*T0/k rounded to nearest */
+ }
+ dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
+ xy += xy2; /* correlations at T1 and T1b are combined... */
+ yy = yy_lookup[T1] + yy_lookup[T1b]; /* ...and so are the matching energies */
+#ifdef OPUS_FIXED_POINT
+ {
+ opus_val32 x2y2;
+ int sh, t;
+ x2y2 = 1+MULT32_32_Q31(xx,yy);
+ sh = celt_ilog2(x2y2)>>1;
+ t = VSHR32(x2y2, 2*(sh-7));
+ g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
+ }
+#else
+ g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
+#endif
+ if (abs(T1-prev_period)<=1)
+ cont = prev_gain;
+ else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
+ cont = HALF32(prev_gain);
+ else
+ cont = 0;
+ thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
+ /* Bias against very high pitch (very short period) to avoid false-positives
+ due to short-term correlation.
+ NOTE(review): the T1<2*minperiod branch below can never run, because any
+ such T1 already satisfies T1<3*minperiod. The two tests look like they
+ were meant to be in the opposite order; left as is because changing it
+ would alter the encoder's decisions -- confirm before touching. */
+ if (T1<3*minperiod)
+ thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
+ else if (T1<2*minperiod)
+ thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
+ if (g1 > thresh)
+ {
+ best_xy = xy;
+ best_yy = yy;
+ T = T1;
+ g = g1;
+ }
+ }
+ best_xy = MAX32(0, best_xy);
+ if (best_yy <= best_xy)
+ pg = Q15ONE;
+ else
+ pg = SHR32(frac_div32(best_xy,best_yy+1),16);
+
+ for (k=0;k<3;k++) /* correlation at lags T-1, T and T+1 */
+ {
+ int T1 = T+k-1;
+ xy = 0;
+ for (i=0;i<N;i++)
+ xy = MAC16_16(xy, x[i], x[i-T1]);
+ xcorr[k] = xy;
+ }
+ if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
+ offset = 1;
+ else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
+ offset = -1;
+ else
+ offset = 0;
+ if (pg > g)
+ pg = g;
+ *T0_ = 2*T+offset; /* the factor 2 undoes the halving applied on entry */
+
+ if (*T0_<minperiod0)
+ *T0_=minperiod0;
+ RESTORE_STACK;
+ return pg;
+}
diff --git a/drivers/opus/celt/pitch.h b/drivers/opus/celt/pitch.h
new file mode 100644
index 0000000000..3a7d305425
--- /dev/null
+++ b/drivers/opus/celt/pitch.h
@@ -0,0 +1,173 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/**
+ @file pitch.h
+ @brief Pitch analysis
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_H
+#define PITCH_H
+
+#include "opus_modes.h"
+#include "cpu_support.h"
+
+#if defined(__SSE__) && !defined(OPUS_FIXED_POINT)
+#include "x86/pitch_sse.h"
+#endif
+
+#if defined(OPUS_ARM_ASM) && defined(OPUS_FIXED_POINT)
+# include "arm/pitch_arm.h"
+#endif
+
+void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
+ int len, int C, int arch);
+
+void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
+ int len, int max_pitch, int *pitch, int arch);
+
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+ int N, int *T0, int prev_period, opus_val16 prev_gain);
+
+/* OPT: This is the kernel you really want to optimize. It gets used a lot
+ by the prefilter and by the PLC. */
+#ifndef OVERRIDE_XCORR_KERNEL
+static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{ /* Accumulates four neighbouring correlation lags in a single pass:
+     sum[k] += sum over j in [0,len) of x[j]*y[j+k], for k=0..3.
+     sum[] is only added to, never cleared, so the caller provides the
+     starting values. Reads x[0..len-1] and y[0..len+2].
+     y_0..y_3 hold a sliding window of four y samples; each step loads one
+     new y sample into the slot that is no longer needed, so the roles of
+     the four variables rotate from step to step. */
+ int j;
+ opus_val16 y_0, y_1, y_2, y_3;
+ celt_assert(len>=3);
+ y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+ y_0=*y++;
+ y_1=*y++;
+ y_2=*y++;
+ for (j=0;j<len-3;j+=4) /* main loop: four x samples per iteration */
+ {
+ opus_val16 tmp;
+ tmp = *x++;
+ y_3=*y++;
+ sum[0] = MAC16_16(sum[0],tmp,y_0);
+ sum[1] = MAC16_16(sum[1],tmp,y_1);
+ sum[2] = MAC16_16(sum[2],tmp,y_2);
+ sum[3] = MAC16_16(sum[3],tmp,y_3);
+ tmp=*x++;
+ y_0=*y++;
+ sum[0] = MAC16_16(sum[0],tmp,y_1);
+ sum[1] = MAC16_16(sum[1],tmp,y_2);
+ sum[2] = MAC16_16(sum[2],tmp,y_3);
+ sum[3] = MAC16_16(sum[3],tmp,y_0);
+ tmp=*x++;
+ y_1=*y++;
+ sum[0] = MAC16_16(sum[0],tmp,y_2);
+ sum[1] = MAC16_16(sum[1],tmp,y_3);
+ sum[2] = MAC16_16(sum[2],tmp,y_0);
+ sum[3] = MAC16_16(sum[3],tmp,y_1);
+ tmp=*x++;
+ y_2=*y++;
+ sum[0] = MAC16_16(sum[0],tmp,y_3);
+ sum[1] = MAC16_16(sum[1],tmp,y_0);
+ sum[2] = MAC16_16(sum[2],tmp,y_1);
+ sum[3] = MAC16_16(sum[3],tmp,y_2);
+ }
+ if (j++<len) /* tail: up to three x samples remain, handled one at a time in the same rotation order */
+ {
+ opus_val16 tmp = *x++;
+ y_3=*y++;
+ sum[0] = MAC16_16(sum[0],tmp,y_0);
+ sum[1] = MAC16_16(sum[1],tmp,y_1);
+ sum[2] = MAC16_16(sum[2],tmp,y_2);
+ sum[3] = MAC16_16(sum[3],tmp,y_3);
+ }
+ if (j++<len)
+ {
+ opus_val16 tmp=*x++;
+ y_0=*y++;
+ sum[0] = MAC16_16(sum[0],tmp,y_1);
+ sum[1] = MAC16_16(sum[1],tmp,y_2);
+ sum[2] = MAC16_16(sum[2],tmp,y_3);
+ sum[3] = MAC16_16(sum[3],tmp,y_0);
+ }
+ if (j<len)
+ {
+ opus_val16 tmp=*x++;
+ y_1=*y++;
+ sum[0] = MAC16_16(sum[0],tmp,y_2);
+ sum[1] = MAC16_16(sum[1],tmp,y_3);
+ sum[2] = MAC16_16(sum[2],tmp,y_0);
+ sum[3] = MAC16_16(sum[3],tmp,y_1);
+ }
+}
+#endif /* OVERRIDE_XCORR_KERNEL */
+
+#ifndef OVERRIDE_DUAL_INNER_PROD
+/* Computes two dot products that share the left operand in one pass:
+   *xy1 = sum of x[n]*y01[n] and *xy2 = sum of x[n]*y02[n], for n in [0,N).
+   Both outputs are written once, after the loop. */
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+ int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+ opus_val32 acc1=0;
+ opus_val32 acc2=0;
+ int n=0;
+ while (n<N)
+ {
+ /* x[n] is fetched once and used for both accumulators. */
+ const opus_val16 xn = x[n];
+ acc1 = MAC16_16(acc1, xn, y01[n]);
+ acc2 = MAC16_16(acc2, xn, y02[n]);
+ n++;
+ }
+ *xy1 = acc1;
+ *xy2 = acc2;
+}
+#endif
+
+#ifdef OPUS_FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+ opus_val32 *xcorr, int len, int max_pitch);
+
+#if !defined(OVERRIDE_PITCH_XCORR)
+/*Is run-time CPU detection enabled on this platform?
+  If so, celt_pitch_xcorr() dispatches through the CELT_PITCH_XCORR_IMPL
+  function-pointer table, indexed by arch masked with OPUS_ARCHMASK.
+  Otherwise it calls celt_pitch_xcorr_c() directly; arch is still evaluated
+  (cast to void) and then discarded.
+  The whole section is skipped when OVERRIDE_PITCH_XCORR is defined.*/
+# if defined(OPUS_HAVE_RTCD)
+extern
+# if defined(OPUS_FIXED_POINT)
+opus_val32
+# else
+void
+# endif
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+ const opus_val16 *, opus_val32 *, int, int);
+
+# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+ ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+ xcorr, len, max_pitch))
+# else
+# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+ ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch))
+# endif
+#endif
+
+#endif
diff --git a/drivers/opus/celt/quant_bands.c b/drivers/opus/celt/quant_bands.c
new file mode 100644
index 0000000000..0a170e850d
--- /dev/null
+++ b/drivers/opus/celt/quant_bands.c
@@ -0,0 +1,556 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "quant_bands.h"
+#include "laplace.h"
+#include <math.h>
+#include "os_support.h"
+#include "arch.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+#include "rate.h"
+
+#ifdef OPUS_FIXED_POINT
+/* Mean energy in each band quantized in Q4 */
+const signed char eMeans[25] = {
+ 103,100, 92, 85, 81,
+ 77, 72, 70, 78, 75,
+ 73, 71, 78, 74, 69,
+ 72, 70, 74, 76, 71,
+ 60, 60, 60, 60, 60
+};
+#else
+/* Mean energy in each band quantized in Q4 and converted back to float */
+const opus_val16 eMeans[25] = {
+ 6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f,
+ 4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f,
+ 4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f,
+ 4.500000f, 4.375000f, 4.625000f, 4.750000f, 4.437500f,
+ 3.750000f, 3.750000f, 3.750000f, 3.750000f, 3.750000f
+};
+#endif
+/* prediction coefficients: 0.9, 0.8, 0.65, 0.5 */
+#ifdef OPUS_FIXED_POINT
+static const opus_val16 pred_coef[4] = {29440, 26112, 21248, 16384};
+static const opus_val16 beta_coef[4] = {30147, 22282, 12124, 6554};
+static const opus_val16 beta_intra = 4915;
+#else
+static const opus_val16 pred_coef[4] = {29440/32768., 26112/32768., 21248/32768., 16384/32768.};
+static const opus_val16 beta_coef[4] = {30147/32768., 22282/32768., 12124/32768., 6554/32768.};
+static const opus_val16 beta_intra = 4915/32768.;
+#endif
+
+/*Parameters of the Laplace-like probability models used for the coarse energy.
+ There is one pair of parameters for each frame size, prediction type
+ (inter/intra), and band number.
+ The first number of each pair is the probability of 0, and the second is the
+ decay rate, both in Q8 precision.*/
+static const unsigned char e_prob_model[4][2][42] = {
+ /*120 sample frames.*/
+ {
+ /*Inter*/
+ {
+ 72, 127, 65, 129, 66, 128, 65, 128, 64, 128, 62, 128, 64, 128,
+ 64, 128, 92, 78, 92, 79, 92, 78, 90, 79, 116, 41, 115, 40,
+ 114, 40, 132, 26, 132, 26, 145, 17, 161, 12, 176, 10, 177, 11
+ },
+ /*Intra*/
+ {
+ 24, 179, 48, 138, 54, 135, 54, 132, 53, 134, 56, 133, 55, 132,
+ 55, 132, 61, 114, 70, 96, 74, 88, 75, 88, 87, 74, 89, 66,
+ 91, 67, 100, 59, 108, 50, 120, 40, 122, 37, 97, 43, 78, 50
+ }
+ },
+ /*240 sample frames.*/
+ {
+ /*Inter*/
+ {
+ 83, 78, 84, 81, 88, 75, 86, 74, 87, 71, 90, 73, 93, 74,
+ 93, 74, 109, 40, 114, 36, 117, 34, 117, 34, 143, 17, 145, 18,
+ 146, 19, 162, 12, 165, 10, 178, 7, 189, 6, 190, 8, 177, 9
+ },
+ /*Intra*/
+ {
+ 23, 178, 54, 115, 63, 102, 66, 98, 69, 99, 74, 89, 71, 91,
+ 73, 91, 78, 89, 86, 80, 92, 66, 93, 64, 102, 59, 103, 60,
+ 104, 60, 117, 52, 123, 44, 138, 35, 133, 31, 97, 38, 77, 45
+ }
+ },
+ /*480 sample frames.*/
+ {
+ /*Inter*/
+ {
+ 61, 90, 93, 60, 105, 42, 107, 41, 110, 45, 116, 38, 113, 38,
+ 112, 38, 124, 26, 132, 27, 136, 19, 140, 20, 155, 14, 159, 16,
+ 158, 18, 170, 13, 177, 10, 187, 8, 192, 6, 175, 9, 159, 10
+ },
+ /*Intra*/
+ {
+ 21, 178, 59, 110, 71, 86, 75, 85, 84, 83, 91, 66, 88, 73,
+ 87, 72, 92, 75, 98, 72, 105, 58, 107, 54, 115, 52, 114, 55,
+ 112, 56, 129, 51, 132, 40, 150, 33, 140, 29, 98, 35, 77, 42
+ }
+ },
+ /*960 sample frames.*/
+ {
+ /*Inter*/
+ {
+ 42, 121, 96, 66, 108, 43, 111, 40, 117, 44, 123, 32, 120, 36,
+ 119, 33, 127, 33, 134, 34, 139, 21, 147, 23, 152, 20, 158, 25,
+ 154, 26, 166, 21, 173, 16, 184, 13, 184, 10, 150, 13, 139, 15
+ },
+ /*Intra*/
+ {
+ 22, 178, 63, 114, 74, 82, 84, 83, 92, 82, 103, 62, 96, 72,
+ 96, 67, 101, 73, 107, 72, 113, 55, 118, 52, 125, 52, 118, 52,
+ 117, 55, 135, 49, 137, 39, 157, 32, 145, 29, 97, 33, 77, 40
+ }
+ }
+};
+
+static const unsigned char small_energy_icdf[3]={2,1,0};
+
+/* Sums, over bands start..end-1 of each channel, the squared difference
+   between eBands and oldEBands (each shifted right by 3 first), then returns
+   that sum shifted right by 2*DB_SHIFT-6 and capped at 200.
+   len is the per-channel stride of both arrays. The channel loop always runs
+   at least once. */
+static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C)
+{
+ opus_val32 acc = 0;
+ int ch = 0;
+ do {
+ const int base = ch*len;
+ int band;
+ for (band=start;band<end;band++)
+ {
+ opus_val16 diff = SUB16(SHR16(eBands[base+band], 3), SHR16(oldEBands[base+band], 3));
+ acc = MAC16_16(acc, diff, diff);
+ }
+ ch++;
+ } while (ch<C);
+ return MIN32(200,SHR32(acc,2*DB_SHIFT-6));
+}
+
+static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
+ const opus_val16 *eBands, opus_val16 *oldEBands,
+ opus_int32 budget, opus_int32 tell,
+ const unsigned char *prob_model, opus_val16 *error, ec_enc *enc,
+ int C, int LM, int intra, opus_val16 max_decay, int lfe)
+{ /* Quantises and entropy-codes the coarse energy of bands start..end-1 for
+     C channels (prev[] below has room for two).
+     eBands      input energies, channel stride m->nbEBands.
+     oldEBands   previous quantised energies on entry; overwritten with the
+                 values reconstructed from what was coded.
+     budget/tell bit budget and bits already used; tell is refreshed from
+                 ec_tell() before every symbol.
+     prob_model  pairs of bytes per band, passed to ec_laplace_encode().
+     error       receives, per band, the part of the residual not coded here.
+     intra       non-zero: no prediction from oldEBands (coef=0) and
+                 beta_intra; zero: pred_coef[LM] / beta_coef[LM].
+     max_decay   limit on how far below oldEBands a band is allowed to fall.
+     lfe         non-zero: bands with index >= 2 are limited to qi<=0.
+     Returns the sum of |qi0-qi| over all coded values (qi0 being the value
+     chosen before the bit-budget and lfe limits), or 0 when lfe is set. */
+ int i, c;
+ int badness = 0;
+ opus_val32 prev[2] = {0,0};
+ opus_val16 coef;
+ opus_val16 beta;
+
+ if (tell+3 <= budget) /* the intra flag is only written when at least 3 bits remain */
+ ec_enc_bit_logp(enc, intra, 3);
+ if (intra)
+ {
+ coef = 0;
+ beta = beta_intra;
+ } else {
+ beta = beta_coef[LM];
+ coef = pred_coef[LM];
+ }
+
+ /* Encode at a fixed coarse resolution */
+ for (i=start;i<end;i++)
+ {
+ c=0;
+ do {
+ int bits_left;
+ int qi, qi0;
+ opus_val32 q;
+ opus_val16 x;
+ opus_val32 f, tmp;
+ opus_val16 oldE;
+ opus_val16 decay_bound;
+ x = eBands[i+c*m->nbEBands];
+ oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); /* previous energy, floored at -9 for the prediction */
+#ifdef OPUS_FIXED_POINT
+ f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c]; /* prediction residual, with 7 extra fractional bits */
+ /* Rounding to nearest integer here is really important! */
+ qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7);
+ decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT),
+ SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay)));
+#else
+ f = x-coef*oldE-prev[c]; /* prediction residual */
+ /* Rounding to nearest integer here is really important! */
+ qi = (int)floor(.5f+f);
+ decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay;
+#endif
+ /* Prevent the energy from going down too quickly (e.g. for bands
+ that have just one bin) */
+ if (qi < 0 && x < decay_bound)
+ {
+ qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT);
+ if (qi > 0)
+ qi = 0;
+ }
+ qi0 = qi;
+ /* If we don't have enough bits to encode all the energy, just assume
+ something safe. */
+ tell = ec_tell(enc);
+ bits_left = budget-tell-3*C*(end-i); /* bits remaining after setting aside 3 per channel for this and every later band */
+ if (i!=start && bits_left < 30)
+ {
+ if (bits_left < 24)
+ qi = IMIN(1, qi);
+ if (bits_left < 16)
+ qi = IMAX(-1, qi);
+ }
+ if (lfe && i>=2)
+ qi = IMIN(qi, 0);
+ if (budget-tell >= 15) /* enough room: Laplace-coded value; qi is passed by address, so the encoder may change it */
+ {
+ int pi;
+ pi = 2*IMIN(i,20); /* bands above 20 reuse the parameter pair of band 20 */
+ ec_laplace_encode(enc, &qi,
+ prob_model[pi]<<7, prob_model[pi+1]<<6);
+ }
+ else if(budget-tell >= 2) /* 2 bits left: qi limited to -1..1 and sent as symbol 0, 1 or 2 */
+ {
+ qi = IMAX(-1, IMIN(qi, 1));
+ ec_enc_icdf(enc, 2*qi^-(qi<0), small_energy_icdf, 2);
+ }
+ else if(budget-tell >= 1) /* 1 bit left: qi limited to 0 or -1 */
+ {
+ qi = IMIN(0, qi);
+ ec_enc_bit_logp(enc, -qi, 1);
+ }
+ else /* nothing left: nothing is written and qi is taken as -1 */
+ qi = -1;
+ error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT);
+ badness += abs(qi0-qi);
+ q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
+
+ tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7);
+#ifdef OPUS_FIXED_POINT
+ tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
+#endif
+ oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
+ prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); /* running per-channel prediction term carried to the next band */
+ } while (++c < C);
+ }
+ return lfe ? 0 : badness;
+}
+
+/* Encodes the coarse band energies, choosing between intra and inter coding.
+   - intra is selected up front when force_intra is set, or (without two_pass)
+     when *delayedIntra and nbAvailableBytes are both large enough.
+   - With two_pass, the intra version is encoded first into scratch copies
+     (oldEBands_intra/error_intra); its coded bytes are saved, the encoder is
+     rewound and the inter version is encoded. The intra version is restored
+     when its badness is lower, or equal with a bit count (offset by
+     intra_bias) in its favour.
+   - Neither two_pass nor intra is used when fewer than 3 bits remain.
+   On return oldEBands and error hold the results of the retained pass, and
+   *delayedIntra is either reset to the new distortion (intra) or decayed by
+   pred_coef[LM]^2 with the new distortion added (inter). */
+void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
+ const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
+ opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes,
+ int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe)
+{
+ int intra;
+ opus_val16 max_decay;
+ VARDECL(opus_val16, oldEBands_intra);
+ VARDECL(opus_val16, error_intra);
+ ec_enc enc_start_state;
+ opus_uint32 tell;
+ int badness1=0;
+ opus_int32 intra_bias;
+ opus_val32 new_distortion;
+ SAVE_STACK;
+
+ intra = force_intra || (!two_pass && *delayedIntra>2*C*(end-start) && nbAvailableBytes > (end-start)*C);
+ intra_bias = (opus_int32)((budget**delayedIntra*loss_rate)/(C*512));
+ new_distortion = loss_distortion(eBands, oldEBands, start, effEnd, m->nbEBands, C);
+
+ tell = ec_tell(enc);
+ if (tell+3 > budget)
+ two_pass = intra = 0;
+
+ max_decay = QCONST16(16.f,DB_SHIFT);
+ if (end-start>10)
+ {
+#ifdef OPUS_FIXED_POINT
+ max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));
+#else
+ max_decay = MIN32(max_decay, .125f*nbAvailableBytes);
+#endif
+ }
+ /* Expressed in DB_SHIFT units like every other max_decay value above; a
+    bare 3 would be a far smaller limit in fixed-point builds. */
+ if (lfe)
+ max_decay=QCONST16(3.f,DB_SHIFT);
+ enc_start_state = *enc;
+
+ ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
+ ALLOC(error_intra, C*m->nbEBands, opus_val16);
+ OPUS_COPY(oldEBands_intra, oldEBands, C*m->nbEBands);
+
+ if (two_pass || intra)
+ {
+ badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget,
+ tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe);
+ }
+
+ if (!intra)
+ {
+ unsigned char *intra_buf;
+ ec_enc enc_intra_state;
+ opus_int32 tell_intra;
+ opus_uint32 nstart_bytes;
+ opus_uint32 nintra_bytes;
+ opus_uint32 save_bytes;
+ int badness2;
+ VARDECL(unsigned char, intra_bits);
+
+ tell_intra = ec_tell_frac(enc);
+
+ enc_intra_state = *enc;
+
+ /* Bytes produced since enc_start_state was captured (none if the intra
+    pass above was not run). */
+ nstart_bytes = ec_range_bytes(&enc_start_state);
+ nintra_bytes = ec_range_bytes(&enc_intra_state);
+ intra_buf = ec_get_buffer(&enc_intra_state) + nstart_bytes;
+ save_bytes = nintra_bytes-nstart_bytes;
+ if (save_bytes == 0)
+ save_bytes = ALLOC_NONE;
+ ALLOC(intra_bits, save_bytes, unsigned char);
+ /* Copy bits from intra bit-stream */
+ OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes);
+
+ /* Rewind the encoder and code the inter version over the same bytes. */
+ *enc = enc_start_state;
+
+ badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget,
+ tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe);
+
+ if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra)))
+ {
+ *enc = enc_intra_state;
+ /* Copy intra bits to bit-stream */
+ OPUS_COPY(intra_buf, intra_bits, nintra_bytes - nstart_bytes);
+ OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands);
+ OPUS_COPY(error, error_intra, C*m->nbEBands);
+ intra = 1;
+ }
+ } else {
+ OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands);
+ OPUS_COPY(error, error_intra, C*m->nbEBands);
+ }
+
+ if (intra)
+ *delayedIntra = new_distortion;
+ else
+ *delayedIntra = ADD32(MULT16_32_Q15(MULT16_16_Q15(pred_coef[LM], pred_coef[LM]),*delayedIntra),
+ new_distortion);
+
+ RESTORE_STACK;
+}
+
+/* Refines the coarse energy of bands start..end-1 with fine_quant[i] extra
+   bits per channel. Bands with fine_quant[i] <= 0 are skipped.
+   For every coded value the reconstruction offset is added to oldEBands and
+   subtracted from error, so error keeps the part that is still uncoded. */
+void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C)
+{
+ int i, c;
+
+ /* Encode finer resolution */
+ for (i=start;i<end;i++)
+ {
+ opus_int16 frac;
+ if (fine_quant[i] <= 0)
+ continue;
+ /* Number of quantisation steps; computed only after the guard so the
+    shift count is known to be positive. */
+ frac = 1<<fine_quant[i];
+ c=0;
+ do {
+ int q2;
+ opus_val16 offset;
+#ifdef OPUS_FIXED_POINT
+ /* Has to be without rounding */
+ q2 = (error[i+c*m->nbEBands]+QCONST16(.5f,DB_SHIFT))>>(DB_SHIFT-fine_quant[i]);
+#else
+ q2 = (int)floor((error[i+c*m->nbEBands]+.5f)*frac);
+#endif
+ /* Keep the index inside the fine_quant[i]-bit range. */
+ if (q2 > frac-1)
+ q2 = frac-1;
+ if (q2<0)
+ q2 = 0;
+ ec_enc_bits(enc, q2, fine_quant[i]);
+#ifdef OPUS_FIXED_POINT
+ offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
+#else
+ offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
+#endif
+ oldEBands[i+c*m->nbEBands] += offset;
+ error[i+c*m->nbEBands] -= offset;
+ /*printf ("%f ", error[i] - offset);*/
+ } while (++c < C);
+ }
+}
+
+void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C)
+{ /* Spends leftover bits on one extra energy bit per band and channel.
+     Bands with fine_priority[i]==0 are served first, then those with
+     priority 1; a band is skipped when fine_quant[i] >= MAX_FINE_BITS.
+     The scan of a priority level stops once bits_left drops below C.
+     Each bit tells whether the remaining error is negative (0) or not (1),
+     and oldEBands is moved by the matching offset. error[] is read but not
+     updated here. */
+ int i, prio, c;
+
+ /* Use up the remaining bits */
+ for (prio=0;prio<2;prio++)
+ {
+ for (i=start;i<end && bits_left>=C ;i++)
+ {
+ if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio)
+ continue;
+ c=0;
+ do {
+ int q2;
+ opus_val16 offset;
+ q2 = error[i+c*m->nbEBands]<0 ? 0 : 1;
+ ec_enc_bits(enc, q2, 1);
+#ifdef OPUS_FIXED_POINT
+ offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
+#else
+ offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
+#endif
+ oldEBands[i+c*m->nbEBands] += offset;
+ bits_left--;
+ } while (++c < C);
+ }
+ }
+}
+
+void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM)
+{ /* Decodes the coarse energy of bands start..end-1 for C channels and
+     updates oldEBands in place from the previous values to the decoded ones.
+     intra selects the probability model e_prob_model[LM][intra] and the
+     prediction coefficients (coef=0 and beta_intra when non-zero, otherwise
+     pred_coef[LM] and beta_coef[LM]).
+     The bit budget is dec->storage*8; how a value is read depends on how many
+     of those bits are still unread, as reported by ec_tell(). */
+ const unsigned char *prob_model = e_prob_model[LM][intra];
+ int i, c;
+ opus_val32 prev[2] = {0, 0};
+ opus_val16 coef;
+ opus_val16 beta;
+ opus_int32 budget;
+ opus_int32 tell;
+
+ if (intra)
+ {
+ coef = 0;
+ beta = beta_intra;
+ } else {
+ beta = beta_coef[LM];
+ coef = pred_coef[LM];
+ }
+
+ budget = dec->storage*8;
+
+ /* Decode at a fixed coarse resolution */
+ for (i=start;i<end;i++)
+ {
+ c=0;
+ do {
+ int qi;
+ opus_val32 q;
+ opus_val32 tmp;
+ /* It would be better to express this invariant as a
+ test on C at function entry, but that isn't enough
+ to make the static analyzer happy. */
+ celt_assert(c<2);
+ tell = ec_tell(dec);
+ if(budget-tell>=15) /* enough room: Laplace-coded value */
+ {
+ int pi;
+ pi = 2*IMIN(i,20); /* bands above 20 reuse the parameter pair of band 20 */
+ qi = ec_laplace_decode(dec,
+ prob_model[pi]<<7, prob_model[pi+1]<<6);
+ }
+ else if(budget-tell>=2)
+ {
+ qi = ec_dec_icdf(dec, small_energy_icdf, 2);
+ qi = (qi>>1)^-(qi&1); /* symbols 0, 1, 2 become 0, -1, +1 */
+ }
+ else if(budget-tell>=1)
+ {
+ qi = -ec_dec_bit_logp(dec, 1); /* a single bit: 0 or -1 */
+ }
+ else /* nothing left to read: -1 is assumed */
+ qi = -1;
+ q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
+
+ oldEBands[i+c*m->nbEBands] = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); /* previous energy is floored at -9 before it is used for prediction */
+ tmp = PSHR32(MULT16_16(coef,oldEBands[i+c*m->nbEBands]),8) + prev[c] + SHL32(q,7);
+#ifdef OPUS_FIXED_POINT
+ tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
+#endif
+ oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
+ prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); /* running per-channel prediction term carried to the next band */
+ } while (++c < C);
+ }
+}
+
+void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C)
+{ /* Reads fine_quant[i] raw bits per channel for each band in start..end-1
+     and adds the corresponding offset to oldEBands. Bands with
+     fine_quant[i] <= 0 are skipped and read nothing. */
+ int i, c;
+ /* Decode finer resolution */
+ for (i=start;i<end;i++)
+ {
+ if (fine_quant[i] <= 0)
+ continue;
+ c=0;
+ do {
+ int q2;
+ opus_val16 offset;
+ q2 = ec_dec_bits(dec, fine_quant[i]);
+#ifdef OPUS_FIXED_POINT
+ offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
+#else
+ offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
+#endif
+ oldEBands[i+c*m->nbEBands] += offset;
+ } while (++c < C);
+ }
+}
+
+void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C)
+{ /* Reads one extra energy bit per band and channel while bits remain.
+     Bands with fine_priority[i]==0 are served first, then those with
+     priority 1; a band is skipped when fine_quant[i] >= MAX_FINE_BITS.
+     The scan of a priority level stops once bits_left drops below C.
+     Each bit read moves oldEBands by the matching offset. */
+ int i, prio, c;
+
+ /* Use up the remaining bits */
+ for (prio=0;prio<2;prio++)
+ {
+ for (i=start;i<end && bits_left>=C ;i++)
+ {
+ if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio)
+ continue;
+ c=0;
+ do {
+ int q2;
+ opus_val16 offset;
+ q2 = ec_dec_bits(dec, 1);
+#ifdef OPUS_FIXED_POINT
+ offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
+#else
+ offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
+#endif
+ oldEBands[i+c*m->nbEBands] += offset;
+ bits_left--;
+ } while (++c < C);
+ }
+ }
+}
+
+/* Fills bandLogE for each channel (stride m->nbEBands):
+   bands 0..effEnd-1 get celt_log2() of the band energy shifted left by 2,
+   minus the per-band mean from eMeans shifted left by 6;
+   bands effEnd..end-1 get the constant -14 (in DB_SHIFT units).
+   The channel loop always runs at least once. */
+void amp2Log2(const CELTMode *m, int effEnd, int end,
+ celt_ener *bandE, opus_val16 *bandLogE, int C)
+{
+ int ch=0;
+ do {
+ const int base = ch*m->nbEBands;
+ int band;
+ for (band=0;band<effEnd;band++)
+ {
+ const opus_val16 mean = SHL16((opus_val16)eMeans[band],6);
+ bandLogE[base+band] = celt_log2(SHL32(bandE[base+band],2)) - mean;
+ }
+ for (band=effEnd;band<end;band++)
+ bandLogE[base+band] = -QCONST16(14.f,DB_SHIFT);
+ ch++;
+ } while (ch < C);
+}
diff --git a/drivers/opus/celt/quant_bands.h b/drivers/opus/celt/quant_bands.h
new file mode 100644
index 0000000000..840df8723f
--- /dev/null
+++ b/drivers/opus/celt/quant_bands.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef QUANT_BANDS
+#define QUANT_BANDS
+
+#include "arch.h"
+#include "opus_modes.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "mathops.h"
+
+#ifdef OPUS_FIXED_POINT
+extern const signed char eMeans[25];
+#else
+extern const opus_val16 eMeans[25];
+#endif
+
+void amp2Log2(const CELTMode *m, int effEnd, int end,
+ celt_ener *bandE, opus_val16 *bandLogE, int C);
+
+void log2Amp(const CELTMode *m, int start, int end,
+ celt_ener *eBands, const opus_val16 *oldEBands, int C);
+
+void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
+ const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
+ opus_val16 *error, ec_enc *enc, int C, int LM,
+ int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra,
+ int two_pass, int loss_rate, int lfe);
+
+void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C);
+
+void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C);
+
+void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM);
+
+void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C);
+
+void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C);
+
+#endif /* QUANT_BANDS */
diff --git a/drivers/opus/celt/rate.c b/drivers/opus/celt/rate.c
new file mode 100644
index 0000000000..cca585ad95
--- /dev/null
+++ b/drivers/opus/celt/rate.c
@@ -0,0 +1,638 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <math.h>
+#include "opus_modes.h"
+#include "cwrs.h"
+#include "arch.h"
+#include "os_support.h"
+
+#include "entcode.h"
+#include "rate.h"
+
+/* Lookup table whose entry i equals ceil(8*log2(i+1)).
+ compute_allocation() and interp_bits2pulses() index it with a band count to
+ size intensity_rsv, the reservation for the intensity stereo parameter.
+ NOTE(review): the factor 8 presumably corresponds to 1<<BITRES - confirm. */
+static const unsigned char LOG2_FRAC_TABLE[24]={
+ 0,
+ 8,13,
+ 16,19,21,23,
+ 24,26,27,28,29,30,31,32,
+ 32,33,34,34,35,36,36,37,37
+};
+
+#ifdef CUSTOM_MODES
+
+/* Returns nonzero when V(N,K) can be represented in a 32-bit unsigned
+   integer, and zero otherwise.
+   Both _n and _k are themselves limited to 15 bits. */
+static int fits_in32(int _n, int _k)
+{
+   /* Largest N accepted for each K below 14. */
+   static const opus_int16 n_limit[15] = {
+      32767, 32767, 32767, 1476, 283, 109, 60, 40,
+      29, 24, 20, 18, 16, 14, 13};
+   /* Largest K accepted for each N below 14. */
+   static const opus_int16 k_limit[15] = {
+      32767, 32767, 32767, 32767, 1172, 238, 95, 53,
+      36, 27, 22, 18, 16, 15, 13};
+
+   if (_n < 14)
+      return _k <= k_limit[_n];
+   if (_k < 14)
+      return _n <= n_limit[_k];
+   /* Both dimensions are 14 or more: never accepted. */
+   return 0;
+}
+
+/* Builds the pulse cache stored in m->cache for frame-size shifts up to LM.
+ Three arrays are allocated with opus_alloc() and filled:
+ - cache->index: one entry per (shift i in 0..LM+1, band j) pair, giving the
+ offset of that band size's row inside cache->bits (-1 when the size is 0);
+ - cache->bits: one row per distinct band size; byte 0 of a row holds the
+ highest usable pseudo-pulse index K, bytes 1..K hold the required bits
+ minus one for get_pulses(1..K);
+ - cache->caps: one byte per (i in 0..LM, C in 1..2, band j) triple.
+ NOTE(review): the opus_alloc() results are used without a NULL check, and
+ entryN/entryK/entryI hold at most 100 distinct sizes with no bound check on
+ nbEntries - confirm both are guaranteed elsewhere. */
+void compute_pulse_cache(CELTMode *m, int LM)
+{
+ int C;
+ int i;
+ int j;
+ int curr=0;
+ int nbEntries=0;
+ int entryN[100], entryK[100], entryI[100];
+ const opus_int16 *eBands = m->eBands;
+ PulseCache *cache = &m->cache;
+ opus_int16 *cindex;
+ unsigned char *bits;
+ unsigned char *cap;
+
+ cindex = (opus_int16 *)opus_alloc(sizeof(cache->index[0])*m->nbEBands*(LM+2));
+ cache->index = cindex;
+
+ /* Scan for all unique band sizes */
+ for (i=0;i<=LM+1;i++)
+ {
+ for (j=0;j<m->nbEBands;j++)
+ {
+ int k;
+ /* Row i of the index describes bands of width (eBands[j+1]-eBands[j])<<i>>1 */
+ int N = (eBands[j+1]-eBands[j])<<i>>1;
+ cindex[i*m->nbEBands+j] = -1;
+ /* Find other bands that have the same size */
+ for (k=0;k<=i;k++)
+ {
+ int n;
+ /* Only entries that were already visited are searched: all of the
+ earlier rows, and the bands before j in the current row. */
+ for (n=0;n<m->nbEBands && (k!=i || n<j);n++)
+ {
+ if (N == (eBands[n+1]-eBands[n])<<k>>1)
+ {
+ cindex[i*m->nbEBands+j] = cindex[k*m->nbEBands+n];
+ break;
+ }
+ }
+ }
+ /* No earlier band has this size: register a new row. */
+ if (cache->index[i*m->nbEBands+j] == -1 && N!=0)
+ {
+ int K;
+ entryN[nbEntries] = N;
+ K = 0;
+ /* Largest pseudo-pulse index accepted by fits_in32(), capped at MAX_PSEUDO */
+ while (fits_in32(N,get_pulses(K+1)) && K<MAX_PSEUDO)
+ K++;
+ entryK[nbEntries] = K;
+ cindex[i*m->nbEBands+j] = curr;
+ entryI[nbEntries] = curr;
+
+ /* A row takes K+1 bytes: the count byte plus K cost entries. */
+ curr += K+1;
+ nbEntries++;
+ }
+ }
+ }
+ bits = (unsigned char *)opus_alloc(sizeof(unsigned char)*curr);
+ cache->bits = bits;
+ cache->size = curr;
+ /* Compute the cache for all unique sizes */
+ for (i=0;i<nbEntries;i++)
+ {
+ unsigned char *ptr = bits+entryI[i];
+ opus_int16 tmp[MAX_PULSES+1];
+ get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES);
+ /* Costs are stored minus one; pulses2bits() adds the one back. */
+ for (j=1;j<=entryK[i];j++)
+ ptr[j] = tmp[get_pulses(j)]-1;
+ ptr[0] = entryK[i];
+ }
+
+ /* Compute the maximum rate for each band at which we'll reliably use as
+ many bits as we ask for. */
+ cache->caps = cap = (unsigned char *)opus_alloc(sizeof(cache->caps[0])*(LM+1)*2*m->nbEBands);
+ for (i=0;i<=LM;i++)
+ {
+ for (C=1;C<=2;C++)
+ {
+ for (j=0;j<m->nbEBands;j++)
+ {
+ int N0;
+ int max_bits;
+ N0 = m->eBands[j+1]-m->eBands[j];
+ /* N=1 bands only have a sign bit and fine bits. */
+ if (N0<<i == 1)
+ max_bits = C*(1+MAX_FINE_BITS)<<BITRES;
+ else
+ {
+ const unsigned char *pcache;
+ opus_int32 num;
+ opus_int32 den;
+ int LM0;
+ int N;
+ int offset;
+ int ndof;
+ int qb;
+ int k;
+ LM0 = 0;
+ /* Even-sized bands bigger than N=2 can be split one more time.
+ As of commit 44203907 all bands >1 are even, including custom modes.*/
+ if (N0 > 2)
+ {
+ N0>>=1;
+ LM0--;
+ }
+ /* N0=1 bands can't be split down to N<2. */
+ else if (N0 <= 1)
+ {
+ LM0=IMIN(i,1);
+ N0<<=LM0;
+ }
+ /* Compute the cost for the lowest-level PVQ of a fully split
+ band. */
+ pcache = bits + cindex[(LM0+1)*m->nbEBands+j];
+ max_bits = pcache[pcache[0]]+1;
+ /* Add in the cost of coding regular splits. */
+ N = N0;
+ for(k=0;k<i-LM0;k++){
+ max_bits <<= 1;
+ /* Offset the number of qtheta bits by log2(N)/2
+ + QTHETA_OFFSET compared to their "fair share" of
+ total/N */
+ offset = ((m->logN[j]+((LM0+k)<<BITRES))>>1)-QTHETA_OFFSET;
+ /* The number of qtheta bits we'll allocate if the remainder
+ is to be max_bits.
+ The average measured cost for theta is 0.89701 times qb,
+ approximated here as 459/512. */
+ num=459*(opus_int32)((2*N-1)*offset+max_bits);
+ den=((opus_int32)(2*N-1)<<9)-459;
+ qb = IMIN((num+(den>>1))/den, 57);
+ celt_assert(qb >= 0);
+ max_bits += qb;
+ N <<= 1;
+ }
+ /* Add in the cost of a stereo split, if necessary. */
+ if (C==2)
+ {
+ max_bits <<= 1;
+ offset = ((m->logN[j]+(i<<BITRES))>>1)-(N==2?QTHETA_OFFSET_TWOPHASE:QTHETA_OFFSET);
+ ndof = 2*N-1-(N==2);
+ /* The average measured cost for theta with the step PDF is
+ 0.95164 times qb, approximated here as 487/512. */
+ num = (N==2?512:487)*(opus_int32)(max_bits+ndof*offset);
+ den = ((opus_int32)ndof<<9)-(N==2?512:487);
+ qb = IMIN((num+(den>>1))/den, (N==2?64:61));
+ celt_assert(qb >= 0);
+ max_bits += qb;
+ }
+ /* Add the fine bits we'll use. */
+ /* Compensate for the extra DoF in stereo */
+ ndof = C*N + ((C==2 && N>2) ? 1 : 0);
+ /* Offset the number of fine bits by log2(N)/2 + FINE_OFFSET
+ compared to their "fair share" of total/N */
+ offset = ((m->logN[j] + (i<<BITRES))>>1)-FINE_OFFSET;
+ /* N=2 is the only point that doesn't match the curve */
+ if (N==2)
+ offset += 1<<BITRES>>2;
+ /* The number of fine bits we'll allocate if the remainder is
+ to be max_bits. */
+ num = max_bits+ndof*offset;
+ den = (ndof-1)<<BITRES;
+ qb = IMIN((num+(den>>1))/den, MAX_FINE_BITS);
+ celt_assert(qb >= 0);
+ max_bits += C*qb<<BITRES;
+ }
+ /* Normalise by channel count and band width, then bias by -64 so that
+ the result can be stored in one byte (checked by the asserts below). */
+ max_bits = (4*max_bits/(C*((m->eBands[j+1]-m->eBands[j])<<i)))-64;
+ celt_assert(max_bits >= 0);
+ celt_assert(max_bits < 256);
+ *cap++ = (unsigned char)max_bits;
+ }
+ }
+ }
+}
+
+#endif /* CUSTOM_MODES */
+
+#define ALLOC_STEPS 6
+
+/* Interpolates between two per-band allocations and turns the result into
+ per-band PVQ bits, fine-energy bits and fine priorities.
+ The allocation tried for band j is bits1[j] + (x*bits2[j]>>ALLOC_STEPS),
+ where the factor x is found by an ALLOC_STEPS-step bisection against
+ `total`. Bands below thresh[j] get either alloc_floor or nothing, and no
+ band gets more than cap[j].
+ Working backwards from `end`, bands are then skipped (the decision is
+ written to or read from `ec` depending on `encode`) until one is kept or
+ band index skip_start is reached. The intensity and dual stereo parameters
+ are coded when their reservations (intensity_rsv, dual_stereo_rsv) are
+ nonzero.
+ Outputs: bits[] (bits left for PVQ), ebits[] (fine energy bits per
+ channel), fine_priority[], *_balance (bits over the cap left after the last
+ coded band), *intensity and *dual_stereo.
+ Returns codedBands: bands in [start, codedBands) are coded and bands in
+ [codedBands, end) are skipped. */
+static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
+ const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance,
+ int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits,
+ int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+{
+ opus_int32 psum;
+ int lo, hi;
+ int i, j;
+ int logM;
+ int stereo;
+ int codedBands=-1;
+ int alloc_floor;
+ opus_int32 left, percoeff;
+ int done;
+ opus_int32 balance;
+ SAVE_STACK;
+
+ alloc_floor = C<<BITRES;
+ stereo = C>1;
+
+ logM = LM<<BITRES;
+ lo = 0;
+ hi = 1<<ALLOC_STEPS;
+ /* Bisect the interpolation factor: keep lo at a value whose total fits. */
+ for (i=0;i<ALLOC_STEPS;i++)
+ {
+ int mid = (lo+hi)>>1;
+ psum = 0;
+ done = 0;
+ for (j=end;j-->start;)
+ {
+ int tmp = bits1[j] + (mid*(opus_int32)bits2[j]>>ALLOC_STEPS);
+ if (tmp >= thresh[j] || done)
+ {
+ done = 1;
+ /* Don't allocate more than we can actually use */
+ psum += IMIN(tmp, cap[j]);
+ } else {
+ if (tmp >= alloc_floor)
+ psum += alloc_floor;
+ }
+ }
+ if (psum > total)
+ hi = mid;
+ else
+ lo = mid;
+ }
+ psum = 0;
+ /*printf ("interp bisection gave %d\n", lo);*/
+ done = 0;
+ for (j=end;j-->start;)
+ {
+ /* Widen the product exactly as the bisection loop above does, so that the
+ allocation committed here is the one that was searched for even where
+ int is narrower than 32 bits. */
+ int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS);
+ if (tmp < thresh[j] && !done)
+ {
+ if (tmp >= alloc_floor)
+ tmp = alloc_floor;
+ else
+ tmp = 0;
+ } else
+ done = 1;
+ /* Don't allocate more than we can actually use */
+ tmp = IMIN(tmp, cap[j]);
+ bits[j] = tmp;
+ psum += tmp;
+ }
+
+ /* Decide which bands to skip, working backwards from the end. */
+ for (codedBands=end;;codedBands--)
+ {
+ int band_width;
+ int band_bits;
+ int rem;
+ j = codedBands-1;
+ /* Never skip the first band, nor a band that has been boosted by
+ dynalloc.
+ In the first case, we'd be coding a bit to signal we're going to waste
+ all the other bits.
+ In the second case, we'd be coding a bit to redistribute all the bits
+ we just signaled should be concentrated in this band. */
+ if (j<=skip_start)
+ {
+ /* Give the bit we reserved to end skipping back. */
+ total += skip_rsv;
+ break;
+ }
+ /*Figure out how many left-over bits we would be adding to this band.
+ This can include bits we've stolen back from higher, skipped bands.*/
+ left = total-psum;
+ percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+ left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
+ rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0);
+ band_width = m->eBands[codedBands]-m->eBands[j];
+ band_bits = (int)(bits[j] + percoeff*band_width + rem);
+ /*Only code a skip decision if we're above the threshold for this band.
+ Otherwise it is force-skipped.
+ This ensures that we have enough bits to code the skip flag.*/
+ if (band_bits >= IMAX(thresh[j], alloc_floor+(1<<BITRES)))
+ {
+ if (encode)
+ {
+ /*This if() block is the only part of the allocation function that
+ is not a mandatory part of the bitstream: any bands we choose to
+ skip here must be explicitly signaled.*/
+ /*Choose a threshold with some hysteresis to keep bands from
+ fluctuating in and out.*/
+#ifdef FUZZING
+ if ((rand()&0x1) == 0)
+#else
+ if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
+#endif
+ {
+ ec_enc_bit_logp(ec, 1, 1);
+ break;
+ }
+ ec_enc_bit_logp(ec, 0, 1);
+ } else if (ec_dec_bit_logp(ec, 1)) {
+ break;
+ }
+ /*We used a bit to skip this band.*/
+ psum += 1<<BITRES;
+ band_bits -= 1<<BITRES;
+ }
+ /*Reclaim the bits originally allocated to this band.*/
+ psum -= bits[j]+intensity_rsv;
+ /* One band fewer remains, so the intensity reservation is resized. */
+ if (intensity_rsv > 0)
+ intensity_rsv = LOG2_FRAC_TABLE[j-start];
+ psum += intensity_rsv;
+ if (band_bits >= alloc_floor)
+ {
+ /*If we have enough for a fine energy bit per channel, use it.*/
+ psum += alloc_floor;
+ bits[j] = alloc_floor;
+ } else {
+ /*Otherwise this band gets nothing at all.*/
+ bits[j] = 0;
+ }
+ }
+
+ celt_assert(codedBands > start);
+ /* Code the intensity and dual stereo parameters. */
+ if (intensity_rsv > 0)
+ {
+ if (encode)
+ {
+ *intensity = IMIN(*intensity, codedBands);
+ ec_enc_uint(ec, *intensity-start, codedBands+1-start);
+ }
+ else
+ *intensity = start+ec_dec_uint(ec, codedBands+1-start);
+ }
+ else
+ *intensity = 0;
+ /* Without intensity stereo above start, the dual stereo reservation is
+ handed back to the budget. */
+ if (*intensity <= start)
+ {
+ total += dual_stereo_rsv;
+ dual_stereo_rsv = 0;
+ }
+ if (dual_stereo_rsv > 0)
+ {
+ if (encode)
+ ec_enc_bit_logp(ec, *dual_stereo, 1);
+ else
+ *dual_stereo = ec_dec_bit_logp(ec, 1);
+ }
+ else
+ *dual_stereo = 0;
+
+ /* Allocate the remaining bits */
+ left = total-psum;
+ percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+ left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
+ for (j=start;j<codedBands;j++)
+ bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j]));
+ for (j=start;j<codedBands;j++)
+ {
+ int tmp = (int)IMIN(left, m->eBands[j+1]-m->eBands[j]);
+ bits[j] += tmp;
+ left -= tmp;
+ }
+ /*for (j=0;j<end;j++)printf("%d ", bits[j]);printf("\n");*/
+
+ /* Split each coded band's bits between fine energy and PVQ, carrying any
+ excess over the cap forward to the next band in `balance`. */
+ balance = 0;
+ for (j=start;j<codedBands;j++)
+ {
+ int N0, N, den;
+ int offset;
+ int NClogN;
+ opus_int32 excess, bit;
+
+ celt_assert(bits[j] >= 0);
+ N0 = m->eBands[j+1]-m->eBands[j];
+ N=N0<<LM;
+ bit = (opus_int32)bits[j]+balance;
+
+ if (N>1)
+ {
+ excess = MAX32(bit-cap[j],0);
+ bits[j] = bit-excess;
+
+ /* Compensate for the extra DoF in stereo */
+ den=(C*N+ ((C==2 && N>2 && !*dual_stereo && j<*intensity) ? 1 : 0));
+
+ NClogN = den*(m->logN[j] + logM);
+
+ /* Offset for the number of fine bits by log2(N)/2 + FINE_OFFSET
+ compared to their "fair share" of total/N */
+ offset = (NClogN>>1)-den*FINE_OFFSET;
+
+ /* N=2 is the only point that doesn't match the curve */
+ if (N==2)
+ offset += den<<BITRES>>2;
+
+ /* Changing the offset for allocating the second and third
+ fine energy bit */
+ if (bits[j] + offset < den*2<<BITRES)
+ offset += NClogN>>2;
+ else if (bits[j] + offset < den*3<<BITRES)
+ offset += NClogN>>3;
+
+ /* Divide with rounding */
+ ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES));
+
+ /* Make sure not to bust */
+ if (C*ebits[j] > (bits[j]>>BITRES))
+ ebits[j] = bits[j] >> stereo >> BITRES;
+
+ /* More than that is useless because that's about as far as PVQ can go */
+ ebits[j] = IMIN(ebits[j], MAX_FINE_BITS);
+
+ /* If we rounded down or capped this band, make it a candidate for the
+ final fine energy pass */
+ fine_priority[j] = ebits[j]*(den<<BITRES) >= bits[j]+offset;
+
+ /* Remove the allocated fine bits; the rest are assigned to PVQ */
+ bits[j] -= C*ebits[j]<<BITRES;
+
+ } else {
+ /* For N=1, all bits go to fine energy except for a single sign bit */
+ excess = MAX32(0,bit-(C<<BITRES));
+ bits[j] = bit-excess;
+ ebits[j] = 0;
+ fine_priority[j] = 1;
+ }
+
+ /* Fine energy can't take advantage of the re-balancing in
+ quant_all_bands().
+ Instead, do the re-balancing here.*/
+ if(excess > 0)
+ {
+ int extra_fine;
+ int extra_bits;
+ extra_fine = IMIN(excess>>(stereo+BITRES),MAX_FINE_BITS-ebits[j]);
+ ebits[j] += extra_fine;
+ extra_bits = extra_fine*C<<BITRES;
+ fine_priority[j] = extra_bits >= excess-balance;
+ excess -= extra_bits;
+ }
+ balance = excess;
+
+ celt_assert(bits[j] >= 0);
+ celt_assert(ebits[j] >= 0);
+ }
+ /* Save any remaining bits over the cap for the rebalancing in
+ quant_all_bands(). */
+ *_balance = balance;
+
+ /* The skipped bands use all their bits for fine energy. */
+ for (;j<end;j++)
+ {
+ ebits[j] = bits[j] >> stereo >> BITRES;
+ celt_assert(C*ebits[j]<<BITRES == bits[j]);
+ bits[j] = 0;
+ fine_priority[j] = ebits[j]<1;
+ }
+ RESTORE_STACK;
+ return codedBands;
+}
+
+/* Computes the bit allocation for bands [start, end).
+ Reserves bits out of `total` for the skip flag and, when C==2, for the
+ intensity and dual stereo parameters. It then bisects the rows of
+ m->allocVectors for the last row whose trimmed, offset and capped total
+ fits in what is left, and calls interp_bits2pulses() to interpolate between
+ that row and the next one.
+ Results are written through pulses, ebits, fine_priority, balance,
+ intensity and dual_stereo by interp_bits2pulses().
+ Returns the codedBands value produced by interp_bits2pulses(). */
+int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
+ opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+{
+ int lo, hi, len, j;
+ int codedBands;
+ int skip_start;
+ int skip_rsv;
+ int intensity_rsv;
+ int dual_stereo_rsv;
+ VARDECL(int, bits1);
+ VARDECL(int, bits2);
+ VARDECL(int, thresh);
+ VARDECL(int, trim_offset);
+ SAVE_STACK;
+
+ /* A negative budget is treated as an empty one. */
+ total = IMAX(total, 0);
+ len = m->nbEBands;
+ skip_start = start;
+ /* Reserve a bit to signal the end of manually skipped bands. */
+ skip_rsv = total >= 1<<BITRES ? 1<<BITRES : 0;
+ total -= skip_rsv;
+ /* Reserve bits for the intensity and dual stereo parameters. */
+ intensity_rsv = dual_stereo_rsv = 0;
+ if (C==2)
+ {
+ intensity_rsv = LOG2_FRAC_TABLE[end-start];
+ /* Not enough bits left: the intensity parameter will not be coded. */
+ if (intensity_rsv>total)
+ intensity_rsv = 0;
+ else
+ {
+ total -= intensity_rsv;
+ dual_stereo_rsv = total>=1<<BITRES ? 1<<BITRES : 0;
+ total -= dual_stereo_rsv;
+ }
+ }
+ ALLOC(bits1, len, int);
+ ALLOC(bits2, len, int);
+ ALLOC(thresh, len, int);
+ ALLOC(trim_offset, len, int);
+
+ for (j=start;j<end;j++)
+ {
+ /* Below this threshold, we're sure not to allocate any PVQ bits */
+ thresh[j] = IMAX((C)<<BITRES, (3*(m->eBands[j+1]-m->eBands[j])<<LM<<BITRES)>>4);
+ /* Tilt of the allocation curve */
+ trim_offset[j] = C*(m->eBands[j+1]-m->eBands[j])*(alloc_trim-5-LM)*(end-j-1)
+ *(1<<(LM+BITRES))>>6;
+ /* Giving less resolution to single-coefficient bands because they get
+ more benefit from having one coarse value per coefficient*/
+ if ((m->eBands[j+1]-m->eBands[j])<<LM==1)
+ trim_offset[j] -= C<<BITRES;
+ }
+ /* Bisect the allocation rows 1..nbAllocVectors-1; on exit lo is the first
+ row whose total exceeds the budget, or one past the last row searched. */
+ lo = 1;
+ hi = m->nbAllocVectors - 1;
+ do
+ {
+ int done = 0;
+ int psum = 0;
+ int mid = (lo+hi) >> 1;
+ for (j=end;j-->start;)
+ {
+ int bitsj;
+ int N = m->eBands[j+1]-m->eBands[j];
+ bitsj = C*N*m->allocVectors[mid*len+j]<<LM>>2;
+ if (bitsj > 0)
+ bitsj = IMAX(0, bitsj + trim_offset[j]);
+ bitsj += offsets[j];
+ if (bitsj >= thresh[j] || done)
+ {
+ done = 1;
+ /* Don't allocate more than we can actually use */
+ psum += IMIN(bitsj, cap[j]);
+ } else {
+ if (bitsj >= C<<BITRES)
+ psum += C<<BITRES;
+ }
+ }
+ if (psum > total)
+ hi = mid - 1;
+ else
+ lo = mid + 1;
+ /*printf ("lo = %d, hi = %d\n", lo, hi);*/
+ }
+ while (lo <= hi);
+ /* hi becomes the first row that does not fit, lo the row just below it. */
+ hi = lo--;
+ /*printf ("interp between %d and %d\n", lo, hi);*/
+ for (j=start;j<end;j++)
+ {
+ int bits1j, bits2j;
+ int N = m->eBands[j+1]-m->eBands[j];
+ bits1j = C*N*m->allocVectors[lo*len+j]<<LM>>2;
+ /* Past the last row, the per-band cap serves as the upper allocation. */
+ bits2j = hi>=m->nbAllocVectors ?
+ cap[j] : C*N*m->allocVectors[hi*len+j]<<LM>>2;
+ if (bits1j > 0)
+ bits1j = IMAX(0, bits1j + trim_offset[j]);
+ if (bits2j > 0)
+ bits2j = IMAX(0, bits2j + trim_offset[j]);
+ if (lo > 0)
+ bits1j += offsets[j];
+ bits2j += offsets[j];
+ /* Track the highest band with a positive offset; interp_bits2pulses()
+ never skips bands at or below skip_start. */
+ if (offsets[j]>0)
+ skip_start = j;
+ /* bits2 holds the non-negative difference between the two allocations. */
+ bits2j = IMAX(0,bits2j-bits1j);
+ bits1[j] = bits1j;
+ bits2[j] = bits2j;
+ }
+ codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap,
+ total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv,
+ pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth);
+ RESTORE_STACK;
+ return codedBands;
+}
+
diff --git a/drivers/opus/celt/rate.h b/drivers/opus/celt/rate.h
new file mode 100644
index 0000000000..7ced23ea09
--- /dev/null
+++ b/drivers/opus/celt/rate.h
@@ -0,0 +1,101 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef RATE_H
+#define RATE_H
+
+#define MAX_PSEUDO 40
+#define LOG_MAX_PSEUDO 6
+
+#define MAX_PULSES 128
+
+#define MAX_FINE_BITS 8
+
+#define FINE_OFFSET 21
+#define QTHETA_OFFSET 4
+#define QTHETA_OFFSET_TWOPHASE 16
+
+#include "cwrs.h"
+#include "opus_modes.h"
+
+void compute_pulse_cache(CELTMode *m, int LM);
+
+/* Maps a pseudo-pulse index to a pulse count: indices below 8 map to
+   themselves; above that, the low three bits select a value in 8..15 and
+   the remaining bits select the power-of-two scale. */
+static OPUS_INLINE int get_pulses(int i)
+{
+   int mantissa;
+   int shift;
+
+   if (i < 8)
+      return i;
+   mantissa = 8 + (i & 7);
+   shift = (i >> 3) - 1;
+   return mantissa << shift;
+}
+
+/* Converts a bit budget into a pseudo-pulse index for `band` at shift LM.
+ Searches the band's row of m->cache.bits, whose byte 0 holds the highest
+ valid index, and returns whichever of the two bracketing indices has the
+ cached value closest to bits-1 (ties go to the lower index). Index 0 is
+ treated as having the value -1. */
+static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits)
+{
+ int i;
+ int lo, hi;
+ const unsigned char *cache;
+
+ /* The cache index is addressed with LM+1. */
+ LM++;
+ cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band];
+
+ lo = 0;
+ hi = cache[0];
+ /* The comparisons below are made against bits-1. */
+ bits--;
+ /* Fixed number of bisection steps instead of a data-dependent exit. */
+ for (i=0;i<LOG_MAX_PSEUDO;i++)
+ {
+ int mid = (lo+hi+1)>>1;
+ /* OPT: Make sure this is implemented with a conditional move */
+ if ((int)cache[mid] >= bits)
+ hi = mid;
+ else
+ lo = mid;
+ }
+ /* Pick the closer of the two candidates. */
+ if (bits- (lo == 0 ? -1 : (int)cache[lo]) <= (int)cache[hi]-bits)
+ return lo;
+ else
+ return hi;
+}
+
+/* Returns the cached bit cost of coding `pulses` (a pseudo-pulse index) in
+   `band` at shift LM, or 0 when there are no pulses. */
+static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
+{
+   /* The cache index is addressed with LM+1. */
+   const unsigned char *row = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+band];
+
+   if (pulses == 0)
+      return 0;
+   /* Cached values are stored minus one. */
+   return row[pulses]+1;
+}
+
+/** Compute the bit allocation, i.e. how many bits will go to the pulses and
+ * to the fine energy of each band.
+ @param m mode
+ @param offsets Requested increase or decrease in the number of bits for
+ each band
+ @param total Number of bits available, in units of 1/(1<<BITRES) bit
+ @param pulses Bits left for the pulses of each band (returned)
+ @return Number of coded bands; bands from this index up to end are skipped
+*/
+int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero,
+ opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
+
+#endif
diff --git a/drivers/opus/celt/stack_alloc.h b/drivers/opus/celt/stack_alloc.h
new file mode 100644
index 0000000000..d500c4dab9
--- /dev/null
+++ b/drivers/opus/celt/stack_alloc.h
@@ -0,0 +1,182 @@
+/* Copyright (C) 2002-2003 Jean-Marc Valin
+ Copyright (C) 2007-2009 Xiph.Org Foundation */
+/**
+ @file stack_alloc.h
+ @brief Temporary memory allocation on stack
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef STACK_ALLOC_H
+#define STACK_ALLOC_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK))
+#define VAR_ARRAYS
+#endif
+
+#ifdef USE_ALLOCA
+# ifdef WIN32
+# include <malloc.h>
+# else
+# ifdef OPUS_HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef __linux__
+# include <alloca.h>
+# else
+# include <stdlib.h>
+# endif
+# endif
+# endif
+#endif
+
+/**
+ * @def ALIGN(stack, size)
+ *
+ * Aligns the stack to a 'size' boundary
+ *
+ * @param stack Stack
+ * @param size New size boundary
+ */
+
+/**
+ * @def PUSH(stack, size, type)
+ *
+ * Allocates 'size' elements of type 'type' on the stack
+ *
+ * @param stack Stack
+ * @param size Number of elements
+ * @param type Type of element
+ */
+
+/**
+ * @def VARDECL(type, var)
+ *
+ * Declare variable on stack
+ *
+ * @param type Type of element
+ * @param var Variable to declare
+ */
+
+/**
+ * @def ALLOC(var, size, type)
+ *
+ * Allocate 'size' elements of 'type' on stack
+ *
+ * @param var Name of variable to allocate
+ * @param size Number of elements
+ * @param type Type of element
+ */
+
+/* Three interchangeable implementations of VARDECL/ALLOC/SAVE_STACK/
+ RESTORE_STACK/ALLOC_STACK follow, selected by VAR_ARRAYS, USE_ALLOCA or
+ (otherwise) the pseudostack. */
+
+/* 1. Variable-length arrays: VARDECL() expands to nothing and ALLOC() is
+ the array declaration itself; the stack macros are empty. */
+#if defined(VAR_ARRAYS)
+
+#define VARDECL(type, var)
+#define ALLOC(var, size, type) type var[size]
+#define SAVE_STACK
+#define RESTORE_STACK
+#define ALLOC_STACK
+/* C99 does not allow VLAs of size zero */
+#define ALLOC_NONE 1
+
+/* 2. alloca(): VARDECL() declares a pointer and ALLOC() assigns it the
+ result of alloca() (_alloca() on WIN32); the stack macros are empty. */
+#elif defined(USE_ALLOCA)
+
+#define VARDECL(type, var) type *var
+
+# ifdef WIN32
+# define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size)))
+# else
+# define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
+# endif
+
+#define SAVE_STACK
+#define RESTORE_STACK
+#define ALLOC_STACK
+#define ALLOC_NONE 0
+
+/* 3. Pseudostack: allocations are carved out of a single buffer by
+ advancing the global_stack pointer. The pointer is a plain global, which is
+ defined in the translation unit that defines CELT_C and declared extern
+ everywhere else. */
+#else
+
+#ifdef CELT_C
+char *global_stack=0;
+#else
+extern char *global_stack;
+#endif /* CELT_C */
+
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+
+#ifdef CELT_C
+char *global_stack_top=0;
+#else
+extern char *global_stack_top;
+#endif /* CELT_C */
+
+/* ALIGN rounds the stack pointer up to a multiple of size; the mask
+ (size)-1 presumes size is a power of two.
+ In this Valgrind variant PUSH advances the pointer by twice the requested
+ size but only marks the first half as usable (undefined); everything from
+ the pointer to global_stack_top is marked inaccessible first.
+ ALLOC_STACK creates the buffer on first use with twice GLOBAL_STACK_SIZE. */
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+#define PUSH(stack, size, type) (VALGRIND_MAKE_MEM_NOACCESS(stack, global_stack_top-stack),ALIGN((stack),sizeof(type)/sizeof(char)),VALGRIND_MAKE_MEM_UNDEFINED(stack, ((size)*sizeof(type)/sizeof(char))),(stack)+=(2*(size)*sizeof(type)/sizeof(char)),(type*)((stack)-(2*(size)*sizeof(type)/sizeof(char))))
+#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack))
+#define ALLOC_STACK char *_saved_stack; ((global_stack = (global_stack==0) ? ((global_stack_top=opus_alloc_scratch(GLOBAL_STACK_SIZE*2)+(GLOBAL_STACK_SIZE*2))-(GLOBAL_STACK_SIZE*2)) : global_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)); _saved_stack = global_stack;
+
+#else
+
+/* Plain variant: PUSH aligns the pointer, advances it by the requested size
+ and yields the start of the reserved region. RESTORE_STACK rewinds to the
+ pointer saved in _saved_stack. ALLOC_STACK creates the buffer on first use
+ with opus_alloc_scratch(GLOBAL_STACK_SIZE). */
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
+#define RESTORE_STACK (global_stack = _saved_stack)
+#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack;
+
+#endif /* ENABLE_VALGRIND */
+
+/* Both ALLOC_STACK and SAVE_STACK declare _saved_stack, so a scope can use
+ only one of them. */
+#include "os_support.h"
+#define VARDECL(type, var) type *var
+#define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
+#define SAVE_STACK char *_saved_stack = global_stack;
+#define ALLOC_NONE 0
+
+#endif /* VAR_ARRAYS */
+
+
+/* Debugging helpers. With ENABLE_VALGRIND the OPUS_CHECK_* macros forward to
+ memcheck client requests and the print macros write to stderr/fprintf.
+ Otherwise OPUS_CHECK_ARRAY/OPUS_CHECK_VALUE evaluate to 0 through
+ _opus_false(), OPUS_PRINT_INT does nothing and OPUS_FPRINTF turns its
+ argument list into a discarded expression.
+ NOTE(review): OPUS_CHECK_ARRAY_COND and OPUS_CHECK_VALUE_COND are defined
+ only in the ENABLE_VALGRIND branch - confirm no caller needs them in
+ regular builds. */
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+#define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0)
+#define OPUS_FPRINTF fprintf
+
+#else
+
+static OPUS_INLINE int _opus_false(void) {return 0;}
+#define OPUS_CHECK_ARRAY(ptr, len) _opus_false()
+#define OPUS_CHECK_VALUE(value) _opus_false()
+#define OPUS_PRINT_INT(value) do{}while(0)
+#define OPUS_FPRINTF (void)
+
+#endif
+
+
+#endif /* STACK_ALLOC_H */
diff --git a/drivers/opus/celt/static_modes_fixed.h b/drivers/opus/celt/static_modes_fixed.h
new file mode 100644
index 0000000000..d23e2a66f5
--- /dev/null
+++ b/drivers/opus/celt/static_modes_fixed.h
@@ -0,0 +1,595 @@
+/* The contents of this file were automatically generated by dump_modes.c
+ with arguments: 48000 960
+ It contains static definitions for some pre-defined modes. */
+#include "opus_modes.h"
+#include "rate.h"
+
+#ifndef DEF_WINDOW120
+#define DEF_WINDOW120
+static const opus_val16 window120[120] = {
+2, 20, 55, 108, 178,
+266, 372, 494, 635, 792,
+966, 1157, 1365, 1590, 1831,
+2089, 2362, 2651, 2956, 3276,
+3611, 3961, 4325, 4703, 5094,
+5499, 5916, 6346, 6788, 7241,
+7705, 8179, 8663, 9156, 9657,
+10167, 10684, 11207, 11736, 12271,
+12810, 13353, 13899, 14447, 14997,
+15547, 16098, 16648, 17197, 17744,
+18287, 18827, 19363, 19893, 20418,
+20936, 21447, 21950, 22445, 22931,
+23407, 23874, 24330, 24774, 25208,
+25629, 26039, 26435, 26819, 27190,
+27548, 27893, 28224, 28541, 28845,
+29135, 29411, 29674, 29924, 30160,
+30384, 30594, 30792, 30977, 31151,
+31313, 31463, 31602, 31731, 31849,
+31958, 32057, 32148, 32229, 32303,
+32370, 32429, 32481, 32528, 32568,
+32604, 32634, 32661, 32683, 32701,
+32717, 32729, 32740, 32748, 32754,
+32758, 32762, 32764, 32766, 32767,
+32767, 32767, 32767, 32767, 32767,
+};
+#endif
+
+#ifndef DEF_LOGN400
+#define DEF_LOGN400
+static const opus_int16 logN400[21] = {
+0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
+#endif
+
+#ifndef DEF_PULSE_CACHE50
+#define DEF_PULSE_CACHE50
+static const opus_int16 cache_index50[105] = {
+-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
+82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
+41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
+41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
+318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
+305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
+240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
+};
+static const unsigned char cache_bits50[392] = {
+40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
+31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
+51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
+66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
+64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
+94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
+124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
+97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
+142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
+28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
+153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
+229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
+166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
+86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
+25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
+185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
+110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
+74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
+163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
+228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
+90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
+87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
+106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
+224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
+182, 234, };
+static const unsigned char cache_caps50[168] = {
+224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
+178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
+240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
+160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172,
+138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207,
+204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185,
+185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39,
+207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201,
+188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193,
+193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204,
+204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175,
+140, 66, 40, };
+#endif
+
+#ifndef FFT_TWIDDLES48000_960
+#define FFT_TWIDDLES48000_960
+static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
+{32767, 0}, {32766, -429},
+{32757, -858}, {32743, -1287},
+{32724, -1715}, {32698, -2143},
+{32667, -2570}, {32631, -2998},
+{32588, -3425}, {32541, -3851},
+{32488, -4277}, {32429, -4701},
+{32364, -5125}, {32295, -5548},
+{32219, -5971}, {32138, -6393},
+{32051, -6813}, {31960, -7231},
+{31863, -7650}, {31760, -8067},
+{31652, -8481}, {31539, -8895},
+{31419, -9306}, {31294, -9716},
+{31165, -10126}, {31030, -10532},
+{30889, -10937}, {30743, -11340},
+{30592, -11741}, {30436, -12141},
+{30274, -12540}, {30107, -12935},
+{29936, -13328}, {29758, -13718},
+{29577, -14107}, {29390, -14493},
+{29197, -14875}, {29000, -15257},
+{28797, -15635}, {28590, -16010},
+{28379, -16384}, {28162, -16753},
+{27940, -17119}, {27714, -17484},
+{27482, -17845}, {27246, -18205},
+{27006, -18560}, {26760, -18911},
+{26510, -19260}, {26257, -19606},
+{25997, -19947}, {25734, -20286},
+{25466, -20621}, {25194, -20952},
+{24918, -21281}, {24637, -21605},
+{24353, -21926}, {24063, -22242},
+{23770, -22555}, {23473, -22865},
+{23171, -23171}, {22866, -23472},
+{22557, -23769}, {22244, -24063},
+{21927, -24352}, {21606, -24636},
+{21282, -24917}, {20954, -25194},
+{20622, -25465}, {20288, -25733},
+{19949, -25997}, {19607, -26255},
+{19261, -26509}, {18914, -26760},
+{18561, -27004}, {18205, -27246},
+{17846, -27481}, {17485, -27713},
+{17122, -27940}, {16755, -28162},
+{16385, -28378}, {16012, -28590},
+{15636, -28797}, {15258, -28999},
+{14878, -29197}, {14494, -29389},
+{14108, -29576}, {13720, -29757},
+{13329, -29934}, {12937, -30107},
+{12540, -30274}, {12142, -30435},
+{11744, -30592}, {11342, -30743},
+{10939, -30889}, {10534, -31030},
+{10127, -31164}, {9718, -31294},
+{9307, -31418}, {8895, -31537},
+{8482, -31652}, {8067, -31759},
+{7650, -31862}, {7233, -31960},
+{6815, -32051}, {6393, -32138},
+{5973, -32219}, {5549, -32294},
+{5127, -32364}, {4703, -32429},
+{4278, -32487}, {3852, -32541},
+{3426, -32588}, {2999, -32630},
+{2572, -32667}, {2144, -32698},
+{1716, -32724}, {1287, -32742},
+{860, -32757}, {430, -32766},
+{0, -32767}, {-429, -32766},
+{-858, -32757}, {-1287, -32743},
+{-1715, -32724}, {-2143, -32698},
+{-2570, -32667}, {-2998, -32631},
+{-3425, -32588}, {-3851, -32541},
+{-4277, -32488}, {-4701, -32429},
+{-5125, -32364}, {-5548, -32295},
+{-5971, -32219}, {-6393, -32138},
+{-6813, -32051}, {-7231, -31960},
+{-7650, -31863}, {-8067, -31760},
+{-8481, -31652}, {-8895, -31539},
+{-9306, -31419}, {-9716, -31294},
+{-10126, -31165}, {-10532, -31030},
+{-10937, -30889}, {-11340, -30743},
+{-11741, -30592}, {-12141, -30436},
+{-12540, -30274}, {-12935, -30107},
+{-13328, -29936}, {-13718, -29758},
+{-14107, -29577}, {-14493, -29390},
+{-14875, -29197}, {-15257, -29000},
+{-15635, -28797}, {-16010, -28590},
+{-16384, -28379}, {-16753, -28162},
+{-17119, -27940}, {-17484, -27714},
+{-17845, -27482}, {-18205, -27246},
+{-18560, -27006}, {-18911, -26760},
+{-19260, -26510}, {-19606, -26257},
+{-19947, -25997}, {-20286, -25734},
+{-20621, -25466}, {-20952, -25194},
+{-21281, -24918}, {-21605, -24637},
+{-21926, -24353}, {-22242, -24063},
+{-22555, -23770}, {-22865, -23473},
+{-23171, -23171}, {-23472, -22866},
+{-23769, -22557}, {-24063, -22244},
+{-24352, -21927}, {-24636, -21606},
+{-24917, -21282}, {-25194, -20954},
+{-25465, -20622}, {-25733, -20288},
+{-25997, -19949}, {-26255, -19607},
+{-26509, -19261}, {-26760, -18914},
+{-27004, -18561}, {-27246, -18205},
+{-27481, -17846}, {-27713, -17485},
+{-27940, -17122}, {-28162, -16755},
+{-28378, -16385}, {-28590, -16012},
+{-28797, -15636}, {-28999, -15258},
+{-29197, -14878}, {-29389, -14494},
+{-29576, -14108}, {-29757, -13720},
+{-29934, -13329}, {-30107, -12937},
+{-30274, -12540}, {-30435, -12142},
+{-30592, -11744}, {-30743, -11342},
+{-30889, -10939}, {-31030, -10534},
+{-31164, -10127}, {-31294, -9718},
+{-31418, -9307}, {-31537, -8895},
+{-31652, -8482}, {-31759, -8067},
+{-31862, -7650}, {-31960, -7233},
+{-32051, -6815}, {-32138, -6393},
+{-32219, -5973}, {-32294, -5549},
+{-32364, -5127}, {-32429, -4703},
+{-32487, -4278}, {-32541, -3852},
+{-32588, -3426}, {-32630, -2999},
+{-32667, -2572}, {-32698, -2144},
+{-32724, -1716}, {-32742, -1287},
+{-32757, -860}, {-32766, -430},
+{-32767, 0}, {-32766, 429},
+{-32757, 858}, {-32743, 1287},
+{-32724, 1715}, {-32698, 2143},
+{-32667, 2570}, {-32631, 2998},
+{-32588, 3425}, {-32541, 3851},
+{-32488, 4277}, {-32429, 4701},
+{-32364, 5125}, {-32295, 5548},
+{-32219, 5971}, {-32138, 6393},
+{-32051, 6813}, {-31960, 7231},
+{-31863, 7650}, {-31760, 8067},
+{-31652, 8481}, {-31539, 8895},
+{-31419, 9306}, {-31294, 9716},
+{-31165, 10126}, {-31030, 10532},
+{-30889, 10937}, {-30743, 11340},
+{-30592, 11741}, {-30436, 12141},
+{-30274, 12540}, {-30107, 12935},
+{-29936, 13328}, {-29758, 13718},
+{-29577, 14107}, {-29390, 14493},
+{-29197, 14875}, {-29000, 15257},
+{-28797, 15635}, {-28590, 16010},
+{-28379, 16384}, {-28162, 16753},
+{-27940, 17119}, {-27714, 17484},
+{-27482, 17845}, {-27246, 18205},
+{-27006, 18560}, {-26760, 18911},
+{-26510, 19260}, {-26257, 19606},
+{-25997, 19947}, {-25734, 20286},
+{-25466, 20621}, {-25194, 20952},
+{-24918, 21281}, {-24637, 21605},
+{-24353, 21926}, {-24063, 22242},
+{-23770, 22555}, {-23473, 22865},
+{-23171, 23171}, {-22866, 23472},
+{-22557, 23769}, {-22244, 24063},
+{-21927, 24352}, {-21606, 24636},
+{-21282, 24917}, {-20954, 25194},
+{-20622, 25465}, {-20288, 25733},
+{-19949, 25997}, {-19607, 26255},
+{-19261, 26509}, {-18914, 26760},
+{-18561, 27004}, {-18205, 27246},
+{-17846, 27481}, {-17485, 27713},
+{-17122, 27940}, {-16755, 28162},
+{-16385, 28378}, {-16012, 28590},
+{-15636, 28797}, {-15258, 28999},
+{-14878, 29197}, {-14494, 29389},
+{-14108, 29576}, {-13720, 29757},
+{-13329, 29934}, {-12937, 30107},
+{-12540, 30274}, {-12142, 30435},
+{-11744, 30592}, {-11342, 30743},
+{-10939, 30889}, {-10534, 31030},
+{-10127, 31164}, {-9718, 31294},
+{-9307, 31418}, {-8895, 31537},
+{-8482, 31652}, {-8067, 31759},
+{-7650, 31862}, {-7233, 31960},
+{-6815, 32051}, {-6393, 32138},
+{-5973, 32219}, {-5549, 32294},
+{-5127, 32364}, {-4703, 32429},
+{-4278, 32487}, {-3852, 32541},
+{-3426, 32588}, {-2999, 32630},
+{-2572, 32667}, {-2144, 32698},
+{-1716, 32724}, {-1287, 32742},
+{-860, 32757}, {-430, 32766},
+{0, 32767}, {429, 32766},
+{858, 32757}, {1287, 32743},
+{1715, 32724}, {2143, 32698},
+{2570, 32667}, {2998, 32631},
+{3425, 32588}, {3851, 32541},
+{4277, 32488}, {4701, 32429},
+{5125, 32364}, {5548, 32295},
+{5971, 32219}, {6393, 32138},
+{6813, 32051}, {7231, 31960},
+{7650, 31863}, {8067, 31760},
+{8481, 31652}, {8895, 31539},
+{9306, 31419}, {9716, 31294},
+{10126, 31165}, {10532, 31030},
+{10937, 30889}, {11340, 30743},
+{11741, 30592}, {12141, 30436},
+{12540, 30274}, {12935, 30107},
+{13328, 29936}, {13718, 29758},
+{14107, 29577}, {14493, 29390},
+{14875, 29197}, {15257, 29000},
+{15635, 28797}, {16010, 28590},
+{16384, 28379}, {16753, 28162},
+{17119, 27940}, {17484, 27714},
+{17845, 27482}, {18205, 27246},
+{18560, 27006}, {18911, 26760},
+{19260, 26510}, {19606, 26257},
+{19947, 25997}, {20286, 25734},
+{20621, 25466}, {20952, 25194},
+{21281, 24918}, {21605, 24637},
+{21926, 24353}, {22242, 24063},
+{22555, 23770}, {22865, 23473},
+{23171, 23171}, {23472, 22866},
+{23769, 22557}, {24063, 22244},
+{24352, 21927}, {24636, 21606},
+{24917, 21282}, {25194, 20954},
+{25465, 20622}, {25733, 20288},
+{25997, 19949}, {26255, 19607},
+{26509, 19261}, {26760, 18914},
+{27004, 18561}, {27246, 18205},
+{27481, 17846}, {27713, 17485},
+{27940, 17122}, {28162, 16755},
+{28378, 16385}, {28590, 16012},
+{28797, 15636}, {28999, 15258},
+{29197, 14878}, {29389, 14494},
+{29576, 14108}, {29757, 13720},
+{29934, 13329}, {30107, 12937},
+{30274, 12540}, {30435, 12142},
+{30592, 11744}, {30743, 11342},
+{30889, 10939}, {31030, 10534},
+{31164, 10127}, {31294, 9718},
+{31418, 9307}, {31537, 8895},
+{31652, 8482}, {31759, 8067},
+{31862, 7650}, {31960, 7233},
+{32051, 6815}, {32138, 6393},
+{32219, 5973}, {32294, 5549},
+{32364, 5127}, {32429, 4703},
+{32487, 4278}, {32541, 3852},
+{32588, 3426}, {32630, 2999},
+{32667, 2572}, {32698, 2144},
+{32724, 1716}, {32742, 1287},
+{32757, 860}, {32766, 430},
+};
+#ifndef FFT_BITREV480
+#define FFT_BITREV480
+static const opus_int16 fft_bitrev480[480] = {
+0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
+450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
+345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
+215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
+110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
+430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
+325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
+181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
+76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
+396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
+291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
+161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
+56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
+362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
+257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
+127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
+22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
+472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
+342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
+237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
+93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
+438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
+308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
+203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
+73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
+418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
+274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
+169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
+39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
+384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
+254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
+149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
+};
+#endif
+
+#ifndef FFT_BITREV240
+#define FFT_BITREV240
+static const opus_int16 fft_bitrev240[240] = {
+0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
+225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
+170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
+115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
+46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
+216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
+161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
+92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
+37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
+207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
+138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
+83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
+28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
+184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
+129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
+74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
+};
+#endif
+
+#ifndef FFT_BITREV120
+#define FFT_BITREV120
+static const opus_int16 fft_bitrev120[120] = {
+0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
+110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
+76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
+56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
+22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
+93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
+73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
+39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
+};
+#endif
+
+#ifndef FFT_BITREV60
+#define FFT_BITREV60
+static const opus_int16 fft_bitrev60[60] = {
+0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
+46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
+37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
+28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
+};
+#endif
+
+#ifndef FFT_STATE48000_960_0
+#define FFT_STATE48000_960_0
+static const kiss_fft_state fft_state48000_960_0 = { /* 480-point FFT state built on the shared 48000/960 twiddle table */
+480, /* nfft */
+-1, /* shift */
+{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev480, /* bitrev */
+fft_twiddles48000_960, /* twiddles */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_1
+#define FFT_STATE48000_960_1
+static const kiss_fft_state fft_state48000_960_1 = { /* 240-point FFT state built on the shared 48000/960 twiddle table */
+240, /* nfft */
+1, /* shift */
+{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev240, /* bitrev */
+fft_twiddles48000_960, /* twiddles */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_2
+#define FFT_STATE48000_960_2
+static const kiss_fft_state fft_state48000_960_2 = { /* 120-point FFT state built on the shared 48000/960 twiddle table */
+120, /* nfft */
+2, /* shift */
+{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev120, /* bitrev */
+fft_twiddles48000_960, /* twiddles */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_3
+#define FFT_STATE48000_960_3
+static const kiss_fft_state fft_state48000_960_3 = { /* 60-point FFT state built on the shared 48000/960 twiddle table */
+60, /* nfft */
+3, /* shift */
+{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev60, /* bitrev */
+fft_twiddles48000_960, /* twiddles */
+};
+#endif
+
+#endif
+
+#ifndef MDCT_TWIDDLES960
+#define MDCT_TWIDDLES960
+static const opus_val16 mdct_twiddles960[481] = {
+32767, 32767, 32767, 32767, 32766,
+32763, 32762, 32759, 32757, 32753,
+32751, 32747, 32743, 32738, 32733,
+32729, 32724, 32717, 32711, 32705,
+32698, 32690, 32683, 32676, 32667,
+32658, 32650, 32640, 32631, 32620,
+32610, 32599, 32588, 32577, 32566,
+32554, 32541, 32528, 32515, 32502,
+32487, 32474, 32459, 32444, 32429,
+32413, 32397, 32381, 32364, 32348,
+32331, 32313, 32294, 32277, 32257,
+32239, 32219, 32200, 32180, 32159,
+32138, 32118, 32096, 32074, 32051,
+32029, 32006, 31984, 31960, 31936,
+31912, 31888, 31863, 31837, 31812,
+31786, 31760, 31734, 31707, 31679,
+31652, 31624, 31596, 31567, 31539,
+31508, 31479, 31450, 31419, 31388,
+31357, 31326, 31294, 31262, 31230,
+31198, 31164, 31131, 31097, 31063,
+31030, 30994, 30959, 30924, 30889,
+30853, 30816, 30779, 30743, 30705,
+30668, 30629, 30592, 30553, 30515,
+30475, 30435, 30396, 30356, 30315,
+30274, 30233, 30191, 30149, 30107,
+30065, 30022, 29979, 29936, 29891,
+29847, 29803, 29758, 29713, 29668,
+29622, 29577, 29529, 29483, 29436,
+29390, 29341, 29293, 29246, 29197,
+29148, 29098, 29050, 29000, 28949,
+28899, 28848, 28797, 28746, 28694,
+28642, 28590, 28537, 28485, 28432,
+28378, 28324, 28271, 28217, 28162,
+28106, 28051, 27995, 27940, 27884,
+27827, 27770, 27713, 27657, 27598,
+27540, 27481, 27423, 27365, 27305,
+27246, 27187, 27126, 27066, 27006,
+26945, 26883, 26822, 26760, 26698,
+26636, 26574, 26510, 26448, 26383,
+26320, 26257, 26191, 26127, 26062,
+25997, 25931, 25866, 25800, 25734,
+25667, 25601, 25533, 25466, 25398,
+25330, 25262, 25194, 25125, 25056,
+24987, 24917, 24848, 24778, 24707,
+24636, 24566, 24495, 24424, 24352,
+24280, 24208, 24135, 24063, 23990,
+23917, 23842, 23769, 23695, 23622,
+23546, 23472, 23398, 23322, 23246,
+23171, 23095, 23018, 22942, 22866,
+22788, 22711, 22634, 22557, 22478,
+22400, 22322, 22244, 22165, 22085,
+22006, 21927, 21846, 21766, 21687,
+21606, 21524, 21443, 21363, 21282,
+21199, 21118, 21035, 20954, 20870,
+20788, 20705, 20621, 20538, 20455,
+20371, 20286, 20202, 20118, 20034,
+19947, 19863, 19777, 19692, 19606,
+19520, 19434, 19347, 19260, 19174,
+19088, 18999, 18911, 18825, 18737,
+18648, 18560, 18472, 18384, 18294,
+18205, 18116, 18025, 17936, 17846,
+17757, 17666, 17576, 17485, 17395,
+17303, 17212, 17122, 17030, 16937,
+16846, 16755, 16662, 16569, 16477,
+16385, 16291, 16198, 16105, 16012,
+15917, 15824, 15730, 15636, 15541,
+15447, 15352, 15257, 15162, 15067,
+14973, 14875, 14781, 14685, 14589,
+14493, 14396, 14300, 14204, 14107,
+14010, 13914, 13815, 13718, 13621,
+13524, 13425, 13328, 13230, 13133,
+13033, 12935, 12836, 12738, 12638,
+12540, 12441, 12341, 12241, 12142,
+12044, 11943, 11843, 11744, 11643,
+11542, 11442, 11342, 11241, 11139,
+11039, 10939, 10836, 10736, 10635,
+10534, 10431, 10330, 10228, 10127,
+10024, 9921, 9820, 9718, 9614,
+9512, 9410, 9306, 9204, 9101,
+8998, 8895, 8791, 8689, 8585,
+8481, 8377, 8274, 8171, 8067,
+7962, 7858, 7753, 7650, 7545,
+7441, 7336, 7231, 7129, 7023,
+6917, 6813, 6709, 6604, 6498,
+6393, 6288, 6182, 6077, 5973,
+5867, 5760, 5656, 5549, 5445,
+5339, 5232, 5127, 5022, 4914,
+4809, 4703, 4596, 4490, 4384,
+4278, 4171, 4065, 3958, 3852,
+3745, 3640, 3532, 3426, 3318,
+3212, 3106, 2998, 2891, 2786,
+2679, 2570, 2465, 2358, 2251,
+2143, 2037, 1929, 1823, 1715,
+1609, 1501, 1393, 1287, 1180,
+1073, 964, 858, 751, 644,
+535, 429, 322, 214, 107,
+0, };
+#endif
+
+static const CELTMode mode48000_960_120 = { /* the single pre-generated mode: Fs 48000, 8 short MDCTs of 120 samples (8 * 120 = 960), overlap 120 */
+48000, /* Fs */
+120, /* overlap */
+21, /* nbEBands */
+21, /* effEBands */
+{27853, 0, 4096, 8192, }, /* preemph */
+eband5ms, /* eBands */
+3, /* maxLM */
+8, /* nbShortMdcts */
+120, /* shortMdctSize */
+11, /* nbAllocVectors */
+band_allocation, /* allocVectors */
+logN400, /* logN */
+window120, /* window */
+{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960}, /* mdct */
+{392, cache_index50, cache_bits50, cache_caps50}, /* cache */
+};
+
+/* List of all the available modes */
+#define TOTAL_MODES 1 /* number of entries in static_mode_list below */
+static const CELTMode * const static_mode_list[TOTAL_MODES] = { /* pointers to every statically defined mode in this header */
+&mode48000_960_120,
+};
diff --git a/drivers/opus/celt/static_modes_float.h b/drivers/opus/celt/static_modes_float.h
new file mode 100644
index 0000000000..fe6bb4c8a3
--- /dev/null
+++ b/drivers/opus/celt/static_modes_float.h
@@ -0,0 +1,599 @@
+/* The contents of this file were automatically generated by dump_modes.c
+ with arguments: 48000 960
+ It contains static definitions for some pre-defined modes. */
+#include "opus_modes.h"
+#include "rate.h"
+
+#ifndef DEF_WINDOW120
+#define DEF_WINDOW120
+static const opus_val16 window120[120] = {
+6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f,
+0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f,
+0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f,
+0.063737999f, 0.072081616f, 0.080907428f, 0.090207705f, 0.099974111f,
+0.11019769f, 0.12086883f, 0.13197729f, 0.14351214f, 0.15546177f,
+0.16781389f, 0.18055550f, 0.19367290f, 0.20715171f, 0.22097682f,
+0.23513243f, 0.24960208f, 0.26436860f, 0.27941419f, 0.29472040f,
+0.31026818f, 0.32603788f, 0.34200931f, 0.35816177f, 0.37447407f,
+0.39092462f, 0.40749142f, 0.42415215f, 0.44088423f, 0.45766484f,
+0.47447104f, 0.49127978f, 0.50806798f, 0.52481261f, 0.54149077f,
+0.55807973f, 0.57455701f, 0.59090049f, 0.60708841f, 0.62309951f,
+0.63891306f, 0.65450896f, 0.66986776f, 0.68497077f, 0.69980010f,
+0.71433873f, 0.72857055f, 0.74248043f, 0.75605424f, 0.76927895f,
+0.78214257f, 0.79463430f, 0.80674445f, 0.81846456f, 0.82978733f,
+0.84070669f, 0.85121779f, 0.86131698f, 0.87100183f, 0.88027111f,
+0.88912479f, 0.89756398f, 0.90559094f, 0.91320904f, 0.92042270f,
+0.92723738f, 0.93365955f, 0.93969656f, 0.94535671f, 0.95064907f,
+0.95558353f, 0.96017067f, 0.96442171f, 0.96834849f, 0.97196334f,
+0.97527906f, 0.97830883f, 0.98106616f, 0.98356480f, 0.98581869f,
+0.98784191f, 0.98964856f, 0.99125274f, 0.99266849f, 0.99390969f,
+0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f,
+0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f,
+0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f,
+0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f,
+};
+#endif
+
+#ifndef DEF_LOGN400
+#define DEF_LOGN400
+static const opus_int16 logN400[21] = {
+0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
+#endif
+
+#ifndef DEF_PULSE_CACHE50
+#define DEF_PULSE_CACHE50
+static const opus_int16 cache_index50[105] = {
+-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
+82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
+41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
+41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
+318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
+305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
+240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
+};
+static const unsigned char cache_bits50[392] = {
+40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
+31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
+51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
+66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
+64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
+94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
+124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
+97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
+142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
+28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
+153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
+229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
+166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
+86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
+25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
+185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
+110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
+74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
+163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
+228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
+90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
+87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
+106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
+224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
+182, 234, };
+static const unsigned char cache_caps50[168] = {
+224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
+178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
+240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
+160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172,
+138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207,
+204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185,
+185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39,
+207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201,
+188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193,
+193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204,
+204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175,
+140, 66, 40, };
+#endif
+
+#ifndef FFT_TWIDDLES48000_960
+#define FFT_TWIDDLES48000_960
+static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
+{1.0000000f, -0.0000000f}, {0.99991433f, -0.013089596f},
+{0.99965732f, -0.026176948f}, {0.99922904f, -0.039259816f},
+{0.99862953f, -0.052335956f}, {0.99785892f, -0.065403129f},
+{0.99691733f, -0.078459096f}, {0.99580493f, -0.091501619f},
+{0.99452190f, -0.10452846f}, {0.99306846f, -0.11753740f},
+{0.99144486f, -0.13052619f}, {0.98965139f, -0.14349262f},
+{0.98768834f, -0.15643447f}, {0.98555606f, -0.16934950f},
+{0.98325491f, -0.18223553f}, {0.98078528f, -0.19509032f},
+{0.97814760f, -0.20791169f}, {0.97534232f, -0.22069744f},
+{0.97236992f, -0.23344536f}, {0.96923091f, -0.24615329f},
+{0.96592583f, -0.25881905f}, {0.96245524f, -0.27144045f},
+{0.95881973f, -0.28401534f}, {0.95501994f, -0.29654157f},
+{0.95105652f, -0.30901699f}, {0.94693013f, -0.32143947f},
+{0.94264149f, -0.33380686f}, {0.93819134f, -0.34611706f},
+{0.93358043f, -0.35836795f}, {0.92880955f, -0.37055744f},
+{0.92387953f, -0.38268343f}, {0.91879121f, -0.39474386f},
+{0.91354546f, -0.40673664f}, {0.90814317f, -0.41865974f},
+{0.90258528f, -0.43051110f}, {0.89687274f, -0.44228869f},
+{0.89100652f, -0.45399050f}, {0.88498764f, -0.46561452f},
+{0.87881711f, -0.47715876f}, {0.87249601f, -0.48862124f},
+{0.86602540f, -0.50000000f}, {0.85940641f, -0.51129309f},
+{0.85264016f, -0.52249856f}, {0.84572782f, -0.53361452f},
+{0.83867057f, -0.54463904f}, {0.83146961f, -0.55557023f},
+{0.82412619f, -0.56640624f}, {0.81664156f, -0.57714519f},
+{0.80901699f, -0.58778525f}, {0.80125381f, -0.59832460f},
+{0.79335334f, -0.60876143f}, {0.78531693f, -0.61909395f},
+{0.77714596f, -0.62932039f}, {0.76884183f, -0.63943900f},
+{0.76040597f, -0.64944805f}, {0.75183981f, -0.65934582f},
+{0.74314483f, -0.66913061f}, {0.73432251f, -0.67880075f},
+{0.72537437f, -0.68835458f}, {0.71630194f, -0.69779046f},
+{0.70710678f, -0.70710678f}, {0.69779046f, -0.71630194f},
+{0.68835458f, -0.72537437f}, {0.67880075f, -0.73432251f},
+{0.66913061f, -0.74314483f}, {0.65934582f, -0.75183981f},
+{0.64944805f, -0.76040597f}, {0.63943900f, -0.76884183f},
+{0.62932039f, -0.77714596f}, {0.61909395f, -0.78531693f},
+{0.60876143f, -0.79335334f}, {0.59832460f, -0.80125381f},
+{0.58778525f, -0.80901699f}, {0.57714519f, -0.81664156f},
+{0.56640624f, -0.82412619f}, {0.55557023f, -0.83146961f},
+{0.54463904f, -0.83867057f}, {0.53361452f, -0.84572782f},
+{0.52249856f, -0.85264016f}, {0.51129309f, -0.85940641f},
+{0.50000000f, -0.86602540f}, {0.48862124f, -0.87249601f},
+{0.47715876f, -0.87881711f}, {0.46561452f, -0.88498764f},
+{0.45399050f, -0.89100652f}, {0.44228869f, -0.89687274f},
+{0.43051110f, -0.90258528f}, {0.41865974f, -0.90814317f},
+{0.40673664f, -0.91354546f}, {0.39474386f, -0.91879121f},
+{0.38268343f, -0.92387953f}, {0.37055744f, -0.92880955f},
+{0.35836795f, -0.93358043f}, {0.34611706f, -0.93819134f},
+{0.33380686f, -0.94264149f}, {0.32143947f, -0.94693013f},
+{0.30901699f, -0.95105652f}, {0.29654157f, -0.95501994f},
+{0.28401534f, -0.95881973f}, {0.27144045f, -0.96245524f},
+{0.25881905f, -0.96592583f}, {0.24615329f, -0.96923091f},
+{0.23344536f, -0.97236992f}, {0.22069744f, -0.97534232f},
+{0.20791169f, -0.97814760f}, {0.19509032f, -0.98078528f},
+{0.18223553f, -0.98325491f}, {0.16934950f, -0.98555606f},
+{0.15643447f, -0.98768834f}, {0.14349262f, -0.98965139f},
+{0.13052619f, -0.99144486f}, {0.11753740f, -0.99306846f},
+{0.10452846f, -0.99452190f}, {0.091501619f, -0.99580493f},
+{0.078459096f, -0.99691733f}, {0.065403129f, -0.99785892f},
+{0.052335956f, -0.99862953f}, {0.039259816f, -0.99922904f},
+{0.026176948f, -0.99965732f}, {0.013089596f, -0.99991433f},
+{6.1230318e-17f, -1.0000000f}, {-0.013089596f, -0.99991433f},
+{-0.026176948f, -0.99965732f}, {-0.039259816f, -0.99922904f},
+{-0.052335956f, -0.99862953f}, {-0.065403129f, -0.99785892f},
+{-0.078459096f, -0.99691733f}, {-0.091501619f, -0.99580493f},
+{-0.10452846f, -0.99452190f}, {-0.11753740f, -0.99306846f},
+{-0.13052619f, -0.99144486f}, {-0.14349262f, -0.98965139f},
+{-0.15643447f, -0.98768834f}, {-0.16934950f, -0.98555606f},
+{-0.18223553f, -0.98325491f}, {-0.19509032f, -0.98078528f},
+{-0.20791169f, -0.97814760f}, {-0.22069744f, -0.97534232f},
+{-0.23344536f, -0.97236992f}, {-0.24615329f, -0.96923091f},
+{-0.25881905f, -0.96592583f}, {-0.27144045f, -0.96245524f},
+{-0.28401534f, -0.95881973f}, {-0.29654157f, -0.95501994f},
+{-0.30901699f, -0.95105652f}, {-0.32143947f, -0.94693013f},
+{-0.33380686f, -0.94264149f}, {-0.34611706f, -0.93819134f},
+{-0.35836795f, -0.93358043f}, {-0.37055744f, -0.92880955f},
+{-0.38268343f, -0.92387953f}, {-0.39474386f, -0.91879121f},
+{-0.40673664f, -0.91354546f}, {-0.41865974f, -0.90814317f},
+{-0.43051110f, -0.90258528f}, {-0.44228869f, -0.89687274f},
+{-0.45399050f, -0.89100652f}, {-0.46561452f, -0.88498764f},
+{-0.47715876f, -0.87881711f}, {-0.48862124f, -0.87249601f},
+{-0.50000000f, -0.86602540f}, {-0.51129309f, -0.85940641f},
+{-0.52249856f, -0.85264016f}, {-0.53361452f, -0.84572782f},
+{-0.54463904f, -0.83867057f}, {-0.55557023f, -0.83146961f},
+{-0.56640624f, -0.82412619f}, {-0.57714519f, -0.81664156f},
+{-0.58778525f, -0.80901699f}, {-0.59832460f, -0.80125381f},
+{-0.60876143f, -0.79335334f}, {-0.61909395f, -0.78531693f},
+{-0.62932039f, -0.77714596f}, {-0.63943900f, -0.76884183f},
+{-0.64944805f, -0.76040597f}, {-0.65934582f, -0.75183981f},
+{-0.66913061f, -0.74314483f}, {-0.67880075f, -0.73432251f},
+{-0.68835458f, -0.72537437f}, {-0.69779046f, -0.71630194f},
+{-0.70710678f, -0.70710678f}, {-0.71630194f, -0.69779046f},
+{-0.72537437f, -0.68835458f}, {-0.73432251f, -0.67880075f},
+{-0.74314483f, -0.66913061f}, {-0.75183981f, -0.65934582f},
+{-0.76040597f, -0.64944805f}, {-0.76884183f, -0.63943900f},
+{-0.77714596f, -0.62932039f}, {-0.78531693f, -0.61909395f},
+{-0.79335334f, -0.60876143f}, {-0.80125381f, -0.59832460f},
+{-0.80901699f, -0.58778525f}, {-0.81664156f, -0.57714519f},
+{-0.82412619f, -0.56640624f}, {-0.83146961f, -0.55557023f},
+{-0.83867057f, -0.54463904f}, {-0.84572782f, -0.53361452f},
+{-0.85264016f, -0.52249856f}, {-0.85940641f, -0.51129309f},
+{-0.86602540f, -0.50000000f}, {-0.87249601f, -0.48862124f},
+{-0.87881711f, -0.47715876f}, {-0.88498764f, -0.46561452f},
+{-0.89100652f, -0.45399050f}, {-0.89687274f, -0.44228869f},
+{-0.90258528f, -0.43051110f}, {-0.90814317f, -0.41865974f},
+{-0.91354546f, -0.40673664f}, {-0.91879121f, -0.39474386f},
+{-0.92387953f, -0.38268343f}, {-0.92880955f, -0.37055744f},
+{-0.93358043f, -0.35836795f}, {-0.93819134f, -0.34611706f},
+{-0.94264149f, -0.33380686f}, {-0.94693013f, -0.32143947f},
+{-0.95105652f, -0.30901699f}, {-0.95501994f, -0.29654157f},
+{-0.95881973f, -0.28401534f}, {-0.96245524f, -0.27144045f},
+{-0.96592583f, -0.25881905f}, {-0.96923091f, -0.24615329f},
+{-0.97236992f, -0.23344536f}, {-0.97534232f, -0.22069744f},
+{-0.97814760f, -0.20791169f}, {-0.98078528f, -0.19509032f},
+{-0.98325491f, -0.18223553f}, {-0.98555606f, -0.16934950f},
+{-0.98768834f, -0.15643447f}, {-0.98965139f, -0.14349262f},
+{-0.99144486f, -0.13052619f}, {-0.99306846f, -0.11753740f},
+{-0.99452190f, -0.10452846f}, {-0.99580493f, -0.091501619f},
+{-0.99691733f, -0.078459096f}, {-0.99785892f, -0.065403129f},
+{-0.99862953f, -0.052335956f}, {-0.99922904f, -0.039259816f},
+{-0.99965732f, -0.026176948f}, {-0.99991433f, -0.013089596f},
+{-1.0000000f, -1.2246064e-16f}, {-0.99991433f, 0.013089596f},
+{-0.99965732f, 0.026176948f}, {-0.99922904f, 0.039259816f},
+{-0.99862953f, 0.052335956f}, {-0.99785892f, 0.065403129f},
+{-0.99691733f, 0.078459096f}, {-0.99580493f, 0.091501619f},
+{-0.99452190f, 0.10452846f}, {-0.99306846f, 0.11753740f},
+{-0.99144486f, 0.13052619f}, {-0.98965139f, 0.14349262f},
+{-0.98768834f, 0.15643447f}, {-0.98555606f, 0.16934950f},
+{-0.98325491f, 0.18223553f}, {-0.98078528f, 0.19509032f},
+{-0.97814760f, 0.20791169f}, {-0.97534232f, 0.22069744f},
+{-0.97236992f, 0.23344536f}, {-0.96923091f, 0.24615329f},
+{-0.96592583f, 0.25881905f}, {-0.96245524f, 0.27144045f},
+{-0.95881973f, 0.28401534f}, {-0.95501994f, 0.29654157f},
+{-0.95105652f, 0.30901699f}, {-0.94693013f, 0.32143947f},
+{-0.94264149f, 0.33380686f}, {-0.93819134f, 0.34611706f},
+{-0.93358043f, 0.35836795f}, {-0.92880955f, 0.37055744f},
+{-0.92387953f, 0.38268343f}, {-0.91879121f, 0.39474386f},
+{-0.91354546f, 0.40673664f}, {-0.90814317f, 0.41865974f},
+{-0.90258528f, 0.43051110f}, {-0.89687274f, 0.44228869f},
+{-0.89100652f, 0.45399050f}, {-0.88498764f, 0.46561452f},
+{-0.87881711f, 0.47715876f}, {-0.87249601f, 0.48862124f},
+{-0.86602540f, 0.50000000f}, {-0.85940641f, 0.51129309f},
+{-0.85264016f, 0.52249856f}, {-0.84572782f, 0.53361452f},
+{-0.83867057f, 0.54463904f}, {-0.83146961f, 0.55557023f},
+{-0.82412619f, 0.56640624f}, {-0.81664156f, 0.57714519f},
+{-0.80901699f, 0.58778525f}, {-0.80125381f, 0.59832460f},
+{-0.79335334f, 0.60876143f}, {-0.78531693f, 0.61909395f},
+{-0.77714596f, 0.62932039f}, {-0.76884183f, 0.63943900f},
+{-0.76040597f, 0.64944805f}, {-0.75183981f, 0.65934582f},
+{-0.74314483f, 0.66913061f}, {-0.73432251f, 0.67880075f},
+{-0.72537437f, 0.68835458f}, {-0.71630194f, 0.69779046f},
+{-0.70710678f, 0.70710678f}, {-0.69779046f, 0.71630194f},
+{-0.68835458f, 0.72537437f}, {-0.67880075f, 0.73432251f},
+{-0.66913061f, 0.74314483f}, {-0.65934582f, 0.75183981f},
+{-0.64944805f, 0.76040597f}, {-0.63943900f, 0.76884183f},
+{-0.62932039f, 0.77714596f}, {-0.61909395f, 0.78531693f},
+{-0.60876143f, 0.79335334f}, {-0.59832460f, 0.80125381f},
+{-0.58778525f, 0.80901699f}, {-0.57714519f, 0.81664156f},
+{-0.56640624f, 0.82412619f}, {-0.55557023f, 0.83146961f},
+{-0.54463904f, 0.83867057f}, {-0.53361452f, 0.84572782f},
+{-0.52249856f, 0.85264016f}, {-0.51129309f, 0.85940641f},
+{-0.50000000f, 0.86602540f}, {-0.48862124f, 0.87249601f},
+{-0.47715876f, 0.87881711f}, {-0.46561452f, 0.88498764f},
+{-0.45399050f, 0.89100652f}, {-0.44228869f, 0.89687274f},
+{-0.43051110f, 0.90258528f}, {-0.41865974f, 0.90814317f},
+{-0.40673664f, 0.91354546f}, {-0.39474386f, 0.91879121f},
+{-0.38268343f, 0.92387953f}, {-0.37055744f, 0.92880955f},
+{-0.35836795f, 0.93358043f}, {-0.34611706f, 0.93819134f},
+{-0.33380686f, 0.94264149f}, {-0.32143947f, 0.94693013f},
+{-0.30901699f, 0.95105652f}, {-0.29654157f, 0.95501994f},
+{-0.28401534f, 0.95881973f}, {-0.27144045f, 0.96245524f},
+{-0.25881905f, 0.96592583f}, {-0.24615329f, 0.96923091f},
+{-0.23344536f, 0.97236992f}, {-0.22069744f, 0.97534232f},
+{-0.20791169f, 0.97814760f}, {-0.19509032f, 0.98078528f},
+{-0.18223553f, 0.98325491f}, {-0.16934950f, 0.98555606f},
+{-0.15643447f, 0.98768834f}, {-0.14349262f, 0.98965139f},
+{-0.13052619f, 0.99144486f}, {-0.11753740f, 0.99306846f},
+{-0.10452846f, 0.99452190f}, {-0.091501619f, 0.99580493f},
+{-0.078459096f, 0.99691733f}, {-0.065403129f, 0.99785892f},
+{-0.052335956f, 0.99862953f}, {-0.039259816f, 0.99922904f},
+{-0.026176948f, 0.99965732f}, {-0.013089596f, 0.99991433f},
+{-1.8369095e-16f, 1.0000000f}, {0.013089596f, 0.99991433f},
+{0.026176948f, 0.99965732f}, {0.039259816f, 0.99922904f},
+{0.052335956f, 0.99862953f}, {0.065403129f, 0.99785892f},
+{0.078459096f, 0.99691733f}, {0.091501619f, 0.99580493f},
+{0.10452846f, 0.99452190f}, {0.11753740f, 0.99306846f},
+{0.13052619f, 0.99144486f}, {0.14349262f, 0.98965139f},
+{0.15643447f, 0.98768834f}, {0.16934950f, 0.98555606f},
+{0.18223553f, 0.98325491f}, {0.19509032f, 0.98078528f},
+{0.20791169f, 0.97814760f}, {0.22069744f, 0.97534232f},
+{0.23344536f, 0.97236992f}, {0.24615329f, 0.96923091f},
+{0.25881905f, 0.96592583f}, {0.27144045f, 0.96245524f},
+{0.28401534f, 0.95881973f}, {0.29654157f, 0.95501994f},
+{0.30901699f, 0.95105652f}, {0.32143947f, 0.94693013f},
+{0.33380686f, 0.94264149f}, {0.34611706f, 0.93819134f},
+{0.35836795f, 0.93358043f}, {0.37055744f, 0.92880955f},
+{0.38268343f, 0.92387953f}, {0.39474386f, 0.91879121f},
+{0.40673664f, 0.91354546f}, {0.41865974f, 0.90814317f},
+{0.43051110f, 0.90258528f}, {0.44228869f, 0.89687274f},
+{0.45399050f, 0.89100652f}, {0.46561452f, 0.88498764f},
+{0.47715876f, 0.87881711f}, {0.48862124f, 0.87249601f},
+{0.50000000f, 0.86602540f}, {0.51129309f, 0.85940641f},
+{0.52249856f, 0.85264016f}, {0.53361452f, 0.84572782f},
+{0.54463904f, 0.83867057f}, {0.55557023f, 0.83146961f},
+{0.56640624f, 0.82412619f}, {0.57714519f, 0.81664156f},
+{0.58778525f, 0.80901699f}, {0.59832460f, 0.80125381f},
+{0.60876143f, 0.79335334f}, {0.61909395f, 0.78531693f},
+{0.62932039f, 0.77714596f}, {0.63943900f, 0.76884183f},
+{0.64944805f, 0.76040597f}, {0.65934582f, 0.75183981f},
+{0.66913061f, 0.74314483f}, {0.67880075f, 0.73432251f},
+{0.68835458f, 0.72537437f}, {0.69779046f, 0.71630194f},
+{0.70710678f, 0.70710678f}, {0.71630194f, 0.69779046f},
+{0.72537437f, 0.68835458f}, {0.73432251f, 0.67880075f},
+{0.74314483f, 0.66913061f}, {0.75183981f, 0.65934582f},
+{0.76040597f, 0.64944805f}, {0.76884183f, 0.63943900f},
+{0.77714596f, 0.62932039f}, {0.78531693f, 0.61909395f},
+{0.79335334f, 0.60876143f}, {0.80125381f, 0.59832460f},
+{0.80901699f, 0.58778525f}, {0.81664156f, 0.57714519f},
+{0.82412619f, 0.56640624f}, {0.83146961f, 0.55557023f},
+{0.83867057f, 0.54463904f}, {0.84572782f, 0.53361452f},
+{0.85264016f, 0.52249856f}, {0.85940641f, 0.51129309f},
+{0.86602540f, 0.50000000f}, {0.87249601f, 0.48862124f},
+{0.87881711f, 0.47715876f}, {0.88498764f, 0.46561452f},
+{0.89100652f, 0.45399050f}, {0.89687274f, 0.44228869f},
+{0.90258528f, 0.43051110f}, {0.90814317f, 0.41865974f},
+{0.91354546f, 0.40673664f}, {0.91879121f, 0.39474386f},
+{0.92387953f, 0.38268343f}, {0.92880955f, 0.37055744f},
+{0.93358043f, 0.35836795f}, {0.93819134f, 0.34611706f},
+{0.94264149f, 0.33380686f}, {0.94693013f, 0.32143947f},
+{0.95105652f, 0.30901699f}, {0.95501994f, 0.29654157f},
+{0.95881973f, 0.28401534f}, {0.96245524f, 0.27144045f},
+{0.96592583f, 0.25881905f}, {0.96923091f, 0.24615329f},
+{0.97236992f, 0.23344536f}, {0.97534232f, 0.22069744f},
+{0.97814760f, 0.20791169f}, {0.98078528f, 0.19509032f},
+{0.98325491f, 0.18223553f}, {0.98555606f, 0.16934950f},
+{0.98768834f, 0.15643447f}, {0.98965139f, 0.14349262f},
+{0.99144486f, 0.13052619f}, {0.99306846f, 0.11753740f},
+{0.99452190f, 0.10452846f}, {0.99580493f, 0.091501619f},
+{0.99691733f, 0.078459096f}, {0.99785892f, 0.065403129f},
+{0.99862953f, 0.052335956f}, {0.99922904f, 0.039259816f},
+{0.99965732f, 0.026176948f}, {0.99991433f, 0.013089596f},
+};
+#ifndef FFT_BITREV480
+#define FFT_BITREV480
+static const opus_int16 fft_bitrev480[480] = {
+0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
+450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
+345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
+215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
+110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
+430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
+325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
+181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
+76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
+396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
+291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
+161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
+56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
+362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
+257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
+127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
+22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
+472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
+342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
+237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
+93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
+438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
+308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
+203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
+73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
+418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
+274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
+169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
+39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
+384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
+254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
+149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
+};
+#endif
+
+#ifndef FFT_BITREV240
+#define FFT_BITREV240
+static const opus_int16 fft_bitrev240[240] = {
+0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
+225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
+170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
+115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
+46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
+216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
+161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
+92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
+37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
+207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
+138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
+83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
+28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
+184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
+129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
+74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
+};
+#endif
+
+#ifndef FFT_BITREV120
+#define FFT_BITREV120
+static const opus_int16 fft_bitrev120[120] = {
+0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
+110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
+76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
+56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
+22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
+93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
+73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
+39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
+};
+#endif
+
+#ifndef FFT_BITREV60
+#define FFT_BITREV60
+static const opus_int16 fft_bitrev60[60] = {
+0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
+46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
+37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
+28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
+};
+#endif
+
+#ifndef FFT_STATE48000_960_0
+#define FFT_STATE48000_960_0
+static const kiss_fft_state fft_state48000_960_0 = {
+480, /* nfft */
+0.002083333f, /* scale (1/nfft = 1/480) */
+-1, /* shift */
+{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors: (radix, remaining size) pairs, zero-padded */
+fft_bitrev480, /* bitrev */
+fft_twiddles48000_960, /* twiddles */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_1
+#define FFT_STATE48000_960_1
+static const kiss_fft_state fft_state48000_960_1 = {
+240, /* nfft */
+0.004166667f, /* scale (1/nfft = 1/240) */
+1, /* shift */
+{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors: (radix, remaining size) pairs, zero-padded */
+fft_bitrev240, /* bitrev */
+fft_twiddles48000_960, /* twiddles */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_2
+#define FFT_STATE48000_960_2
+static const kiss_fft_state fft_state48000_960_2 = {
+120, /* nfft */
+0.008333333f, /* scale (1/nfft = 1/120) */
+2, /* shift */
+{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors: (radix, remaining size) pairs, zero-padded */
+fft_bitrev120, /* bitrev */
+fft_twiddles48000_960, /* twiddles */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_3
+#define FFT_STATE48000_960_3
+static const kiss_fft_state fft_state48000_960_3 = {
+60, /* nfft */
+0.016666667f, /* scale (1/nfft = 1/60) */
+3, /* shift */
+{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors: (radix, remaining size) pairs, zero-padded */
+fft_bitrev60, /* bitrev */
+fft_twiddles48000_960, /* twiddles */
+};
+#endif
+
+#endif
+
+#ifndef MDCT_TWIDDLES960
+#define MDCT_TWIDDLES960
+static const opus_val16 mdct_twiddles960[481] = {
+1.0000000f, 0.99999465f, 0.99997858f, 0.99995181f, 0.99991433f,
+0.99986614f, 0.99980724f, 0.99973764f, 0.99965732f, 0.99956631f,
+0.99946459f, 0.99935216f, 0.99922904f, 0.99909521f, 0.99895068f,
+0.99879546f, 0.99862953f, 0.99845292f, 0.99826561f, 0.99806761f,
+0.99785892f, 0.99763955f, 0.99740949f, 0.99716875f, 0.99691733f,
+0.99665524f, 0.99638247f, 0.99609903f, 0.99580493f, 0.99550016f,
+0.99518473f, 0.99485864f, 0.99452190f, 0.99417450f, 0.99381646f,
+0.99344778f, 0.99306846f, 0.99267850f, 0.99227791f, 0.99186670f,
+0.99144486f, 0.99101241f, 0.99056934f, 0.99011566f, 0.98965139f,
+0.98917651f, 0.98869104f, 0.98819498f, 0.98768834f, 0.98717112f,
+0.98664333f, 0.98610497f, 0.98555606f, 0.98499659f, 0.98442657f,
+0.98384600f, 0.98325491f, 0.98265328f, 0.98204113f, 0.98141846f,
+0.98078528f, 0.98014159f, 0.97948742f, 0.97882275f, 0.97814760f,
+0.97746197f, 0.97676588f, 0.97605933f, 0.97534232f, 0.97461487f,
+0.97387698f, 0.97312866f, 0.97236992f, 0.97160077f, 0.97082121f,
+0.97003125f, 0.96923091f, 0.96842019f, 0.96759909f, 0.96676764f,
+0.96592582f, 0.96507367f, 0.96421118f, 0.96333837f, 0.96245523f,
+0.96156180f, 0.96065806f, 0.95974403f, 0.95881973f, 0.95788517f,
+0.95694034f, 0.95598526f, 0.95501995f, 0.95404440f, 0.95305864f,
+0.95206267f, 0.95105651f, 0.95004016f, 0.94901364f, 0.94797697f,
+0.94693013f, 0.94587315f, 0.94480604f, 0.94372882f, 0.94264149f,
+0.94154406f, 0.94043656f, 0.93931897f, 0.93819133f, 0.93705365f,
+0.93590592f, 0.93474818f, 0.93358042f, 0.93240268f, 0.93121493f,
+0.93001722f, 0.92880955f, 0.92759193f, 0.92636438f, 0.92512690f,
+0.92387953f, 0.92262225f, 0.92135509f, 0.92007809f, 0.91879121f,
+0.91749449f, 0.91618795f, 0.91487161f, 0.91354545f, 0.91220952f,
+0.91086382f, 0.90950836f, 0.90814316f, 0.90676824f, 0.90538363f,
+0.90398929f, 0.90258528f, 0.90117161f, 0.89974828f, 0.89831532f,
+0.89687273f, 0.89542055f, 0.89395877f, 0.89248742f, 0.89100652f,
+0.88951606f, 0.88801610f, 0.88650661f, 0.88498764f, 0.88345918f,
+0.88192125f, 0.88037390f, 0.87881711f, 0.87725090f, 0.87567531f,
+0.87409035f, 0.87249599f, 0.87089232f, 0.86927933f, 0.86765699f,
+0.86602540f, 0.86438453f, 0.86273437f, 0.86107503f, 0.85940641f,
+0.85772862f, 0.85604161f, 0.85434547f, 0.85264014f, 0.85092572f,
+0.84920218f, 0.84746955f, 0.84572781f, 0.84397704f, 0.84221721f,
+0.84044838f, 0.83867056f, 0.83688375f, 0.83508799f, 0.83328325f,
+0.83146961f, 0.82964704f, 0.82781562f, 0.82597530f, 0.82412620f,
+0.82226820f, 0.82040144f, 0.81852589f, 0.81664154f, 0.81474847f,
+0.81284665f, 0.81093620f, 0.80901698f, 0.80708914f, 0.80515262f,
+0.80320752f, 0.80125378f, 0.79929149f, 0.79732067f, 0.79534125f,
+0.79335335f, 0.79135691f, 0.78935204f, 0.78733867f, 0.78531691f,
+0.78328674f, 0.78124818f, 0.77920122f, 0.77714595f, 0.77508232f,
+0.77301043f, 0.77093026f, 0.76884183f, 0.76674517f, 0.76464026f,
+0.76252720f, 0.76040593f, 0.75827656f, 0.75613907f, 0.75399349f,
+0.75183978f, 0.74967807f, 0.74750833f, 0.74533054f, 0.74314481f,
+0.74095112f, 0.73874950f, 0.73653993f, 0.73432251f, 0.73209718f,
+0.72986405f, 0.72762307f, 0.72537438f, 0.72311787f, 0.72085359f,
+0.71858162f, 0.71630192f, 0.71401459f, 0.71171956f, 0.70941701f,
+0.70710677f, 0.70478900f, 0.70246363f, 0.70013079f, 0.69779041f,
+0.69544260f, 0.69308738f, 0.69072466f, 0.68835458f, 0.68597709f,
+0.68359229f, 0.68120013f, 0.67880072f, 0.67639404f, 0.67398011f,
+0.67155892f, 0.66913059f, 0.66669509f, 0.66425240f, 0.66180265f,
+0.65934581f, 0.65688191f, 0.65441092f, 0.65193298f, 0.64944801f,
+0.64695613f, 0.64445727f, 0.64195160f, 0.63943902f, 0.63691954f,
+0.63439328f, 0.63186019f, 0.62932037f, 0.62677377f, 0.62422055f,
+0.62166055f, 0.61909394f, 0.61652065f, 0.61394081f, 0.61135435f,
+0.60876139f, 0.60616195f, 0.60355593f, 0.60094349f, 0.59832457f,
+0.59569929f, 0.59306758f, 0.59042957f, 0.58778523f, 0.58513460f,
+0.58247766f, 0.57981452f, 0.57714518f, 0.57446961f, 0.57178793f,
+0.56910013f, 0.56640624f, 0.56370623f, 0.56100023f, 0.55828818f,
+0.55557020f, 0.55284627f, 0.55011641f, 0.54738067f, 0.54463901f,
+0.54189157f, 0.53913828f, 0.53637921f, 0.53361450f, 0.53084398f,
+0.52806787f, 0.52528601f, 0.52249852f, 0.51970543f, 0.51690688f,
+0.51410279f, 0.51129310f, 0.50847793f, 0.50565732f, 0.50283139f,
+0.49999997f, 0.49716321f, 0.49432122f, 0.49147383f, 0.48862118f,
+0.48576340f, 0.48290042f, 0.48003216f, 0.47715876f, 0.47428025f,
+0.47139677f, 0.46850813f, 0.46561448f, 0.46271584f, 0.45981235f,
+0.45690383f, 0.45399042f, 0.45107214f, 0.44814915f, 0.44522124f,
+0.44228868f, 0.43935137f, 0.43640926f, 0.43346247f, 0.43051104f,
+0.42755511f, 0.42459449f, 0.42162932f, 0.41865964f, 0.41568558f,
+0.41270697f, 0.40972393f, 0.40673661f, 0.40374494f, 0.40074884f,
+0.39774844f, 0.39474390f, 0.39173501f, 0.38872193f, 0.38570469f,
+0.38268343f, 0.37965796f, 0.37662842f, 0.37359496f, 0.37055739f,
+0.36751585f, 0.36447038f, 0.36142122f, 0.35836797f, 0.35531089f,
+0.35225000f, 0.34918544f, 0.34611704f, 0.34304493f, 0.33996926f,
+0.33688983f, 0.33380680f, 0.33072019f, 0.32763015f, 0.32453650f,
+0.32143936f, 0.31833890f, 0.31523503f, 0.31212767f, 0.30901696f,
+0.30590306f, 0.30278577f, 0.29966524f, 0.29654150f, 0.29341470f,
+0.29028464f, 0.28715147f, 0.28401522f, 0.28087605f, 0.27773376f,
+0.27458861f, 0.27144052f, 0.26828940f, 0.26513541f, 0.26197859f,
+0.25881907f, 0.25565666f, 0.25249152f, 0.24932367f, 0.24615327f,
+0.24298012f, 0.23980436f, 0.23662604f, 0.23344530f, 0.23026206f,
+0.22707623f, 0.22388809f, 0.22069744f, 0.21750443f, 0.21430908f,
+0.21111156f, 0.20791165f, 0.20470953f, 0.20150520f, 0.19829884f,
+0.19509024f, 0.19187955f, 0.18866692f, 0.18545227f, 0.18223552f,
+0.17901681f, 0.17579631f, 0.17257380f, 0.16934945f, 0.16612328f,
+0.16289546f, 0.15966577f, 0.15643437f, 0.15320141f, 0.14996669f,
+0.14673037f, 0.14349260f, 0.14025329f, 0.13701235f, 0.13376995f,
+0.13052612f, 0.12728101f, 0.12403442f, 0.12078650f, 0.11753740f,
+0.11428693f, 0.11103523f, 0.10778234f, 0.10452842f, 0.10127326f,
+0.098017137f, 0.094759842f, 0.091501652f, 0.088242363f, 0.084982129f,
+0.081721103f, 0.078459084f, 0.075196224f, 0.071932560f, 0.068668243f,
+0.065403073f, 0.062137201f, 0.058870665f, 0.055603617f, 0.052335974f,
+0.049067651f, 0.045798921f, 0.042529582f, 0.039259788f, 0.035989573f,
+0.032719092f, 0.029448142f, 0.026176876f, 0.022905329f, 0.019633657f,
+0.016361655f, 0.013089478f, 0.0098171604f, 0.0065449764f, 0.0032724839f,
+-4.3711390e-08f, };
+#endif
+
+static const CELTMode mode48000_960_120 = {
+48000, /* Fs */
+120, /* overlap */
+21, /* nbEBands */
+21, /* effEBands */
+{0.85000610f, 0.0000000f, 1.0000000f, 1.0000000f, }, /* preemph */
+eband5ms, /* eBands */
+3, /* maxLM */
+8, /* nbShortMdcts */
+120, /* shortMdctSize */
+11, /* nbAllocVectors */
+band_allocation, /* allocVectors */
+logN400, /* logN */
+window120, /* window */
+{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960}, /* mdct */
+{392, cache_index50, cache_bits50, cache_caps50}, /* cache */
+};
+
+/* List of all the available modes */
+#define TOTAL_MODES 1
+static const CELTMode * const static_mode_list[TOTAL_MODES] = {
+&mode48000_960_120,
+};
diff --git a/drivers/opus/celt/tests/test_unit_cwrs32.c b/drivers/opus/celt/tests/test_unit_cwrs32.c
new file mode 100644
index 0000000000..9cf124336a
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_cwrs32.c
@@ -0,0 +1,161 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
+ Gregory Maxwell
+ Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#else
+#define TEST_CUSTOM_MODES
+#endif
+
+#define CELT_C
+#include "stack_alloc.h"
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+#include "cwrs.c"
+#include "mathops.c"
+#include "rate.h"
+
+#define NMAX (240)
+#define KMAX (128)
+
+/* Test dimensions used by main(): pn[t] is a vector size N to exercise and
+   pkmax[t] is the largest pulse count K tried for that N (main() leaves its
+   inner loop as soon as get_pulses() returns a value above pkmax[t]).
+   The larger table is selected when CUSTOM_MODES was already defined before
+   this file defined it, which is what sets TEST_CUSTOM_MODES. */
+#ifdef TEST_CUSTOM_MODES
+
+#define NDIMS (44)
+static const int pn[NDIMS]={
+ 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 18, 20, 22,
+ 24, 26, 28, 30, 32, 36, 40, 44, 48,
+ 52, 56, 60, 64, 72, 80, 88, 96, 104,
+ 112, 120, 128, 144, 160, 176, 192, 208
+};
+static const int pkmax[NDIMS]={
+ 128, 128, 128, 128, 88, 52, 36, 26, 22,
+ 18, 16, 15, 13, 12, 12, 11, 10, 9,
+ 9, 8, 8, 7, 7, 7, 7, 6, 6,
+ 6, 6, 6, 5, 5, 5, 5, 5, 5,
+ 4, 4, 4, 4, 4, 4, 4, 4
+};
+
+#else /* TEST_CUSTOM_MODES */
+
+/* Reduced table used when CUSTOM_MODES was not defined by the build. */
+#define NDIMS (22)
+static const int pn[NDIMS]={
+ 2, 3, 4, 6, 8, 9, 11, 12, 16,
+ 18, 22, 24, 32, 36, 44, 48, 64, 72,
+ 88, 96, 144, 176
+};
+static const int pkmax[NDIMS]={
+ 128, 128, 128, 88, 36, 26, 18, 16, 12,
+ 11, 9, 9, 7, 7, 6, 6, 5, 5,
+ 5, 5, 4, 4
+};
+
+#endif
+
+/* Round-trip test for the CWRS pulse-vector index coder.
+   For every dimension N in pn[] and every pulse count K returned by
+   get_pulses() that does not exceed pkmax[], a sample of indices i in
+   [0, nc) is visited (about 20000 per (N,K) pair, stepping by nc/20000):
+     - cwrsi() fills y[] from index i; the absolute values in y[] must sum to K;
+     - icwrs() must map y[] back to the same index i;
+     - the combination count obtained alongside must equal nc.
+   Returns 0 on success, 99 on a pulse-count mismatch, 1 on an index mismatch
+   and 2 on a combination-count mismatch. */
+int main(void){
+  int t;
+  int n;
+  ALLOC_STACK;
+  for(t=0;t<NDIMS;t++){
+    int pseudo;
+    n=pn[t];
+    for(pseudo=1;pseudo<41;pseudo++)
+    {
+      int k;
+#if defined(SMALL_FOOTPRINT)
+      opus_uint32 uu[KMAX+2U];
+#endif
+      opus_uint32 inc;
+      opus_uint32 nc;
+      opus_uint32 i;
+      k=get_pulses(pseudo);
+      /*Stop once the pulse count exceeds the limit listed for this N.*/
+      if (k>pkmax[t])break;
+      printf("Testing CWRS with N=%i, K=%i...\n",n,k);
+#if defined(SMALL_FOOTPRINT)
+      nc=ncwrs_urow(n,k,uu);
+#else
+      nc=CELT_PVQ_V(n,k);
+#endif
+      /*Sample the index space rather than walking all nc entries.*/
+      inc=nc/20000;
+      if(inc<1)inc=1;
+      for(i=0;i<nc;i+=inc){
+#if defined(SMALL_FOOTPRINT)
+        opus_uint32 u[KMAX+2U];
+#endif
+        int y[NMAX];
+        int sy;
+        opus_uint32 v;
+        opus_uint32 ii;
+        int j;
+#if defined(SMALL_FOOTPRINT)
+        /*Work on a copy of the row so uu stays intact for the next index.*/
+        memcpy(u,uu,(k+2U)*sizeof(*u));
+        cwrsi(n,k,i,y,u);
+#else
+        cwrsi(n,k,i,y);
+#endif
+        sy=0;
+        for(j=0;j<n;j++)sy+=ABS(y[j]);
+        if(sy!=k){
+          fprintf(stderr,"N=%d Pulse count mismatch in cwrsi (%d!=%d).\n",
+           n,sy,k);
+          return 99;
+        }
+        /*printf("%6u of %u:",i,nc);
+        for(j=0;j<n;j++)printf(" %+3i",y[j]);
+        printf(" ->");*/
+#if defined(SMALL_FOOTPRINT)
+        ii=icwrs(n,k,&v,y,u);
+#else
+        ii=icwrs(n,y);
+        v=CELT_PVQ_V(n,k);
+#endif
+        /*%lu takes an unsigned long, so cast the unsigned 32-bit values to
+           that type rather than to (signed) long.*/
+        if(ii!=i){
+          fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n",
+           (unsigned long)ii,(unsigned long)i);
+          return 1;
+        }
+        if(v!=nc){
+          fprintf(stderr,"Combination count mismatch (%lu!=%lu).\n",
+           (unsigned long)v,(unsigned long)nc);
+          return 2;
+        }
+        /*printf(" %6u\n",i);*/
+      }
+      /*printf("\n");*/
+    }
+  }
+  return 0;
+}
diff --git a/drivers/opus/celt/tests/test_unit_dft.c b/drivers/opus/celt/tests/test_unit_dft.c
new file mode 100644
index 0000000000..4a00013b2a
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_dft.c
@@ -0,0 +1,164 @@
+/* Copyright (c) 2008 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define SKIP_CONFIG_H
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#include <stdio.h>
+
+#define CELT_C
+#include "stack_alloc.h"
+#include "kiss_fft.h"
+#include "kiss_fft.c"
+#include "mathops.c"
+#include "entcode.c"
+
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+int ret = 0;
+
+/* Verifies an FFT result against a directly evaluated DFT.
+   For each output bin the reference value is accumulated in double precision
+   from all nfft inputs; in the forward direction (isinverse == 0) the
+   reference is scaled by 1/nfft, in the inverse direction the sign of the
+   imaginary twiddle part is flipped instead. The resulting SNR in dB is
+   printed, and the global ret is set to 1 when it falls below 60. */
+void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse)
+{
+   int bin;
+   double errpow = 0;
+   double sigpow = 0;
+   double snr;
+
+   for (bin=0;bin<nfft;++bin) {
+      int k;
+      double ref_r = 0;
+      double ref_i = 0;
+      double err_r;
+      double err_i;
+
+      for (k=0;k<nfft;++k) {
+         double phase = -2*M_PI*bin*k/nfft;
+         double tw_r = cos(phase);
+         double tw_i = sin(phase);
+
+         if (isinverse) {
+            tw_i = -tw_i;
+         } else {
+            tw_r /= nfft;
+            tw_i /= nfft;
+         }
+
+         ref_r += in[k].r * tw_r - in[k].i * tw_i;
+         ref_i += in[k].r * tw_i + in[k].i * tw_r;
+      }
+      err_r = ref_r - out[bin].r;
+      err_i = ref_i - out[bin].i;
+      errpow += err_r*err_r + err_i*err_i;
+      sigpow += ref_r*ref_r+ref_i*ref_i;
+   }
+
+   snr = 10*log10(sigpow/errpow);
+   printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
+   if (snr<60) {
+      printf( "** poor snr: %f ** \n", snr);
+      ret = 1;
+   }
+}
+
+/* Runs one FFT (isinverse == 0) or inverse FFT (isinverse != 0) of size nfft
+   on random input and hands the result to check().
+   The input is drawn from rand(), multiplied by 32768 and, for the inverse
+   direction, divided by nfft before the transform.
+   A non-positive size or a failed allocation is reported and recorded in the
+   global ret instead of dereferencing a null pointer; sizes can come straight
+   from the command line, so both are reachable. */
+void test1d(int nfft,int isinverse)
+{
+   size_t buflen;
+   kiss_fft_cpx * in;
+   kiss_fft_cpx * out;
+   kiss_fft_state *cfg;
+   int k;
+
+   if (nfft <= 0) {
+      printf("nfft=%d inverse=%d: invalid size\n",nfft,isinverse);
+      ret = 1;
+      return;
+   }
+
+   buflen = sizeof(kiss_fft_cpx)*nfft;
+   in = (kiss_fft_cpx*)malloc(buflen);
+   out= (kiss_fft_cpx*)malloc(buflen);
+   cfg = opus_fft_alloc(nfft,0,0);
+
+   if (in == NULL || out == NULL || cfg == NULL) {
+      printf("nfft=%d inverse=%d: allocation failed\n",nfft,isinverse);
+      ret = 1;
+      free(in);
+      free(out);
+      free(cfg);
+      return;
+   }
+
+   for (k=0;k<nfft;++k) {
+      in[k].r = (rand() % 32767) - 16384;
+      in[k].i = (rand() % 32767) - 16384;
+   }
+
+   for (k=0;k<nfft;++k) {
+      in[k].r *= 32768;
+      in[k].i *= 32768;
+   }
+
+   if (isinverse)
+   {
+      for (k=0;k<nfft;++k) {
+         in[k].r /= nfft;
+         in[k].i /= nfft;
+      }
+   }
+
+   /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/
+
+   if (isinverse)
+      opus_ifft(cfg,in,out);
+   else
+      opus_fft(cfg,in,out);
+
+   /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
+
+   check(in,out,nfft,isinverse);
+
+   free(in);
+   free(out);
+   /* NOTE(review): free() releases only the state object itself; if the FFT
+      library provides a dedicated release routine for states returned by
+      opus_fft_alloc(), it should be used here instead - confirm against
+      kiss_fft.h. */
+   free(cfg);
+}
+
+/* Test driver. With command-line arguments, each one is parsed with atoi()
+   and tested as an FFT size, forward first and then inverse. Without
+   arguments a built-in list of sizes is used; the non power-of-two sizes are
+   left out when RADIX_TWO_ONLY is defined.
+   Returns the global ret, which check() sets to 1 on a poor SNR. */
+int main(int argc,char ** argv)
+{
+   ALLOC_STACK;
+   if (argc>1) {
+      int arg;
+      for (arg=1;arg<argc;++arg) {
+         int inverse;
+         for (inverse=0;inverse<2;++inverse)
+            test1d(atoi(argv[arg]),inverse);
+      }
+   } else {
+      static const int sizes[] = {
+         32, 128, 256,
+#ifndef RADIX_TWO_ONLY
+         36, 50, 120,
+#endif
+      };
+      unsigned idx;
+      for (idx=0;idx<sizeof(sizes)/sizeof(sizes[0]);++idx) {
+         test1d(sizes[idx],0);
+         test1d(sizes[idx],1);
+      }
+   }
+   return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_entropy.c b/drivers/opus/celt/tests/test_unit_entropy.c
new file mode 100644
index 0000000000..62268b1564
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_entropy.c
@@ -0,0 +1,382 @@
+/* Copyright (c) 2007-2011 Xiph.Org Foundation, Mozilla Corporation,
+ Gregory Maxwell
+ Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include "entcode.h"
+#include "entenc.h"
+#include "entdec.h"
+#include <string.h>
+
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+
+#ifndef M_LOG2E
+# define M_LOG2E 1.4426950408889634074
+#endif
+#define DATA_SIZE 10000000
+#define DATA_SIZE2 10000
+
+/*Test driver for the range encoder/decoder.
+  The seed for the random-stream tests is taken from the first command-line
+   argument, else from the SEED environment variable, else from time().
+  Returns 0 when every check passes, -1 when any check failed, and 1 on a
+   usage error or when the work buffer cannot be allocated.*/
+int main(int _argc,char **_argv){
+ ec_enc enc;
+ ec_dec dec;
+ long nbits;
+ long nbits2;
+ double entropy;
+ int ft;
+ int ftb;
+ int sz;
+ int i;
+ int ret;
+ unsigned int sym;
+ unsigned int seed;
+ unsigned char *ptr;
+ const char *env_seed;
+ ret=0;
+ entropy=0;
+ if (_argc > 2) {
+ fprintf(stderr, "Usage: %s [<seed>]\n", _argv[0]);
+ return 1;
+ }
+ env_seed = getenv("SEED");
+ if (_argc > 1)
+ seed = atoi(_argv[1]);
+ else if (env_seed)
+ seed = atoi(env_seed);
+ else
+ seed = time(NULL);
+ ptr = (unsigned char *)malloc(DATA_SIZE);
+ /*Every test below writes through ptr, so bail out early if it is missing.*/
+ if(ptr==NULL){
+ fprintf(stderr,"Failed to allocate %d bytes.\n",DATA_SIZE);
+ return 1;
+ }
+ /*Testing encoding of unsigned integers with ec_enc_uint().*/
+ ec_enc_init(&enc,ptr, DATA_SIZE);
+ for(ft=2;ft<1024;ft++){
+ for(i=0;i<ft;i++){
+ entropy+=log(ft)*M_LOG2E;
+ ec_enc_uint(&enc,i,ft);
+ }
+ }
+ /*Testing encoding of raw bit values.*/
+ for(ftb=1;ftb<16;ftb++){
+ for(i=0;i<(1<<ftb);i++){
+ entropy+=ftb;
+ nbits=ec_tell(&enc);
+ ec_enc_bits(&enc,i,ftb);
+ nbits2=ec_tell(&enc);
+ if(nbits2-nbits!=ftb){
+ fprintf(stderr,"Used %li bits to encode %i bits directly.\n",
+ nbits2-nbits,ftb);
+ ret=-1;
+ }
+ }
+ }
+ nbits=ec_tell_frac(&enc);
+ ec_enc_done(&enc);
+ fprintf(stderr,
+ "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n",
+ entropy,ldexp(nbits,-3),100*(nbits-ldexp(entropy,3))/nbits);
+ fprintf(stderr,"Packed to %li bytes.\n",(long)ec_range_bytes(&enc));
+ /*Decode everything back in the same order and compare.*/
+ ec_dec_init(&dec,ptr,DATA_SIZE);
+ for(ft=2;ft<1024;ft++){
+ for(i=0;i<ft;i++){
+ sym=ec_dec_uint(&dec,ft);
+ if(sym!=(unsigned)i){
+ fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft);
+ ret=-1;
+ }
+ }
+ }
+ for(ftb=1;ftb<16;ftb++){
+ for(i=0;i<(1<<ftb);i++){
+ sym=ec_dec_bits(&dec,ftb);
+ if(sym!=(unsigned)i){
+ fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb);
+ ret=-1;
+ }
+ }
+ }
+ nbits2=ec_tell_frac(&dec);
+ if(nbits!=nbits2){
+ fprintf(stderr,
+ "Reported number of bits used was %0.2lf, should be %0.2lf.\n",
+ ldexp(nbits2,-3),ldexp(nbits,-3));
+ ret=-1;
+ }
+ /*Testing an encoder bust prefers range coder data over raw bits.
+ This isn't a general guarantee, will only work for data that is buffered in
+ the encoder state and not yet stored in the user buffer, and should never
+ get used in practice.
+ It's mostly here for code coverage completeness.*/
+ /*Start with a 16-bit buffer.*/
+ ec_enc_init(&enc,ptr,2);
+ /*Write 7 raw bits.*/
+ ec_enc_bits(&enc,0x55,7);
+ /*Write 12.3 bits of range coder data.*/
+ ec_enc_uint(&enc,1,2);
+ ec_enc_uint(&enc,1,3);
+ ec_enc_uint(&enc,1,4);
+ ec_enc_uint(&enc,1,5);
+ ec_enc_uint(&enc,2,6);
+ ec_enc_uint(&enc,6,7);
+ ec_enc_done(&enc);
+ ec_dec_init(&dec,ptr,2);
+ if(!enc.error
+ /*The raw bits should have been overwritten by the range coder data.*/
+ ||ec_dec_bits(&dec,7)!=0x05
+ /*And all the range coder data should have been encoded correctly.*/
+ ||ec_dec_uint(&dec,2)!=1
+ ||ec_dec_uint(&dec,3)!=1
+ ||ec_dec_uint(&dec,4)!=1
+ ||ec_dec_uint(&dec,5)!=1
+ ||ec_dec_uint(&dec,6)!=2
+ ||ec_dec_uint(&dec,7)!=6){
+ fprintf(stderr,"Encoder bust overwrote range coder data with raw bits.\n");
+ ret=-1;
+ }
+ srand(seed);
+ fprintf(stderr,"Testing random streams... Random seed: %u (%.4X)\n", seed, rand() % 65536);
+ /*NOTE(review): the "Tell mismatch" diagnostics in the two loops below are
+ printed without setting ret, so they do not fail the test - confirm whether
+ that is intended.*/
+ for(i=0;i<409600;i++){
+ unsigned *data;
+ unsigned *tell;
+ unsigned tell_bits;
+ int j;
+ int zeros;
+ ft=rand()/((RAND_MAX>>(rand()%11U))+1U)+10;
+ sz=rand()/((RAND_MAX>>(rand()%9U))+1U);
+ data=(unsigned *)malloc(sz*sizeof(*data));
+ tell=(unsigned *)malloc((sz+1)*sizeof(*tell));
+ ec_enc_init(&enc,ptr,DATA_SIZE2);
+ zeros = rand()%13==0;
+ tell[0]=ec_tell_frac(&enc);
+ for(j=0;j<sz;j++){
+ if (zeros)
+ data[j]=0;
+ else
+ data[j]=rand()%ft;
+ ec_enc_uint(&enc,data[j],ft);
+ tell[j+1]=ec_tell_frac(&enc);
+ }
+ if (rand()%2==0)
+ while(ec_tell(&enc)%8 != 0)
+ ec_enc_uint(&enc, rand()%2, 2);
+ tell_bits = ec_tell(&enc);
+ ec_enc_done(&enc);
+ if(tell_bits!=(unsigned)ec_tell(&enc)){
+ fprintf(stderr,"ec_tell() changed after ec_enc_done(): %i instead of %i (Random seed: %u)\n",
+ ec_tell(&enc),tell_bits,seed);
+ ret=-1;
+ }
+ if ((tell_bits+7)/8 < ec_range_bytes(&enc))
+ {
+ fprintf (stderr, "ec_tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",
+ ec_range_bytes(&enc), (tell_bits+7)/8,seed);
+ ret=-1;
+ }
+ ec_dec_init(&dec,ptr,DATA_SIZE2);
+ if(ec_tell_frac(&dec)!=tell[0]){
+ fprintf(stderr,
+ "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+ 0,ec_tell_frac(&dec),tell[0],seed);
+ }
+ for(j=0;j<sz;j++){
+ sym=ec_dec_uint(&dec,ft);
+ if(sym!=data[j]){
+ fprintf(stderr,
+ "Decoded %i instead of %i with ft of %i at position %i of %i (Random seed: %u).\n",
+ sym,data[j],ft,j,sz,seed);
+ ret=-1;
+ }
+ if(ec_tell_frac(&dec)!=tell[j+1]){
+ fprintf(stderr,
+ "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+ j+1,ec_tell_frac(&dec),tell[j+1],seed);
+ }
+ }
+ free(tell);
+ free(data);
+ }
+ /*Test compatibility between multiple different encode/decode routines.*/
+ for(i=0;i<409600;i++){
+ unsigned *logp1;
+ unsigned *data;
+ unsigned *tell;
+ unsigned *enc_method;
+ int j;
+ sz=rand()/((RAND_MAX>>(rand()%9U))+1U);
+ logp1=(unsigned *)malloc(sz*sizeof(*logp1));
+ data=(unsigned *)malloc(sz*sizeof(*data));
+ tell=(unsigned *)malloc((sz+1)*sizeof(*tell));
+ enc_method=(unsigned *)malloc(sz*sizeof(*enc_method));
+ ec_enc_init(&enc,ptr,DATA_SIZE2);
+ tell[0]=ec_tell_frac(&enc);
+ for(j=0;j<sz;j++){
+ data[j]=rand()/((RAND_MAX>>1)+1);
+ logp1[j]=(rand()%15)+1;
+ enc_method[j]=rand()/((RAND_MAX>>2)+1);
+ switch(enc_method[j]){
+ case 0:{
+ ec_encode(&enc,data[j]?(1<<logp1[j])-1:0,
+ (1<<logp1[j])-(data[j]?0:1),1<<logp1[j]);
+ }break;
+ case 1:{
+ ec_encode_bin(&enc,data[j]?(1<<logp1[j])-1:0,
+ (1<<logp1[j])-(data[j]?0:1),logp1[j]);
+ }break;
+ case 2:{
+ ec_enc_bit_logp(&enc,data[j],logp1[j]);
+ }break;
+ case 3:{
+ unsigned char icdf[2];
+ icdf[0]=1;
+ icdf[1]=0;
+ ec_enc_icdf(&enc,data[j],icdf,logp1[j]);
+ }break;
+ }
+ tell[j+1]=ec_tell_frac(&enc);
+ }
+ ec_enc_done(&enc);
+ if((ec_tell(&enc)+7U)/8U<ec_range_bytes(&enc)){
+ fprintf(stderr,"tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",
+ ec_range_bytes(&enc),(ec_tell(&enc)+7)/8,seed);
+ ret=-1;
+ }
+ ec_dec_init(&dec,ptr,DATA_SIZE2);
+ if(ec_tell_frac(&dec)!=tell[0]){
+ fprintf(stderr,
+ "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+ 0,ec_tell_frac(&dec),tell[0],seed);
+ }
+ for(j=0;j<sz;j++){
+ int fs;
+ int dec_method;
+ /*The decoding routine is picked independently of the encoding one.*/
+ dec_method=rand()/((RAND_MAX>>2)+1);
+ switch(dec_method){
+ case 0:{
+ fs=ec_decode(&dec,1<<logp1[j]);
+ sym=fs>=(1<<logp1[j])-1;
+ ec_dec_update(&dec,sym?(1<<logp1[j])-1:0,
+ (1<<logp1[j])-(sym?0:1),1<<logp1[j]);
+ }break;
+ case 1:{
+ fs=ec_decode_bin(&dec,logp1[j]);
+ sym=fs>=(1<<logp1[j])-1;
+ ec_dec_update(&dec,sym?(1<<logp1[j])-1:0,
+ (1<<logp1[j])-(sym?0:1),1<<logp1[j]);
+ }break;
+ case 2:{
+ sym=ec_dec_bit_logp(&dec,logp1[j]);
+ }break;
+ case 3:{
+ unsigned char icdf[2];
+ icdf[0]=1;
+ icdf[1]=0;
+ sym=ec_dec_icdf(&dec,icdf,logp1[j]);
+ }break;
+ }
+ if(sym!=data[j]){
+ fprintf(stderr,
+ "Decoded %i instead of %i with logp1 of %i at position %i of %i (Random seed: %u).\n",
+ sym,data[j],logp1[j],j,sz,seed);
+ fprintf(stderr,"Encoding method: %i, decoding method: %i\n",
+ enc_method[j],dec_method);
+ ret=-1;
+ }
+ if(ec_tell_frac(&dec)!=tell[j+1]){
+ fprintf(stderr,
+ "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+ j+1,ec_tell_frac(&dec),tell[j+1],seed);
+ }
+ }
+ free(enc_method);
+ free(tell);
+ free(data);
+ free(logp1);
+ }
+ /*Testing ec_enc_patch_initial_bits(): one call expected to succeed, then one
+ expected to set enc.error.*/
+ ec_enc_init(&enc,ptr,DATA_SIZE2);
+ ec_enc_bit_logp(&enc,0,1);
+ ec_enc_bit_logp(&enc,0,1);
+ ec_enc_bit_logp(&enc,0,1);
+ ec_enc_bit_logp(&enc,0,1);
+ ec_enc_bit_logp(&enc,0,2);
+ ec_enc_patch_initial_bits(&enc,3,2);
+ if(enc.error){
+ fprintf(stderr,"patch_initial_bits failed\n");
+ ret=-1;
+ }
+ ec_enc_patch_initial_bits(&enc,0,5);
+ if(!enc.error){
+ fprintf(stderr,"patch_initial_bits didn't fail when it should have\n");
+ ret=-1;
+ }
+ ec_enc_done(&enc);
+ if(ec_range_bytes(&enc)!=1||ptr[0]!=192){
+ fprintf(stderr,"Got %d when expecting 192 for patch_initial_bits\n",ptr[0]);
+ ret=-1;
+ }
+ ec_enc_init(&enc,ptr,DATA_SIZE2);
+ ec_enc_bit_logp(&enc,0,1);
+ ec_enc_bit_logp(&enc,0,1);
+ ec_enc_bit_logp(&enc,1,6);
+ ec_enc_bit_logp(&enc,0,2);
+ ec_enc_patch_initial_bits(&enc,0,2);
+ if(enc.error){
+ fprintf(stderr,"patch_initial_bits failed\n");
+ ret=-1;
+ }
+ ec_enc_done(&enc);
+ if(ec_range_bytes(&enc)!=2||ptr[0]!=63){
+ fprintf(stderr,"Got %d when expecting 63 for patch_initial_bits\n",ptr[0]);
+ ret=-1;
+ }
+ /*Writing more raw bits than a two-byte buffer holds must set enc.error.*/
+ ec_enc_init(&enc,ptr,2);
+ ec_enc_bit_logp(&enc,0,2);
+ for(i=0;i<48;i++){
+ ec_enc_bits(&enc,0,1);
+ }
+ ec_enc_done(&enc);
+ if(!enc.error){
+ fprintf(stderr,"Raw bits overfill didn't fail when it should have\n");
+ ret=-1;
+ }
+ ec_enc_init(&enc,ptr,2);
+ for(i=0;i<17;i++){
+ ec_enc_bits(&enc,0,1);
+ }
+ ec_enc_done(&enc);
+ if(!enc.error){
+ fprintf(stderr,"17 raw bits encoded in two bytes\n");
+ ret=-1;
+ }
+ free(ptr);
+ return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_laplace.c b/drivers/opus/celt/tests/test_unit_laplace.c
new file mode 100644
index 0000000000..af7d471045
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_laplace.c
@@ -0,0 +1,92 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation
+ Written by Jean-Marc Valin and Timothy B. Terriberry */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "laplace.h"
+#define CELT_C
+#include "stack_alloc.h"
+
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+#include "laplace.c"
+
+#define DATA_SIZE 40000
+
+/* Derives the fs argument that main() passes to ec_laplace_encode() and
+   ec_laplace_decode() from a decay value: 32768 minus LAPLACE_MINP for each
+   of 2*LAPLACE_NMIN+1 symbols is scaled by (16384-decay)/(16384+decay), and
+   LAPLACE_MINP is then added back. */
+int ec_laplace_get_start_freq(int decay)
+{
+   opus_uint32 budget = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN+1);
+   int scaled = (budget*(16384-decay))/(16384+decay);
+   return scaled+LAPLACE_MINP;
+}
+
+/* Round-trip test for the Laplace coder: encodes 10000 (value, decay) pairs
+   with ec_laplace_encode(), decodes them with ec_laplace_decode() using the
+   same parameters, and compares each decoded value with the original.
+   The first three pairs are fixed; the rest are drawn from rand().
+   Returns 0 on success and 1 on any mismatch or if the buffer cannot be
+   allocated. */
+int main(void)
+{
+   int i;
+   int ret = 0;
+   ec_enc enc;
+   ec_dec dec;
+   unsigned char *ptr;
+   int val[10000], decay[10000];
+   ALLOC_STACK;
+   ptr = (unsigned char *)malloc(DATA_SIZE);
+   if (ptr == NULL)
+   {
+      fprintf (stderr, "Failed to allocate %d bytes\n", DATA_SIZE);
+      return 1;
+   }
+   ec_enc_init(&enc,ptr,DATA_SIZE);
+
+   val[0] = 3; decay[0] = 6000;
+   val[1] = 0; decay[1] = 5800;
+   val[2] = -1; decay[2] = 5600;
+   for (i=3;i<10000;i++)
+   {
+      val[i] = rand()%15-7;
+      decay[i] = rand()%11000+5000;
+   }
+   for (i=0;i<10000;i++)
+      ec_laplace_encode(&enc, &val[i],
+            ec_laplace_get_start_freq(decay[i]), decay[i]);
+
+   ec_enc_done(&enc);
+
+   ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc));
+
+   for (i=0;i<10000;i++)
+   {
+      int d = ec_laplace_decode(&dec,
+            ec_laplace_get_start_freq(decay[i]), decay[i]);
+      if (d != val[i])
+      {
+         fprintf (stderr, "Got %d instead of %d\n", d, val[i]);
+         ret = 1;
+      }
+   }
+
+   /* Release the coding buffer only after decoding has finished with it. */
+   free(ptr);
+   return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_mathops.c b/drivers/opus/celt/tests/test_unit_mathops.c
new file mode 100644
index 0000000000..36d6a4bfb4
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_mathops.c
@@ -0,0 +1,275 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
+ Gregory Maxwell
+ Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#define CELT_C
+
+#include "mathops.c"
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+#include "bands.c"
+#include "quant_bands.c"
+#include "laplace.c"
+#include "vq.c"
+#include "cwrs.c"
+#include <stdio.h>
+#include <math.h>
+
+#ifdef OPUS_FIXED_POINT
+#define WORD "%d"
+#else
+#define WORD "%f"
+#endif
+
+int ret = 0;
+
+/* Checks celt_rcp() for every integer from 1 to 327670: the reciprocal times
+   the input (rescaled in fixed-point builds) must be within .00025 of 1.
+   Sets the global ret to 1 on failure. */
+void testdiv(void)
+{
+   opus_int32 n;
+   for (n=1;n<=327670;n++)
+   {
+      double prod;
+      opus_val32 rcp;
+      rcp = celt_rcp(n);
+#ifdef OPUS_FIXED_POINT
+      /* NOTE(review): 65526 looks like it may have been meant as 65536;
+         left unchanged - confirm against celt_rcp()'s output scaling. */
+      prod = (1./32768./65526.)*rcp*n;
+#else
+      prod = rcp*n;
+#endif
+      if (!(fabs(prod-1) > .00025))
+         continue;
+      fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", n, rcp, prod);
+      ret = 1;
+   }
+}
+
+/* Checks celt_sqrt() against sqrt() for inputs from 1 up to 1000000000,
+   advancing by 1 + (i>>10) each step. A failure needs both a relative error
+   above .0005 and an absolute error above 2. Sets the global ret to 1 on
+   failure. */
+void testsqrt(void)
+{
+   opus_int32 i = 1;
+   while (i<=1000000000)
+   {
+      double ratio;
+      opus_val16 val;
+      val = celt_sqrt(i);
+      ratio = val/sqrt(i);
+      if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2)
+      {
+         fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio);
+         ret = 1;
+      }
+      /* Same stride as "i += i>>10" followed by the loop's own increment. */
+      i += (i>>10) + 1;
+   }
+}
+
+/* Regression check for bitexact_cos() over the inputs 64..16320: an XOR
+   checksum of q*i, the largest and smallest step between consecutive outputs,
+   and three spot values must all match the recorded constants.
+   Sets the global ret to 1 on failure. */
+void testbitexactcos(void)
+{
+   int i;
+   opus_int32 chk = 0;
+   opus_int32 max_d = 0;
+   opus_int32 min_d = 32767;
+   opus_int32 last = 32767;
+   int ok;
+   for(i=64;i<=16320;i++)
+   {
+      opus_int32 q=bitexact_cos(i);
+      opus_int32 d;
+      chk ^= q*i;
+      d = last - q;
+      if (d>max_d)
+         max_d=d;
+      if (d<min_d)
+         min_d=d;
+      last = q;
+   }
+   ok = (chk==89408644)&&(max_d==5)&&(min_d==0)&&(bitexact_cos(64)==32767)&&
+        (bitexact_cos(16320)==200)&&(bitexact_cos(8192)==23171);
+   if (!ok)
+   {
+      fprintf (stderr, "bitexact_cos failed\n");
+      ret = 1;
+   }
+}
+
+/* Regression check for bitexact_log2tan() fed with bitexact_cos(i) and
+   bitexact_cos(16384-i) for i in 64..8192: the result must negate when the
+   two arguments are swapped, and an XOR checksum, the extreme step sizes and
+   three spot values must match the recorded constants.
+   Sets the global ret to 1 on failure. */
+void testbitexactlog2tan(void)
+{
+   int i;
+   int fail = 0;
+   opus_int32 chk = 0;
+   opus_int32 max_d = 0;
+   opus_int32 min_d = 15059;
+   opus_int32 last = 15059;
+   int ok;
+   for(i=64;i<8193;i++)
+   {
+      opus_int32 d;
+      opus_int32 mid=bitexact_cos(i);
+      opus_int32 side=bitexact_cos(16384-i);
+      opus_int32 q=bitexact_log2tan(mid,side);
+      chk ^= q*i;
+      d = last - q;
+      /* Swapping mid and side must flip the sign of the result. */
+      if (q!=-1*bitexact_log2tan(side,mid))
+         fail = 1;
+      if (d>max_d)
+         max_d=d;
+      if (d<min_d)
+         min_d=d;
+      last = q;
+   }
+   ok = (chk==15821257)&&(max_d==61)&&(min_d==-2)&&!fail&&
+        (bitexact_log2tan(32767,200)==15059)&&(bitexact_log2tan(30274,12540)==2611)&&
+        (bitexact_log2tan(23171,23171)==0);
+   if (!ok)
+   {
+      fprintf (stderr, "bitexact_log2tan failed\n");
+      ret = 1;
+   }
+}
+
+#ifndef OPUS_FIXED_POINT
+/* Floating-point build: compares celt_log2(x) with 1.442695...*log(x) for x
+   growing geometrically (by 1/8 per step) from 0.001 to about 1677700.
+   An absolute error above 0.0009 is reported and sets the global ret to 1.
+   The diagnostic now quotes the threshold that is actually applied. */
+void testlog2(void)
+{
+   float x;
+   for (x=0.001;x<1677700.0;x+=(x/8.0))
+   {
+      float error = fabs((1.442695040888963387*log(x))-celt_log2(x));
+      if (error>0.0009)
+      {
+         fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.0009 (x = %f, error = %f)\n", x,error);
+         ret = 1;
+      }
+   }
+}
+
+/* Floating-point build: checks that 1.442695...*log(celt_exp2(x)) recovers x
+   for x from -11 to 24 in steps of 0.0007.
+   An absolute error above 0.0002 is reported and sets the global ret to 1.
+   The diagnostic now quotes the threshold that is actually applied. */
+void testexp2(void)
+{
+   float x;
+   for (x=-11.0;x<24.0;x+=0.0007)
+   {
+      float error = fabs(x-(1.442695040888963387*log(celt_exp2(x))));
+      if (error>0.0002)
+      {
+         fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0002 (x = %f, error = %f)\n", x,error);
+         ret = 1;
+      }
+   }
+}
+
+/* Floating-point build: checks that celt_log2(celt_exp2(x)) recovers x for x
+   from -11 to 24 in steps of 0.0007, within an absolute error of 0.001.
+   Sets the global ret to 1 on failure. */
+void testexp2log2(void)
+{
+   float x = -11.0;
+   while (x<24.0)
+   {
+      float error = fabs(x-(celt_log2(celt_exp2(x))));
+      if (error>0.001)
+      {
+         fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error);
+         ret = 1;
+      }
+      x+=0.0007;
+   }
+}
+#else
+/* Fixed-point build: compares celt_log2(x)/1024 with the base-2 logarithm of
+   x/16384 for x growing by x>>3 per step from 8 up to 1073741824.
+   An absolute error above 0.003 is reported and sets the global ret to 1. */
+void testlog2(void)
+{
+   opus_val32 x = 8;
+   while (x<1073741824)
+   {
+      float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/1024.0);
+      if (error>0.003)
+      {
+         fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error);
+         ret = 1;
+      }
+      x+=(x>>3);
+   }
+}
+
+/* Fixed-point build: checks celt_exp2(x) for every x from -32768 to 15359.
+   Two errors are computed, one in the log domain (error1) and one in the
+   linear domain (error2); a failure is reported only when both exceed their
+   thresholds. Sets the global ret to 1 on failure. */
+void testexp2(void)
+{
+   opus_val16 x;
+   for (x=-32768;x<15360;x++)
+   {
+      float error1 = fabs(x/1024.0-(1.442695040888963387*log(celt_exp2(x)/65536.0)));
+      float error2 = fabs(exp(0.6931471805599453094*x/1024.0)-celt_exp2(x)/65536.0);
+      if (!(error1>0.0002&&error2>0.00004))
+         continue;
+      fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2);
+      ret = 1;
+   }
+}
+
+/* Fixed-point build: checks that 0.25*celt_exp2(celt_log2(x)) recovers x for
+   x growing by x>>3 per step from 8 up to 65536; the difference divided by
+   16384 must not exceed 0.004. Sets the global ret to 1 on failure.
+   The diagnostic now quotes the expression and threshold actually tested. */
+void testexp2log2(void)
+{
+   opus_val32 x;
+   for (x=8;x<65536;x+=(x>>3))
+   {
+      float error = fabs(x-0.25*celt_exp2(celt_log2(x)))/16384;
+      if (error>0.004)
+      {
+         fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-0.25*celt_exp2(celt_log2(x)))/16384>0.004 (x = %ld, error = %f)\n", (long)x,error);
+         ret = 1;
+      }
+   }
+}
+
+/* Fixed-point build: checks celt_ilog2(x) for x from 1 to 268435455 in steps
+   of 127. The result must lie in [0, 31) and satisfy
+   2**celt_ilog2(x) <= x < 2**(celt_ilog2(x)+1).
+   Sets the global ret to 1 on failure. */
+void testilog2(void)
+{
+   opus_val32 x = 1;
+   while (x<=268435455)
+   {
+      opus_val32 lg = celt_ilog2(x);
+      opus_val32 y;
+
+      if (lg<0 || lg>=31)
+      {
+         printf("celt_ilog2 failed: 0<=celt_ilog2(x)<31 (x = %d, celt_ilog2(x) = %d)\n",x,lg);
+         ret = 1;
+      }
+      y = 1<<lg;
+
+      if (x<y || (x>>1)>=y)
+      {
+         printf("celt_ilog2 failed: 2**celt_ilog2(x)<=x<2**(celt_ilog2(x)+1) (x = %d, 2**celt_ilog2(x) = %d)\n",x,y);
+         ret = 1;
+      }
+      x+=127;
+   }
+}
+#endif
+
+/* Runs every math-ops test in a fixed order (testilog2 only in fixed-point
+   builds) and returns the global ret, which the tests set to 1 on failure. */
+int main(void)
+{
+   static void (* const tests[])(void) = {
+      testbitexactcos,
+      testbitexactlog2tan,
+      testdiv,
+      testsqrt,
+      testlog2,
+      testexp2,
+      testexp2log2,
+#ifdef OPUS_FIXED_POINT
+      testilog2,
+#endif
+   };
+   unsigned idx;
+   for (idx=0;idx<sizeof(tests)/sizeof(tests[0]);idx++)
+      tests[idx]();
+   return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_mdct.c b/drivers/opus/celt/tests/test_unit_mdct.c
new file mode 100644
index 0000000000..e3b5eec11c
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_mdct.c
@@ -0,0 +1,210 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define SKIP_CONFIG_H
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#include <stdio.h>
+
+#define CELT_C
+#include "mdct.h"
+#include "stack_alloc.h"
+
+#include "kiss_fft.c"
+#include "mdct.c"
+#include "mathops.c"
+#include "entcode.c"
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+int ret = 0;
+/* Verifies a forward MDCT result against a direct evaluation: each of the
+   nfft/2 output bins is recomputed in double precision as a cosine-weighted
+   sum of all nfft inputs, with each weight divided by nfft/4.
+   Prints the SNR in dB and sets the global ret to 1 when it is below 60. */
+void check(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse)
+{
+   int bin;
+   double errpow = 0;
+   double sigpow = 0;
+   double snr;
+   for (bin=0;bin<nfft/2;++bin) {
+      int k;
+      double ref = 0;
+      double diff;
+
+      for (k=0;k<nfft;++k) {
+         double phase = 2*M_PI*(k+.5+.25*nfft)*(bin+.5)/nfft;
+         double weight = cos(phase);
+
+         weight /= nfft/4;
+
+         ref += in[k] * weight;
+      }
+      diff = ref - out[bin];
+      errpow += diff*diff;
+      sigpow += ref*ref;
+   }
+   snr = 10*log10(sigpow/errpow);
+   printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
+   if (snr<60) {
+      printf( "** poor snr: %f **\n", snr);
+      ret = 1;
+   }
+}
+
+/* Verifies an inverse MDCT result against a direct evaluation: each of the
+   nfft output samples is recomputed in double precision as an unscaled
+   cosine-weighted sum of the nfft/2 input coefficients.
+   Prints the SNR in dB and sets the global ret to 1 when it is below 60. */
+void check_inv(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse)
+{
+   int bin;
+   double errpow = 0;
+   double sigpow = 0;
+   double snr;
+   for (bin=0;bin<nfft;++bin) {
+      int k;
+      double ref = 0;
+      double diff;
+
+      for (k=0;k<nfft/2;++k) {
+         double phase = 2*M_PI*(bin+.5+.25*nfft)*(k+.5)/nfft;
+         double weight = cos(phase);
+
+         ref += in[k] * weight;
+      }
+      diff = ref - out[bin];
+      errpow += diff*diff;
+      sigpow += ref*ref;
+   }
+   snr = 10*log10(sigpow/errpow);
+   printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
+   if (snr<60) {
+      printf( "** poor snr: %f **\n", snr);
+      ret = 1;
+   }
+}
+
+
+/* Runs one forward (isinverse == 0) or inverse (isinverse != 0) MDCT of size
+   nfft on random input and verifies it with check() or check_inv().
+   The input is drawn from rand(), multiplied by 32768 and, for the inverse
+   direction, divided by nfft. The window is set to Q15ONE throughout.
+   A non-positive size or a failed allocation is reported and recorded in the
+   global ret; all four work buffers are released before returning. */
+void test1d(int nfft,int isinverse)
+{
+   celt_mdct_lookup cfg;
+   size_t buflen;
+   kiss_fft_scalar * in;
+   kiss_fft_scalar * in_copy;
+   kiss_fft_scalar * out;
+   opus_val16 * window;
+   int k;
+
+   if (nfft <= 0) {
+      printf("nfft=%d inverse=%d: invalid size\n",nfft,isinverse);
+      ret = 1;
+      return;
+   }
+
+   buflen = sizeof(kiss_fft_scalar)*nfft;
+   in = (kiss_fft_scalar*)malloc(buflen);
+   in_copy = (kiss_fft_scalar*)malloc(buflen);
+   out= (kiss_fft_scalar*)malloc(buflen);
+   window= (opus_val16*)malloc(sizeof(opus_val16)*nfft/2);
+
+   if (in == NULL || in_copy == NULL || out == NULL || window == NULL) {
+      printf("nfft=%d inverse=%d: allocation failed\n",nfft,isinverse);
+      ret = 1;
+      free(in);
+      free(in_copy);
+      free(out);
+      free(window);
+      return;
+   }
+
+   clt_mdct_init(&cfg, nfft, 0);
+   for (k=0;k<nfft;++k) {
+      in[k] = (rand() % 32768) - 16384;
+   }
+
+   for (k=0;k<nfft/2;++k) {
+      window[k] = Q15ONE;
+   }
+   for (k=0;k<nfft;++k) {
+      in[k] *= 32768;
+   }
+
+   if (isinverse)
+   {
+      for (k=0;k<nfft;++k) {
+         in[k] /= nfft;
+      }
+   }
+
+   /* The forward path is verified against this copy of the input rather
+      than against in itself. */
+   for (k=0;k<nfft;++k)
+      in_copy[k] = in[k];
+
+   if (isinverse)
+   {
+      for (k=0;k<nfft;++k)
+         out[k] = 0;
+      clt_mdct_backward(&cfg,in,out, window, nfft/2, 0, 1);
+      /* apply TDAC because clt_mdct_backward() no longer does that */
+      for (k=0;k<nfft/4;++k)
+         out[nfft-k-1] = out[nfft/2+k];
+      check_inv(in,out,nfft,isinverse);
+   } else {
+      clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1);
+      check(in_copy,out,nfft,isinverse);
+   }
+
+   free(in);
+   free(in_copy);
+   free(out);
+   free(window);
+   clt_mdct_clear(&cfg);
+}
+
+/* Test driver. With command-line arguments, each one is parsed with atoi()
+   and tested as an MDCT size, forward first and then inverse. Without
+   arguments a built-in list of sizes is used; the non power-of-two sizes are
+   left out when RADIX_TWO_ONLY is defined.
+   Returns the global ret, which the check routines set to 1 on a poor SNR. */
+int main(int argc,char ** argv)
+{
+   ALLOC_STACK;
+   if (argc>1) {
+      int arg;
+      for (arg=1;arg<argc;++arg) {
+         int inverse;
+         for (inverse=0;inverse<2;++inverse)
+            test1d(atoi(argv[arg]),inverse);
+      }
+   } else {
+      static const int sizes[] = {
+         32, 256, 512, 1024, 2048,
+#ifndef RADIX_TWO_ONLY
+         36, 40, 60, 120, 240, 480, 960, 1920,
+#endif
+      };
+      unsigned idx;
+      for (idx=0;idx<sizeof(sizes)/sizeof(sizes[0]);++idx) {
+         test1d(sizes[idx],0);
+         test1d(sizes[idx],1);
+      }
+   }
+   return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_rotation.c b/drivers/opus/celt/tests/test_unit_rotation.c
new file mode 100644
index 0000000000..c12cc3f02f
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_rotation.c
@@ -0,0 +1,90 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#define CELT_C
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "vq.c"
+#include "cwrs.c"
+#include "entcode.c"
+#include "entenc.c"
+#include "entdec.c"
+#include "mathops.c"
+#include "bands.h"
+#include <math.h>
+#define MAX_SIZE 100
+
+/* Set to 1 by test_rotation() when a check fails; returned by main(). */
+int ret=0;
+
+/* Returns 20*log10(E(ref)/E(ref-cur)) over the first N samples, where E()
+   is a sum of squares accumulated in double precision. */
+static double rotation_snr(const opus_val16 *ref, const opus_val16 *cur, int N)
+{
+   double noise = 0;
+   double signal = 0;
+   int n;
+   for (n=0;n<N;n++)
+   {
+      noise += (ref[n]-(double)cur[n])*(ref[n]-(double)cur[n]);
+      signal += ref[n]*(double)ref[n];
+   }
+   return 20*log10(signal/noise);
+}
+
+/* Fills a vector of N random samples, applies exp_rotation() forward and then
+   backward, and reports the SNR against the untouched copy after each step.
+   The test fails if the round trip is below 60 or if the forward rotation
+   alone left the signal within 20 of the original.
+   N must not exceed MAX_SIZE. */
+void test_rotation(int N, int K)
+{
+   opus_val16 reference[MAX_SIZE];
+   opus_val16 rotated[MAX_SIZE];
+   double snr_forward;
+   double snr_roundtrip;
+   int n;
+   for (n=0;n<N;n++)
+   {
+      reference[n] = rand()%32767-16384;
+      rotated[n] = reference[n];
+   }
+   exp_rotation(rotated, N, 1, 1, K, SPREAD_NORMAL);
+   snr_forward = rotation_snr(reference, rotated, N);
+   exp_rotation(rotated, N, -1, 1, K, SPREAD_NORMAL);
+   snr_roundtrip = rotation_snr(reference, rotated, N);
+   printf ("SNR for size %d (%d pulses) is %f (was %f without inverse)\n", N, K, snr_roundtrip, snr_forward);
+   if (snr_roundtrip < 60 || snr_forward > 20)
+   {
+      fprintf(stderr, "FAIL!\n");
+      ret = 1;
+   }
+}
+
+/* Runs test_rotation() on a fixed list of (size, pulse count) pairs and
+   returns the accumulated failure flag. */
+int main(void)
+{
+   static const int cases[][2] = { {15, 3}, {23, 5}, {50, 3}, {80, 1} };
+   int c;
+   ALLOC_STACK;
+   for (c=0;c<(int)(sizeof(cases)/sizeof(cases[0]));c++)
+      test_rotation(cases[c][0], cases[c][1]);
+   return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_types.c b/drivers/opus/celt/tests/test_unit_types.c
new file mode 100644
index 0000000000..29e671067f
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_types.c
@@ -0,0 +1,50 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_types.h"
+#include <stdio.h>
+
+/* Sanity checks on the opus integer typedefs: a 1 shifted left by 14 bits in
+   an opus_int16 must shift back to 1, and opus_int32 must be twice the size
+   of opus_int16.  Returns 0 on success, 1 (after a message on stderr) on the
+   first failed check. */
+int main(void)
+{
+   opus_int16 probe;
+   probe = 1;
+   probe <<= 14;
+   if ((probe>>14) == 1)
+   {
+      if (sizeof(opus_int16)*2 == sizeof(opus_int32))
+         return 0;
+      fprintf(stderr, "16*2 != 32\n");
+      return 1;
+   }
+   fprintf(stderr, "opus_int16 isn't 16 bits\n");
+   return 1;
+}
diff --git a/drivers/opus/celt/vq.c b/drivers/opus/celt/vq.c
new file mode 100644
index 0000000000..20b0b82728
--- /dev/null
+++ b/drivers/opus/celt/vq.c
@@ -0,0 +1,415 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "mathops.h"
+#include "cwrs.h"
+#include "vq.h"
+#include "arch.h"
+#include "os_support.h"
+#include "bands.h"
+#include "rate.h"
+/* Applies the same 2x2 rotation to every pair (X[i], X[i+stride]), in place.
+   With x1=X[i] and x2=X[i+stride], the pair becomes
+   (c*x1 - s*x2, c*x2 + s*x1); each sum of products is shifted right by 15
+   (SHR32) and narrowed with EXTRACT16 before it is stored.
+   The first loop walks i upward over [0, len-stride) and the second walks
+   it back down from len-2*stride-1 to 0, so interior samples are rotated
+   more than once and the order of the two passes matters. */
+static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
+{
+ int i;
+ celt_norm *Xptr;
+ Xptr = X;
+ for (i=0;i<len-stride;i++) /* forward pass */
+ {
+ celt_norm x1, x2;
+ x1 = Xptr[0];
+ x2 = Xptr[stride];
+ Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+ *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+ }
+ Xptr = &X[len-2*stride-1];
+ for (i=len-2*stride-1;i>=0;i--) /* backward pass */
+ {
+ celt_norm x1, x2;
+ x1 = Xptr[0];
+ x2 = Xptr[stride];
+ Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+ *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+ }
+}
+/* Applies (dir>=0) or undoes (dir<0) the spreading rotation on X[0..len).
+   Nothing is done when 2*K>=len or when spread is SPREAD_NONE.
+   Otherwise the rotation coefficients are derived from
+   gain = len/(len+factor*K) and theta = gain*gain/2, where factor is looked
+   up in SPREAD_FACTOR with index spread-1 (so spread is assumed to be 1..3
+   here -- TODO confirm against the SPREAD_* definitions).
+   X is processed as `stride` consecutive sub-vectors of len/stride samples;
+   each one gets a stride-1 pass of exp_rotation1() and, when len>=8*stride,
+   an additional pass at stride2 ~ sqrt(len/stride).  The inverse direction
+   runs the two passes in the opposite order. */
+static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
+{
+ static const int SPREAD_FACTOR[3]={15,10,5};
+ int i;
+ opus_val16 c, s;
+ opus_val16 gain, theta;
+ int stride2=0;
+ int factor;
+
+ if (2*K>=len || spread==SPREAD_NONE)
+ return;
+ factor = SPREAD_FACTOR[spread-1];
+
+ gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
+ theta = HALF16(MULT16_16_Q15(gain,gain));
+
+ c = celt_cos_norm(EXTEND32(theta));
+ s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */
+
+ if (len>=8*stride)
+ {
+ stride2 = 1;
+ /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
+ It's basically incrementing as long as (stride2+0.5)^2 < len/stride. */
+ while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
+ stride2++;
+ }
+ /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
+ extract_collapse_mask().*/
+ len /= stride; /* from here on len is the length of one sub-vector */
+ for (i=0;i<stride;i++)
+ {
+ if (dir < 0)
+ {
+ if (stride2)
+ exp_rotation1(X+i*len, len, stride2, s, c);
+ exp_rotation1(X+i*len, len, 1, c, s);
+ } else {
+ exp_rotation1(X+i*len, len, 1, c, -s);
+ if (stride2)
+ exp_rotation1(X+i*len, len, stride2, s, -c);
+ }
+ }
+}
+
+/** Scales the integer pulse vector iy[0..N) into X[0..N).
+ Every output is g*iy[i] (shifted down by k+1 with rounding via PSHR32),
+ where g = celt_rsqrt_norm(t)*gain and t is Ryy shifted by 2*(k-7).
+ Ryy is expected to be the squared norm of iy, as computed by the callers.
+ NOTE(review): the historical description of this function spoke of mixing
+ a pitch vector with the residual so that ||p+g*y||=1; no pitch input exists
+ in this version, so the result is presumably just iy normalised to `gain`.
+ NOTE(review): k is only declared under OPUS_FIXED_POINT but is named in the
+ VSHR32/PSHR32 arguments unconditionally; presumably the float versions of
+ those macros discard the shift argument -- confirm in arch.h. */
+static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
+ int N, opus_val32 Ryy, opus_val16 gain)
+{
+ int i;
+#ifdef OPUS_FIXED_POINT
+ int k;
+#endif
+ opus_val32 t;
+ opus_val16 g;
+
+#ifdef OPUS_FIXED_POINT
+ k = celt_ilog2(Ryy)>>1;
+#endif
+ t = VSHR32(Ryy, 2*(k-7));
+ g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+ i=0;
+ do /* do/while: the body runs at least once, so N>=1 is assumed */
+ X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
+ while (++i < N);
+}
+
+/* Builds a bit mask with one bit per block of iy: bit b is set when any of
+   the N/B entries of block b is non-zero.  When B<=1 the mask is always 1.
+   As in exp_rotation(), log2(B) could be passed around instead of B. */
+static unsigned extract_collapse_mask(int *iy, int N, int B)
+{
+   unsigned mask = 0;
+   int block_len;
+   int block;
+   if (B<=1)
+      return 1;
+   block_len = N/B;
+   block = 0;
+   do {
+      const int *entries = iy + block*block_len;
+      unsigned hit = 0;
+      int pos = 0;
+      /* The first entry of a block is always inspected, even if block_len
+         is zero. */
+      do {
+         if (entries[pos] != 0)
+            hit = 1;
+      } while (++pos<block_len);
+      mask |= hit<<block;
+   } while (++block<B);
+   return mask;
+}
+/* Encoder-side pulse search.
+   Rotates X with exp_rotation(), strips the signs, then greedily places the
+   K unit pulses in iy[] one at a time, each time choosing the position j that
+   maximises Rxy^2/Ryy.  When K > N/2 a first pass pre-allocates pulses by
+   projecting X onto the pyramid.  The signs are then restored, the pulse
+   vector is written with encode_pulses(), and the mask computed by
+   extract_collapse_mask() is returned.
+   With RESYNTH defined, X is additionally overwritten with the pulse vector
+   passed through normalise_residual() (using `gain`) and the inverse
+   rotation.
+   NOTE(review): rshift is only declared under OPUS_FIXED_POINT but is named
+   in a SHR32() argument unconditionally; presumably the float SHR32 discards
+   it -- confirm in arch.h. */
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
+#ifdef RESYNTH
+ , opus_val16 gain
+#endif
+ )
+{
+ VARDECL(celt_norm, y);
+ VARDECL(int, iy);
+ VARDECL(opus_val16, signx);
+ int i, j;
+ opus_val16 s;
+ int pulsesLeft;
+ opus_val32 sum;
+ opus_val32 xy;
+ opus_val16 yy;
+ unsigned collapse_mask;
+ SAVE_STACK;
+
+ celt_assert2(K>0, "alg_quant() needs at least one pulse");
+ celt_assert2(N>1, "alg_quant() needs at least two dimensions");
+
+ ALLOC(y, N, celt_norm);
+ ALLOC(iy, N, int);
+ ALLOC(signx, N, opus_val16);
+
+ exp_rotation(X, N, 1, B, K, spread);
+
+ /* Get rid of the sign */
+ sum = 0;
+ j=0; do {
+ if (X[j]>0)
+ signx[j]=1;
+ else {
+ signx[j]=-1;
+ X[j]=-X[j];
+ }
+ iy[j] = 0;
+ y[j] = 0;
+ } while (++j<N);
+
+ xy = yy = 0;
+
+ pulsesLeft = K;
+
+ /* Do a pre-search by projecting on the pyramid */
+ if (K > (N>>1))
+ {
+ opus_val16 rcp;
+ j=0; do {
+ sum += X[j];
+ } while (++j<N);
+
+ /* If X is too small, just replace it with a pulse at 0 */
+#ifdef OPUS_FIXED_POINT
+ if (sum <= K)
+#else
+ /* Prevents infinities and NaNs from causing too many pulses
+ to be allocated. 64 is an approximation of infinity here. */
+ if (!(sum > EPSILON && sum < 64))
+#endif
+ {
+ X[0] = QCONST16(1.f,14);
+ j=1; do
+ X[j]=0;
+ while (++j<N);
+ sum = QCONST16(1.f,14);
+ }
+ rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
+ j=0; do {
+#ifdef OPUS_FIXED_POINT
+ /* It's really important to round *towards zero* here */
+ iy[j] = MULT16_16_Q15(X[j],rcp);
+#else
+ iy[j] = (int)floor(rcp*X[j]);
+#endif
+ y[j] = (celt_norm)iy[j];
+ yy = MAC16_16(yy, y[j],y[j]);
+ xy = MAC16_16(xy, X[j],y[j]);
+ y[j] *= 2; /* y[] holds 2*iy[] from here on */
+ pulsesLeft -= iy[j];
+ } while (++j<N);
+ }
+ celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
+
+ /* This should never happen, but just in case it does (e.g. on silence)
+ we fill the first bin with pulses. */
+#ifdef OPUS_FIXED_POINT_DEBUG
+ celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
+#endif
+ if (pulsesLeft > N+3)
+ {
+ opus_val16 tmp = (opus_val16)pulsesLeft;
+ yy = MAC16_16(yy, tmp, tmp);
+ yy = MAC16_16(yy, tmp, y[0]);
+ iy[0] += pulsesLeft;
+ pulsesLeft=0;
+ }
+
+ s = 1; /* pulse increment; it is never changed below */
+ for (i=0;i<pulsesLeft;i++)
+ {
+ int best_id;
+ opus_val32 best_num = -VERY_LARGE16;
+ opus_val16 best_den = 0;
+#ifdef OPUS_FIXED_POINT
+ int rshift;
+#endif
+#ifdef OPUS_FIXED_POINT
+ rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
+#endif
+ best_id = 0;
+ /* The squared magnitude term gets added anyway, so we might as well
+ add it outside the loop */
+ yy = ADD32(yy, 1);
+ j=0;
+ do {
+ opus_val16 Rxy, Ryy;
+ /* Temporary sums of the new pulse(s) */
+ Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
+ /* We're multiplying y[j] by two so we don't have to do it here */
+ Ryy = ADD16(yy, y[j]);
+
+ /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
+ Rxy is positive because the sign is pre-computed) */
+ Rxy = MULT16_16_Q15(Rxy,Rxy);
+ /* The idea is to check for num/den >= best_num/best_den, but that way
+ we can do it without any division */
+ /* OPT: Make sure to use conditional moves here */
+ if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
+ {
+ best_den = Ryy;
+ best_num = Rxy;
+ best_id = j;
+ }
+ } while (++j<N);
+
+ /* Updating the sums of the new pulse(s) */
+ xy = ADD32(xy, EXTEND32(X[best_id]));
+ /* We're multiplying y[j] by two so we don't have to do it here */
+ yy = ADD16(yy, y[best_id]);
+
+ /* Only now that we've made the final choice, update y/iy */
+ /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
+ y[best_id] += 2*s;
+ iy[best_id]++;
+ }
+
+ /* Put the original sign back */
+ j=0;
+ do {
+ X[j] = MULT16_16(signx[j],X[j]);
+ if (signx[j] < 0)
+ iy[j] = -iy[j];
+ } while (++j<N);
+ encode_pulses(iy, N, K, enc);
+
+#ifdef RESYNTH
+ normalise_residual(iy, X, N, yy, gain);
+ exp_rotation(X, N, -1, B, K, spread);
+#endif
+
+ collapse_mask = extract_collapse_mask(iy, N, B);
+ RESTORE_STACK;
+ return collapse_mask;
+}
+
+/** Decoder-side counterpart of alg_quant().
+    Reads K pulses over N dimensions from the entropy decoder `dec`, passes
+    them through normalise_residual() with `gain`, undoes the spreading
+    rotation and leaves the result in X.
+    Returns the mask computed by extract_collapse_mask(). */
+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
+      ec_dec *dec, opus_val16 gain)
+{
+   VARDECL(int, pulses);
+   opus_val32 energy;
+   unsigned mask;
+   int n;
+   SAVE_STACK;
+
+   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
+   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
+   ALLOC(pulses, N, int);
+   decode_pulses(pulses, N, K, dec);
+
+   /* Squared norm of the decoded pulse vector. */
+   energy = 0;
+   n = 0;
+   do {
+      energy = MAC16_16(energy, pulses[n], pulses[n]);
+      n++;
+   } while (n < N);
+
+   normalise_residual(pulses, X, N, energy, gain);
+   exp_rotation(X, N, -1, B, K, spread);
+   mask = extract_collapse_mask(pulses, N, B);
+   RESTORE_STACK;
+   return mask;
+}
+
+/* Rescales X[0..N) in place by g = celt_rsqrt_norm(t)*gain, where t is the
+   energy of X (EPSILON plus the sum of squares) shifted by 2*(k-7). */
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
+{
+   opus_val32 energy = EPSILON;
+   opus_val32 scaled_energy;
+   opus_val16 scale;
+   int n;
+#ifdef OPUS_FIXED_POINT
+   int k;
+#endif
+
+   for (n=0;n<N;n++)
+      energy = MAC16_16(energy, X[n], X[n]);
+
+#ifdef OPUS_FIXED_POINT
+   k = celt_ilog2(energy)>>1;
+#endif
+   scaled_energy = VSHR32(energy, 2*(k-7));
+   scale = MULT16_16_P15(celt_rsqrt_norm(scaled_energy),gain);
+
+   for (n=0;n<N;n++)
+      X[n] = EXTRACT16(PSHR32(MULT16_16(scale, X[n]), k+1));
+}
+
+/* Returns the angle atan2(side, mid) scaled by 2/pi (and by 16384 in the
+   float build), where mid and side are the square roots of the two channel
+   energies.  With `stereo` set, mid/side are formed from the half-sum and
+   half-difference of X and Y; otherwise X is taken as mid and Y as side. */
+int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
+{
+   opus_val32 mid_energy = EPSILON;
+   opus_val32 side_energy = EPSILON;
+   opus_val16 mid_amp, side_amp;
+   int n;
+
+   if (!stereo)
+   {
+      /* X and Y are used directly as the mid and side signals. */
+      for (n=0;n<N;n++)
+      {
+         mid_energy = MAC16_16(mid_energy, X[n], X[n]);
+         side_energy = MAC16_16(side_energy, Y[n], Y[n]);
+      }
+   } else {
+      for (n=0;n<N;n++)
+      {
+         celt_norm m, s;
+         m = ADD16(SHR16(X[n],1),SHR16(Y[n],1));
+         s = SUB16(SHR16(X[n],1),SHR16(Y[n],1));
+         mid_energy = MAC16_16(mid_energy, m, m);
+         side_energy = MAC16_16(side_energy, s, s);
+      }
+   }
+   mid_amp = celt_sqrt(mid_energy);
+   side_amp = celt_sqrt(side_energy);
+#ifdef OPUS_FIXED_POINT
+   /* 0.63662 = 2/pi */
+   return MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side_amp, mid_amp));
+#else
+   return (int)floor(.5f+16384*0.63662f*atan2(side_amp,mid_amp));
+#endif
+}
diff --git a/drivers/opus/celt/vq.h b/drivers/opus/celt/vq.h
new file mode 100644
index 0000000000..8bab59c5e0
--- /dev/null
+++ b/drivers/opus/celt/vq.h
@@ -0,0 +1,70 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/**
+ @file vq.h
+ @brief Vector quantisation of the residual
+ */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef VQ_H
+#define VQ_H
+
+#include "entenc.h"
+#include "entdec.h"
+#include "opus_modes.h"
+
+/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
+ * the pitch and a combination of pulses such that its norm is still equal
+ * to 1. This is the function that will typically require the most CPU.
+ * @param X Residual signal to quantise/encode (returns quantised version)
+ * @param N Number of samples to encode
+ * @param K Number of pulses to use
+ * @param spread Spreading mode forwarded to the rotation step (SPREAD_NONE
+ * disables the rotation)
+ * @param B Number of blocks the band is split into; used as the rotation
+ * stride and as the number of bits in the returned mask
+ * @param enc Entropy encoder state
+ * @param gain Gain used when X is resynthesised (parameter only exists when
+ * RESYNTH is defined)
+ * @return A mask indicating which blocks in the band received pulses
+*/
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
+ ec_enc *enc
+#ifdef RESYNTH
+ , opus_val16 gain
+#endif
+ );
+
+/** Algebraic pulse decoder
+ * @param X Decoded normalised spectrum (returned)
+ * @param N Number of samples to decode
+ * @param K Number of pulses to use
+ * @param spread Spreading mode forwarded to the inverse rotation step
+ * (SPREAD_NONE disables the rotation)
+ * @param B Number of blocks the band is split into; used as the rotation
+ * stride and as the number of bits in the returned mask
+ * @param dec Entropy decoder state
+ * @param gain Gain applied to the decoded pulse vector when it is normalised
+ * @return A mask indicating which blocks in the band received pulses
+ */
+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
+ ec_dec *dec, opus_val16 gain);
+/** Rescales X in place by gain/sqrt(E), where E is the energy (sum of
+ * squares, plus a small EPSILON) of its N samples.
+ * @param X Vector to rescale (modified in place)
+ * @param N Number of samples in X
+ * @param gain Gain factor multiplied into the inverse square root of E
+ */
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain);
+/** Computes the angle atan2(side, mid) between two vectors, scaled by 2/pi.
+ * @param X First vector (taken as the mid signal when stereo is 0)
+ * @param Y Second vector (taken as the side signal when stereo is 0)
+ * @param stereo If non-zero, mid and side are formed from the half-sum and
+ * half-difference of X and Y before their energies are measured
+ * @param N Number of samples in X and Y
+ * @return The scaled angle as an integer
+ */
+int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N);
+
+#endif /* VQ_H */
diff --git a/drivers/opus/celt/x86/pitch_sse.h b/drivers/opus/celt/x86/pitch_sse.h
new file mode 100644
index 0000000000..695122a5ad
--- /dev/null
+++ b/drivers/opus/celt/x86/pitch_sse.h
@@ -0,0 +1,156 @@
+/* Copyright (c) 2013 Jean-Marc Valin and John Ridges */
+/**
+ @file pitch_sse.h
+ @brief Pitch analysis
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_SSE_H
+#define PITCH_SSE_H
+
+#include <xmmintrin.h>
+#include "arch.h"
+/* SSE version of the 4-lag cross-correlation kernel.
+   On return, sum[k] has been increased by the sum over j in [0,len) of
+   x[j]*y[j+k], for k=0..3.  The main loop consumes four x samples per
+   iteration; the if-chain after it handles the up to three leftover samples.
+   Two accumulators are used and added together only at the end.
+   All loads are unaligned.  y is read up to index len+2, so at least len+3
+   elements must be readable.
+   NOTE(review): the _ps intrinsics operate on float data, so this header is
+   presumably only used when opus_val16/opus_val32 are float -- confirm. */
+#define OVERRIDE_XCORR_KERNEL
+static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
+{
+ int j;
+ __m128 xsum1, xsum2;
+ xsum1 = _mm_loadu_ps(sum);
+ xsum2 = _mm_setzero_ps();
+
+ for (j = 0; j < len-3; j += 4)
+ {
+ __m128 x0 = _mm_loadu_ps(x+j);
+ __m128 yj = _mm_loadu_ps(y+j);
+ __m128 y3 = _mm_loadu_ps(y+j+3);
+
+ xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj)); /* x[j]   * y[j..j+3] */
+ xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
+ _mm_shuffle_ps(yj,y3,0x49))); /* x[j+1] * y[j+1..j+4] */
+ xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
+ _mm_shuffle_ps(yj,y3,0x9e))); /* x[j+2] * y[j+2..j+5] */
+ xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3)); /* x[j+3] * y[j+3..j+6] */
+ }
+ if (j < len)
+ {
+ xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+ if (++j < len)
+ {
+ xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+ if (++j < len)
+ {
+ xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+ }
+ }
+ }
+ _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
+}
+/* SSE version of the dual inner product.
+   Computes *xy1 = sum of x[i]*y01[i] and *xy2 = sum of x[i]*y02[i] for i in
+   [0,N).  Whole groups of four samples are accumulated with SSE and reduced
+   with a horizontal add; the remaining N%4 samples are then added to the
+   stored results with MAC16_16.  The previous contents of *xy1 and *xy2 are
+   overwritten, not accumulated into. */
+#define OVERRIDE_DUAL_INNER_PROD
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+ int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+ int i;
+ __m128 xsum1, xsum2;
+ xsum1 = _mm_setzero_ps();
+ xsum2 = _mm_setzero_ps();
+ for (i=0;i<N-3;i+=4)
+ {
+ __m128 xi = _mm_loadu_ps(x+i);
+ __m128 y1i = _mm_loadu_ps(y01+i);
+ __m128 y2i = _mm_loadu_ps(y02+i);
+ xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
+ xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
+ }
+ /* Horizontal sum */
+ xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1)); /* lanes 0,1 += lanes 2,3 */
+ xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55)); /* lane 0 += lane 1 */
+ _mm_store_ss(xy1, xsum1);
+ xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
+ xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
+ _mm_store_ss(xy2, xsum2);
+ for (;i<N;i++) /* scalar tail */
+ {
+ *xy1 = MAC16_16(*xy1, x[i], y01[i]);
+ *xy2 = MAC16_16(*xy2, x[i], y02[i]);
+ }
+}
+/* SSE version of the constant-gain comb filter.
+   For each i in [0,N) it computes
+     y[i] = x[i] + g10*x[i-T] + g11*(x[i-T+1]+x[i-T-1])
+                 + g12*(x[i-T+2]+x[i-T-2]),
+   four outputs at a time.  x is read starting at x[-T-2], so that history
+   must be addressable.  The five shifted input vectors are built from two
+   loads (x0v, carried over from the previous iteration, and x4v) with
+   shuffles instead of five separate loads.
+   The scalar loop for a length that is not a multiple of four is only
+   compiled with CUSTOM_MODES; otherwise N is presumably always a multiple
+   of four -- confirm against the callers.
+   By default the g11 and g12 terms are summed together before being added
+   to the output, which the in-code note says is not bit-exact with the
+   non-SSE version. */
+#define OVERRIDE_COMB_FILTER_CONST
+static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+ opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+ int i;
+ __m128 x0v;
+ __m128 g10v, g11v, g12v;
+ g10v = _mm_load1_ps(&g10);
+ g11v = _mm_load1_ps(&g11);
+ g12v = _mm_load1_ps(&g12);
+ x0v = _mm_loadu_ps(&x[-T-2]);
+ for (i=0;i<N-3;i+=4)
+ {
+ __m128 yi, yi2, x1v, x2v, x3v, x4v;
+ const opus_val32 *xp = &x[i-T-2];
+ yi = _mm_loadu_ps(x+i);
+ x4v = _mm_loadu_ps(xp+4);
+#if 0
+ /* Slower version with all loads */
+ x1v = _mm_loadu_ps(xp+1);
+ x2v = _mm_loadu_ps(xp+2);
+ x3v = _mm_loadu_ps(xp+3);
+#else
+ x2v = _mm_shuffle_ps(x0v, x4v, 0x4e); /* xp[2..5] */
+ x1v = _mm_shuffle_ps(x0v, x2v, 0x99); /* xp[1..4] */
+ x3v = _mm_shuffle_ps(x2v, x4v, 0x99); /* xp[3..6] */
+#endif
+
+ yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
+#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
+ yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
+ yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+#else
+ /* Use partial sums */
+ yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
+ _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+ yi = _mm_add_ps(yi, yi2);
+#endif
+ x0v=x4v; /* this iteration's xp+4 is the next iteration's xp */
+ _mm_storeu_ps(y+i, yi);
+ }
+#ifdef CUSTOM_MODES
+ for (;i<N;i++)
+ {
+ y[i] = x[i]
+ + MULT16_32_Q15(g10,x[i-T])
+ + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
+ + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
+ }
+#endif
+}
+
+#endif
diff --git a/drivers/opus/http.c b/drivers/opus/http.c
new file mode 100644
index 0000000000..803db044af
--- /dev/null
+++ b/drivers/opus/http.c
@@ -0,0 +1,3391 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+
+/*RFCs referenced in this file:
+ RFC 761: DOD Standard Transmission Control Protocol
+ RFC 1535: A Security Problem and Proposed Correction With Widely Deployed DNS
+ Software
+ RFC 1738: Uniform Resource Locators (URL)
+ RFC 1945: Hypertext Transfer Protocol -- HTTP/1.0
+ RFC 2068: Hypertext Transfer Protocol -- HTTP/1.1
+ RFC 2145: Use and Interpretation of HTTP Version Numbers
+ RFC 2246: The TLS Protocol Version 1.0
+ RFC 2459: Internet X.509 Public Key Infrastructure Certificate and
+ Certificate Revocation List (CRL) Profile
+ RFC 2616: Hypertext Transfer Protocol -- HTTP/1.1
+ RFC 2617: HTTP Authentication: Basic and Digest Access Authentication
+ RFC 2817: Upgrading to TLS Within HTTP/1.1
+ RFC 2818: HTTP Over TLS
+ RFC 3492: Punycode: A Bootstring encoding of Unicode for Internationalized
+ Domain Names in Applications (IDNA)
+ RFC 3986: Uniform Resource Identifier (URI): Generic Syntax
+ RFC 3987: Internationalized Resource Identifiers (IRIs)
+ RFC 4343: Domain Name System (DNS) Case Insensitivity Clarification
+ RFC 5894: Internationalized Domain Names for Applications (IDNA):
+ Background, Explanation, and Rationale
+ RFC 6066: Transport Layer Security (TLS) Extensions: Extension Definitions
+ RFC 6125: Representation and Verification of Domain-Based Application Service
+ Identity within Internet Public Key Infrastructure Using X.509 (PKIX)
+ Certificates in the Context of Transport Layer Security (TLS)
+ RFC 6555: Happy Eyeballs: Success with Dual-Stack Hosts*/
+
+typedef struct OpusParsedURL OpusParsedURL;
+typedef struct OpusStringBuf OpusStringBuf;
+typedef struct OpusHTTPConn OpusHTTPConn;
+typedef struct OpusHTTPStream OpusHTTPStream;
+
+/*Duplicate the characters in the range [_start,_end) into a newly allocated,
+   NUL-terminated string.
+  _start: The first character to copy.
+  _end:   One past the last character to copy (must not precede _start).
+  Return: The new string, or NULL if the range is INT_MAX bytes or longer or
+           the allocation failed.*/
+static char *op_string_range_dup(const char *_start,const char *_end){
+  size_t  nbytes;
+  char   *copy;
+  OP_ASSERT(_start<=_end);
+  nbytes=_end-_start;
+  /*Refusing huge strings here helps avoid overflow elsewhere, later.*/
+  if(OP_UNLIKELY(nbytes>=INT_MAX))return NULL;
+  copy=(char *)_ogg_malloc(sizeof(*copy)*(nbytes+1));
+  if(OP_UNLIKELY(copy==NULL))return NULL;
+  memcpy(copy,_start,sizeof(*copy)*(nbytes));
+  copy[nbytes]='\0';
+  return copy;
+}
+
+/*Duplicate a whole NUL-terminated string.
+  Return: The new string, or NULL on failure (see op_string_range_dup()).*/
+static char *op_string_dup(const char *_s){
+  const char *end;
+  end=_s+strlen(_s);
+  return op_string_range_dup(_s,end);
+}
+
+/*Convert the ASCII upper-case letters of a string to lower case, in place.
+  Only 'A'...'Z' are changed; the conversion does not depend on the locale.
+  _s: The NUL-terminated string to convert.
+  Return: _s.*/
+static char *op_string_tolower(char *_s){
+  char *p;
+  for(p=_s;*p!='\0';p++){
+    if(*p>='A'&&*p<='Z')*p=(char)(*p+('a'-'A'));
+  }
+  return _s;
+}
+
+/*URI character classes (from RFC 3986).*/
+#define OP_URL_ALPHA \
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+#define OP_URL_DIGIT "0123456789"
+#define OP_URL_HEXDIGIT "0123456789ABCDEFabcdef"
+/*Not a character class, but the characters allowed in <scheme>.*/
+#define OP_URL_SCHEME OP_URL_ALPHA OP_URL_DIGIT "+-."
+#define OP_URL_GEN_DELIMS "#/:?@[]"
+#define OP_URL_SUB_DELIMS "!$&'()*+,;="
+#define OP_URL_RESERVED OP_URL_GEN_DELIMS OP_URL_SUB_DELIMS
+#define OP_URL_UNRESERVED OP_URL_ALPHA OP_URL_DIGIT "-._~"
+/*Not a character class, but the characters allowed in <pct-encoded>.*/
+#define OP_URL_PCT_ENCODED "%"
+/*Not a character class or production rule, but for convenience.*/
+#define OP_URL_PCHAR_BASE \
+ OP_URL_UNRESERVED OP_URL_PCT_ENCODED OP_URL_SUB_DELIMS
+#define OP_URL_PCHAR OP_URL_PCHAR_BASE ":@"
+/*Not a character class, but the characters allowed in <userinfo> and
+ <IP-literal>.*/
+#define OP_URL_PCHAR_NA OP_URL_PCHAR_BASE ":"
+/*Not a character class, but the characters allowed in <segment-nz-nc>.*/
+#define OP_URL_PCHAR_NC OP_URL_PCHAR_BASE "@"
+/*Not a character class, but the characters allowed in <path>.*/
+#define OP_URL_PATH OP_URL_PCHAR "/"
+/*Not a character class, but the characters allowed in <query> / <fragment>.*/
+#define OP_URL_QUERY_FRAG OP_URL_PCHAR "/?"
+
+/*Check the <% HEXDIG HEXDIG> escapes of a URL for validity.
+  Every '%' must be followed by two hex digits, and the pair must not be "00".
+  _s: The NUL-terminated string to check.
+  Return: 0 if valid, or a negative value on failure.*/
+static int op_validate_url_escapes(const char *_s){
+  int i;
+  for(i=0;_s[i];i++){
+    if(_s[i]=='%'){
+      /*The <ctype.h> classifiers are only defined for values representable as
+         an unsigned char (or EOF).
+        A plain char holding a byte >=0x80 may be negative, so cast before
+         classifying.
+        If _s[i+1] is the terminating NUL, the first test already fails and
+         _s[i+2] is never read.*/
+      if(OP_UNLIKELY(!isxdigit((unsigned char)_s[i+1]))
+       ||OP_UNLIKELY(!isxdigit((unsigned char)_s[i+2]))
+       /*RFC 3986 says %00 "should be rejected if the application is not
+          expecting to receive raw data within a component."*/
+       ||OP_UNLIKELY(_s[i+1]=='0'&&_s[i+2]=='0')){
+        return OP_FALSE;
+      }
+      i+=2;
+    }
+  }
+  return 0;
+}
+
+/*Convert a hex digit to its actual value.
+  _c: The hex digit to convert.
+      Presumed to be valid ('0'...'9', 'A'...'F', or 'a'...'f').
+  Return: The value of the digit, in the range [0,15].*/
+static int op_hex_value(int _c){
+  return _c>='a'?_c-'a'+10:_c>='A'?_c-'A'+10:_c-'0';
+}
+
+/*Unescape all the <% HEXDIG HEXDIG> sequences in a string in-place.
+  Each escape is replaced by the single byte it encodes and the rest of the
+   string is moved down to close the gap, so the result is never longer than
+   the input.
+  This does no validity checking: every '%' must be followed by two hex digits
+   (op_validate_url_escapes() can be used to check that beforehand).
+  _s: The NUL-terminated string to unescape.
+  Return: _s.*/
+static char *op_unescape_url_component(char *_s){
+  int i;
+  int j;
+  /*i is the read position and j the write position; j<=i always holds, so
+     nothing is overwritten before it has been read.*/
+  for(i=j=0;_s[i];i++,j++){
+    if(_s[i]=='%'){
+      _s[j]=(char)(op_hex_value(_s[i+1])<<4|op_hex_value(_s[i+2]));
+      i+=2;
+    }
+    else _s[j]=_s[i];
+  }
+  /*The string may have shrunk: terminate it at its new end.*/
+  _s[j]='\0';
+  return _s;
+}
+
+/*Parse a file: URL.
+  This code is not meant to be fast: strspn() with large sets is likely to be
+  slow, but it is very convenient.
+  It is meant to be RFC 1738-compliant (as updated by RFC 3986).
+  _src: The NUL-terminated URL to parse.
+  Return: A pointer into _src at the start of the path component, or NULL if
+  the URL is rejected: the scheme is not "file" (compared without regard to
+  case), an escape sequence is invalid, a host other than the empty host or
+  "localhost" is given, or anything other than path characters (such as a
+  query or fragment) follows.
+  The returned path still contains its escape sequences; only a private copy
+  of the host is unescaped here.*/
+static const char *op_parse_file_url(const char *_src){
+ const char *scheme_end;
+ const char *path;
+ const char *path_end;
+ scheme_end=_src+strspn(_src,OP_URL_SCHEME);
+ if(OP_UNLIKELY(*scheme_end!=':')
+ ||scheme_end-_src!=4||op_strncasecmp(_src,"file",4)!=0){
+ /*Unsupported protocol.*/
+ return NULL;
+ }
+ /*Make sure all escape sequences are valid to simplify unescaping later.*/
+ if(OP_UNLIKELY(op_validate_url_escapes(scheme_end+1)<0))return NULL;
+ if(scheme_end[1]=='/'&&scheme_end[2]=='/'){
+ const char *host;
+ /*file: URLs can have a host!
+ Yeah, I was surprised, too, but that's what RFC 1738 says.
+ It also says, "The file URL scheme is unusual in that it does not specify
+ an Internet protocol or access method for such files; as such, its
+ utility in network protocols between hosts is limited," which is a mild
+ understatement.*/
+ host=scheme_end+3;
+ /*The empty host is what we expect.*/
+ if(OP_LIKELY(*host=='/'))path=host;
+ else{
+ const char *host_end;
+ char host_buf[28];
+ /*RFC 1738 says localhost "is interpreted as `the machine from which the
+ URL is being interpreted,'" so let's check for it.*/
+ host_end=host+strspn(host,OP_URL_PCHAR_BASE);
+ /*No <port> allowed.
+ This also rejects IP-Literals.*/
+ if(*host_end!='/')return NULL;
+ /*An escaped "localhost" can take at most 27 characters.*/
+ if(OP_UNLIKELY(host_end-host>27))return NULL;
+ memcpy(host_buf,host,sizeof(*host_buf)*(host_end-host));
+ host_buf[host_end-host]='\0'; /*Index is at most 27: fits in host_buf[28].*/
+ op_unescape_url_component(host_buf);
+ op_string_tolower(host_buf);
+ /*Some other host: give up.*/
+ if(OP_UNLIKELY(strcmp(host_buf,"localhost")!=0))return NULL;
+ path=host_end;
+ }
+ }
+ else path=scheme_end+1; /*No "//": the path starts right after the colon.*/
+ path_end=path+strspn(path,OP_URL_PATH);
+ /*This will reject a <query> or <fragment> component, too.
+ I don't know what to do with queries, but a temporal fragment would at
+ least make sense.
+ RFC 1738 pretty clearly defines a <searchpart> that's equivalent to the
+ RFC 3986 <query> component for other schemes, but not the file: scheme,
+ so I'm going to just reject it.*/
+ if(*path_end!='\0')return NULL;
+ return path;
+}
+
+#if defined(OP_ENABLE_HTTP)
+# if defined(_WIN32)
+# include <winsock2.h>
+# include <ws2tcpip.h>
+# include <openssl/ssl.h>
+# include "winerrno.h"
+
+typedef SOCKET op_sock;
+
+# define OP_INVALID_SOCKET (INVALID_SOCKET)
+
+/*Vista and later support WSAPoll(), but we don't want to rely on that.
+ Instead we re-implement it badly using select().
+ Unfortunately, they define a conflicting struct pollfd, so we only define our
+ own if it looks like that one has not already been defined.*/
+# if !defined(POLLIN)
+/*Equivalent to POLLIN.*/
+# define POLLRDNORM (0x0100)
+/*Priority band data can be read.*/
+# define POLLRDBAND (0x0200)
+/*There is data to read.*/
+# define POLLIN (POLLRDNORM|POLLRDBAND)
+/* There is urgent data to read.*/
+# define POLLPRI (0x0400)
+/*Equivalent to POLLOUT.*/
+# define POLLWRNORM (0x0010)
+/*Writing now will not block.*/
+# define POLLOUT (POLLWRNORM)
+/*Priority data may be written.*/
+# define POLLWRBAND (0x0020)
+/*Error condition (output only).*/
+# define POLLERR (0x0001)
+/*Hang up (output only).*/
+# define POLLHUP (0x0002)
+/*Invalid request: fd not open (output only).*/
+# define POLLNVAL (0x0004)
+
+struct pollfd{ /*Fallback used only when POLLIN was not already defined.*/
+ /*File descriptor.
+ This is declared as an op_sock rather than a plain int.*/
+ op_sock fd;
+ /*Requested events.
+ op_poll_win32() only looks at the POLLIN and POLLOUT bits.*/
+ short events;
+ /*Returned events.
+ op_poll_win32() clears this and then sets POLLIN, POLLOUT, and/or POLLHUP.*/
+ short revents;
+};
+# endif
+
+/*But Winsock never defines nfds_t (it's simply hard-coded to ULONG).*/
+typedef unsigned long nfds_t;
+
+/*A minimal poll() replacement for Winsock, built on top of select().
+  Only POLLIN and POLLOUT requests are honored; every descriptor is also
+   watched for exceptional conditions, which are reported as POLLHUP.
+  _fds:     The descriptors to wait on.
+            The revents field of every entry is overwritten.
+  _nfds:    The number of entries in _fds.
+  _timeout: The maximum time to wait, in milliseconds, or a negative value to
+             wait without a timeout.
+  Return: Whatever select() returned.
+  The usage of FD_SET() below is O(N^2).
+  This is okay because select() is limited to 64 sockets in Winsock, anyway.
+  In practice, we only ever call it with one or two sockets.*/
+static int op_poll_win32(struct pollfd *_fds,nfds_t _nfds,int _timeout){
+  struct timeval  wait;
+  struct timeval *pwait;
+  fd_set          readable;
+  fd_set          writable;
+  fd_set          exceptional;
+  nfds_t          fdi;
+  int             nready;
+  FD_ZERO(&readable);
+  FD_ZERO(&writable);
+  FD_ZERO(&exceptional);
+  for(fdi=0;fdi<_nfds;fdi++){
+    struct pollfd *pfd;
+    pfd=_fds+fdi;
+    pfd->revents=0;
+    if(pfd->events&POLLIN)FD_SET(pfd->fd,&readable);
+    if(pfd->events&POLLOUT)FD_SET(pfd->fd,&writable);
+    FD_SET(pfd->fd,&exceptional);
+  }
+  /*A negative timeout is passed to select() as a NULL timeval.*/
+  if(_timeout<0)pwait=NULL;
+  else{
+    wait.tv_sec=_timeout/1000;
+    wait.tv_usec=(_timeout%1000)*1000;
+    pwait=&wait;
+  }
+  nready=select(-1,&readable,&writable,&exceptional,pwait);
+  /*Nothing is ready (or select() failed): all revents stay cleared.*/
+  if(nready<=0)return nready;
+  for(fdi=0;fdi<_nfds;fdi++){
+    struct pollfd *pfd;
+    pfd=_fds+fdi;
+    if(FD_ISSET(pfd->fd,&readable))pfd->revents|=POLLIN;
+    if(FD_ISSET(pfd->fd,&writable))pfd->revents|=POLLOUT;
+    /*This isn't correct: there are several different things that might have
+       happened to a fd in the exception set, but I don't know a good way to
+       distinguish them without more context from the caller.
+      It's okay, because we don't actually check any of these bits, we just
+       need _some_ bit set.*/
+    if(FD_ISSET(pfd->fd,&exceptional))pfd->revents|=POLLHUP;
+  }
+  return nready;
+}
+
+/*We define op_errno() to make it clear that it's not an l-value like normal
+ errno is.*/
+# define op_errno() (WSAGetLastError()?WSAGetLastError()-WSABASEERR:0)
+# define op_reset_errno() (WSASetLastError(0))
+
+/*The remaining functions don't get an op_ prefix even though they only
+ operate on sockets, because we don't use non-socket I/O here, and this
+ minimizes the changes needed to deal with Winsock.*/
+# define close(_fd) closesocket(_fd)
+/*This relies on sizeof(u_long)==sizeof(int), which is always true on both
+ Win32 and Win64.*/
+# define ioctl(_fd,_req,_arg) ioctlsocket(_fd,_req,(u_long *)(_arg))
+# define getsockopt(_fd,_level,_name,_val,_len) \
+ getsockopt(_fd,_level,_name,(char *)(_val),_len)
+# define setsockopt(_fd,_level,_name,_val,_len) \
+ setsockopt(_fd,_level,_name,(const char *)(_val),_len)
+# define poll(_fds,_nfds,_timeout) op_poll_win32(_fds,_nfds,_timeout)
+
+# if defined(_MSC_VER)
+typedef ptrdiff_t ssize_t;
+# endif
+
+/*Load certificates from the built-in certificate store.*/
+int SSL_CTX_set_default_verify_paths_win32(SSL_CTX *_ssl_ctx);
+# define SSL_CTX_set_default_verify_paths \
+ SSL_CTX_set_default_verify_paths_win32
+
+# else
+/*Normal Berkeley sockets.*/
+# include <sys/ioctl.h>
+# include <sys/types.h>
+# include <sys/socket.h>
+# include <arpa/inet.h>
+# include <netinet/in.h>
+# include <netinet/tcp.h>
+# include <fcntl.h>
+# include <netdb.h>
+# include <poll.h>
+# include <unistd.h>
+# include <openssl/ssl.h>
+
+typedef int op_sock;
+
+# define OP_INVALID_SOCKET (-1)
+
+# define op_errno() (errno)
+# define op_reset_errno() (errno=0)
+
+# endif
+# include <sys/timeb.h>
+# include <openssl/x509v3.h>
+
+/*The maximum number of simultaneous connections.
+ RFC 2616 says this SHOULD NOT be more than 2, but everyone on the modern web
+ ignores that (e.g., IE 8 bumped theirs up from 2 to 6, Firefox uses 15).
+ If it makes you feel better, we'll only ever actively read from one of these
+ at a time.
+ The others are kept around mainly to avoid slow-starting a new connection
+ when seeking, and time out rapidly.*/
+# define OP_NCONNS_MAX (4)
+
+/*The amount of time before we attempt to re-resolve the host.
+ This is 10 minutes, as recommended in RFC 6555 for expiring cached connection
+ results for dual-stack hosts.*/
+# define OP_RESOLVE_CACHE_TIMEOUT_MS (10*60*(opus_int32)1000)
+
+/*The number of redirections at which we give up.
+ The value here is the current default in Firefox.
+ RFC 2068 mandated a maximum of 5, but RFC 2616 relaxed that to "a client
+ SHOULD detect infinite redirection loops."
+ Fortunately, 20 is less than infinity.*/
+# define OP_REDIRECT_LIMIT (20)
+
+/*The initial size of the buffer used to read a response message (before the
+ body).*/
+# define OP_RESPONSE_SIZE_MIN (510)
+/*The maximum size of a response message (before the body).
+ Responses larger than this will be discarded.
+ I've seen a real server return 20 kB of data for a 302 Found response.
+ Increasing this beyond 32kB will cause problems on platforms with a 16-bit
+ int.*/
+# define OP_RESPONSE_SIZE_MAX (32766)
+
+/*The number of milliseconds we will allow a connection to sit idle before we
+ refuse to resurrect it.
+ Apache as of 2.2 has reduced its default timeout to 5 seconds (from 15), so
+ that's what we'll use here.*/
+# define OP_CONNECTION_IDLE_TIMEOUT_MS (5*1000)
+
+/*The number of milliseconds we will wait to send or receive data before giving
+ up.*/
+# define OP_POLL_TIMEOUT_MS (30*1000)
+
+/*We will always attempt to read ahead at least this much in preference to
+ opening a new connection.*/
+# define OP_READAHEAD_THRESH_MIN (32*(opus_int32)1024)
+
+/*The amount of data to request after a seek.
+ This is a trade-off between read throughput after a seek vs. the ability
+ to quickly perform another seek with the same connection.*/
+# define OP_PIPELINE_CHUNK_SIZE (32*(opus_int32)1024)
+/*Subsequent chunks are requested with larger and larger sizes until they pass
+ this threshold, after which we just ask for the rest of the resource.*/
+# define OP_PIPELINE_CHUNK_SIZE_MAX (1024*(opus_int32)1024)
+/*This is the maximum number of requests we'll make with a single connection.
+ Many servers will simply disconnect after we attempt some number of requests,
+ possibly without sending a Connection: close header, meaning we won't
+ discover it until we try to read beyond the end of the current chunk.
+ We can reconnect when that happens, but this is slow.
+ Instead, we impose a limit ourselves (set to the default for Apache
+ installations and thus likely the most common value in use).*/
+# define OP_PIPELINE_MAX_REQUESTS (100)
+/*This should be the number of requests, starting from a chunk size of
+ OP_PIPELINE_CHUNK_SIZE and doubling each time, until we exceed
+ OP_PIPELINE_CHUNK_SIZE_MAX and just request the rest of the file.
+ We won't reuse a connection when seeking unless it has at least this many
+ requests left, to reduce the chances we'll have to open a new connection
+ while reading forward afterwards.*/
+# define OP_PIPELINE_MIN_REQUESTS (7)
+
+/*Is this an https URL?
+ For now we can simply check the last letter of the scheme.*/
+# define OP_URL_IS_SSL(_url) ((_url)->scheme[4]=='s')
+
+/*Does this URL use the default port for its scheme?*/
+# define OP_URL_IS_DEFAULT_PORT(_url) \
+ (!OP_URL_IS_SSL(_url)&&(_url)->port==80 \
+ ||OP_URL_IS_SSL(_url)&&(_url)->port==443)
+
+struct OpusParsedURL{ /*Every string here is freed by op_parsed_url_clear().*/
+ /*Either "http" or "https".
+ op_parse_url_impl() stores this lower-cased, which OP_URL_IS_SSL() relies
+ on.*/
+ char *scheme;
+ /*The user name from the <userinfo> component, or NULL.
+ This is stored with its URL escapes already decoded.*/
+ char *user;
+ /*The password from the <userinfo> component, or NULL.
+ This is stored with its URL escapes already decoded.*/
+ char *pass;
+ /*The <host> component.
+ This may not be NULL.
+ For an <IP-literal>, the enclosing brackets are not included.*/
+ char *host;
+ /*The <path> and <query> components.
+ This may not be NULL.
+ op_parse_url_impl() makes sure it always starts with a '/'.*/
+ char *path;
+ /*The <port> component.
+ This is set to the default port if the URL did not contain one.*/
+ unsigned port;
+};
+
+/*Parse a URL.
+  This code is not meant to be fast: strspn() with large sets is likely to be
+   slow, but it is very convenient.
+  It is meant to be RFC 3986-compliant.
+  We currently do not support IRIs (Internationalized Resource Identifiers,
+   RFC 3987).
+  Callers should translate them to URIs first.
+  [out] _dst: Returns the parsed components.
+              On failure this may be left holding some already-duplicated
+               strings, so the caller is responsible for clearing it (as
+               op_parse_url() does).
+  _src:       The NUL-terminated URL to parse.
+  Return: 0 on success.
+          OP_EIMPL:  The scheme was not "http" or "https".
+          OP_EINVAL: The URL was malformed.
+          OP_EFAULT: A component could not be duplicated.*/
+static int op_parse_url_impl(OpusParsedURL *_dst,const char *_src){
+  const char  *scheme_end;
+  const char  *authority;
+  const char  *userinfo_end;
+  const char  *user;
+  const char  *user_end;
+  const char  *pass;
+  const char  *hostport;
+  const char  *hostport_end;
+  const char  *host_end;
+  const char  *port;
+  opus_int32   port_num;
+  const char  *port_end;
+  const char  *path;
+  const char  *path_end;
+  const char  *uri_end;
+  scheme_end=_src+strspn(_src,OP_URL_SCHEME);
+  /*Accept "http" or "https" in any mix of upper and lower case: the scheme
+     must be 4 or 5 characters long and match a prefix of "https".*/
+  if(OP_UNLIKELY(*scheme_end!=':')
+   ||OP_UNLIKELY(scheme_end-_src<4)||OP_UNLIKELY(scheme_end-_src>5)
+   ||OP_UNLIKELY(op_strncasecmp(_src,"https",scheme_end-_src)!=0)){
+    /*Unsupported protocol.*/
+    return OP_EIMPL;
+  }
+  if(OP_UNLIKELY(scheme_end[1]!='/')||OP_UNLIKELY(scheme_end[2]!='/')){
+    /*We require an <authority> component.*/
+    return OP_EINVAL;
+  }
+  authority=scheme_end+3;
+  /*Make sure all escape sequences are valid to simplify unescaping later.*/
+  if(OP_UNLIKELY(op_validate_url_escapes(authority)<0))return OP_EINVAL;
+  /*Look for a <userinfo> component.*/
+  userinfo_end=authority+strspn(authority,OP_URL_PCHAR_NA);
+  if(*userinfo_end=='@'){
+    /*Found one.*/
+    user=authority;
+    /*Look for a password (yes, clear-text passwords are deprecated, I know,
+       but what else are people supposed to use? use SSL if you care).*/
+    user_end=authority+strspn(authority,OP_URL_PCHAR_BASE);
+    if(*user_end==':')pass=user_end+1;
+    else pass=NULL;
+    hostport=userinfo_end+1;
+  }
+  else{
+    /*We shouldn't have to initialize user_end, but gcc is too dumb to figure
+       out that user!=NULL below means we didn't take this else branch.*/
+    user=user_end=NULL;
+    pass=NULL;
+    hostport=authority;
+  }
+  /*Try to figure out where the <host> component ends.*/
+  if(hostport[0]=='['){
+    hostport++;
+    /*We have an <IP-literal>, which can contain colons.*/
+    hostport_end=host_end=hostport+strspn(hostport,OP_URL_PCHAR_NA);
+    /*host_end stays on the ']' (so the brackets are not part of the host),
+       while hostport_end moves past it.*/
+    if(OP_UNLIKELY(*hostport_end++!=']'))return OP_EINVAL;
+  }
+  /*Currently we don't support IDNA (RFC 5894), because I don't want to deal
+     with the policy about which domains should not be internationalized to
+     avoid confusing similarities.
+    Give this API Punycode (RFC 3492) domain names instead.*/
+  else hostport_end=host_end=hostport+strspn(hostport,OP_URL_PCHAR_BASE);
+  /*TODO: Validate host.*/
+  /*Is there a port number?*/
+  port_num=-1;
+  if(*hostport_end==':'){
+    int i;
+    port=hostport_end+1;
+    port_end=port+strspn(port,OP_URL_DIGIT);
+    path=port_end;
+    /*Not part of RFC 3986, but require port numbers in the range 0...65535.*/
+    if(OP_LIKELY(port_end-port>0)){
+      /*Skip leading zeros so the length check below bounds the value.*/
+      while(*port=='0')port++;
+      if(OP_UNLIKELY(port_end-port>5))return OP_EINVAL;
+      port_num=0;
+      for(i=0;i<port_end-port;i++)port_num=port_num*10+port[i]-'0';
+      if(OP_UNLIKELY(port_num>65535))return OP_EINVAL;
+    }
+  }
+  else path=hostport_end;
+  path_end=path+strspn(path,OP_URL_PATH);
+  /*If the path is not empty, it must begin with a '/'.*/
+  if(OP_LIKELY(path_end>path)&&OP_UNLIKELY(path[0]!='/'))return OP_EINVAL;
+  /*Consume the <query> component, if any (right now we don't split this out
+     from the <path> component).*/
+  if(*path_end=='?')path_end=path_end+strspn(path_end,OP_URL_QUERY_FRAG);
+  /*Discard the <fragment> component, if any.
+    This doesn't get sent to the server.
+    Some day we should add support for Media Fragment URIs
+     <http://www.w3.org/TR/media-frags/>.*/
+  if(*path_end=='#')uri_end=path_end+1+strspn(path_end+1,OP_URL_QUERY_FRAG);
+  else uri_end=path_end;
+  /*If there's anything left, this was not a valid URL.*/
+  if(OP_UNLIKELY(*uri_end!='\0'))return OP_EINVAL;
+  _dst->scheme=op_string_range_dup(_src,scheme_end);
+  if(OP_UNLIKELY(_dst->scheme==NULL))return OP_EFAULT;
+  op_string_tolower(_dst->scheme);
+  if(user!=NULL){
+    _dst->user=op_string_range_dup(user,user_end);
+    if(OP_UNLIKELY(_dst->user==NULL))return OP_EFAULT;
+    op_unescape_url_component(_dst->user);
+    /*Unescaping might have created a ':' in the username.
+      That's not allowed by RFC 2617's Basic Authentication Scheme.*/
+    if(OP_UNLIKELY(strchr(_dst->user,':')!=NULL))return OP_EINVAL;
+  }
+  else _dst->user=NULL;
+  if(pass!=NULL){
+    _dst->pass=op_string_range_dup(pass,userinfo_end);
+    if(OP_UNLIKELY(_dst->pass==NULL))return OP_EFAULT;
+    op_unescape_url_component(_dst->pass);
+  }
+  else _dst->pass=NULL;
+  _dst->host=op_string_range_dup(hostport,host_end);
+  if(OP_UNLIKELY(_dst->host==NULL))return OP_EFAULT;
+  if(port_num<0){
+    /*Pick the default port from the lower-cased copy of the scheme rather
+       than from _src.
+      The scheme was matched case-insensitively, so _src[4] may be 'S', which
+       would select port 80 for an "HTTPS" URL even though OP_URL_IS_SSL()
+       treats it as https.
+      For "http", scheme[4] is the terminating NUL.*/
+    if(_dst->scheme[4]=='s')port_num=443;
+    else port_num=80;
+  }
+  _dst->port=(unsigned)port_num;
+  /*RFC 2616 says an empty <abs-path> component is equivalent to "/", and we
+     MUST use the latter in the Request-URI.
+    Reserve space for the slash here.*/
+  if(path==path_end||path[0]=='?')path--;
+  _dst->path=op_string_range_dup(path,path_end);
+  if(OP_UNLIKELY(_dst->path==NULL))return OP_EFAULT;
+  /*And force-set it here.*/
+  _dst->path[0]='/';
+  return 0;
+}
+
+/*Zero-fill a parsed URL, so that op_parsed_url_clear() can be called on it
+   even if parsing never filled it in.*/
+static void op_parsed_url_init(OpusParsedURL *_url){
+  memset(_url,0,sizeof(OpusParsedURL));
+}
+
+/*Release every string owned by a parsed URL.
+  The structure itself is not freed, and its pointers are not reset.*/
+static void op_parsed_url_clear(OpusParsedURL *_url){
+  _ogg_free(_url->path);
+  _ogg_free(_url->host);
+  _ogg_free(_url->pass);
+  _ogg_free(_url->user);
+  _ogg_free(_url->scheme);
+}
+
+/*Parse _src into a temporary and only copy it to _dst on success.
+  On failure, anything the parser duplicated is freed and _dst is left
+   untouched.
+  Return: 0 on success, or the negative value from op_parse_url_impl().*/
+static int op_parse_url(OpusParsedURL *_dst,const char *_src){
+  OpusParsedURL parsed;
+  int           status;
+  op_parsed_url_init(&parsed);
+  status=op_parse_url_impl(&parsed,_src);
+  if(OP_UNLIKELY(status<0)){
+    op_parsed_url_clear(&parsed);
+    return status;
+  }
+  *_dst=parsed;
+  return status;
+}
+
+/*A buffer to hold growing strings.
+ The main purpose of this is to consolidate allocation checks and simplify
+ cleanup on a failed allocation.
+ op_sb_append() keeps the contents NUL-terminated.*/
+struct OpusStringBuf{
+ char *buf; /*The storage, or NULL if nothing has been allocated yet.*/
+ int nbuf; /*The number of characters in use, not counting the NUL.*/
+ int cbuf; /*The number of characters allocated for buf.*/
+};
+
+/*Start out empty, with no storage allocated.*/
+static void op_sb_init(OpusStringBuf *_sb){
+  _sb->nbuf=_sb->cbuf=0;
+  _sb->buf=NULL;
+}
+
+/*Release the storage.
+  The fields are not reset, so the buffer must be re-initialized before it is
+   used again.*/
+static void op_sb_clear(OpusStringBuf *_sb){
+  char *storage;
+  storage=_sb->buf;
+  _ogg_free(storage);
+}
+
+/*Make sure we have room for at least _capacity characters (plus 1 more for the
+   terminating NUL).
+  When growth is needed, the allocation at least doubles.
+  Return: 0 on success, or OP_EFAULT if the new size would overflow an int or
+   the reallocation fails (the old storage is kept in that case).*/
+static int op_sb_ensure_capacity(OpusStringBuf *_sb,int _capacity){
+  char *new_buf;
+  int   old_cbuf;
+  int   new_cbuf;
+  old_cbuf=_sb->cbuf;
+  /*Nothing to do if the current allocation already has room to spare.*/
+  if(_capacity<old_cbuf-1)return 0;
+  if(OP_UNLIKELY(old_cbuf>(INT_MAX-1>>1)))return OP_EFAULT;
+  if(OP_UNLIKELY(_capacity>=INT_MAX-1))return OP_EFAULT;
+  new_cbuf=OP_MAX(2*old_cbuf+1,_capacity+1);
+  new_buf=_ogg_realloc(_sb->buf,sizeof(*new_buf)*new_cbuf);
+  if(OP_UNLIKELY(new_buf==NULL))return OP_EFAULT;
+  _sb->cbuf=new_cbuf;
+  _sb->buf=new_buf;
+  return 0;
+}
+
+/*Increase the capacity of the buffer, but not to more than _max_size
+   characters (plus 1 more for the terminating NUL).
+  The allocation is doubled (plus one) when that still fits, and is otherwise
+   set to _max_size+1.
+  Return: 0 on success, or OP_EFAULT if the reallocation fails.*/
+static int op_sb_grow(OpusStringBuf *_sb,int _max_size){
+  char *new_buf;
+  int   new_cbuf;
+  OP_ASSERT(_max_size<=INT_MAX-1);
+  if(_sb->cbuf<=(_max_size-1>>1))new_cbuf=2*_sb->cbuf+1;
+  else new_cbuf=_max_size+1;
+  new_buf=_ogg_realloc(_sb->buf,sizeof(*new_buf)*new_cbuf);
+  if(OP_UNLIKELY(new_buf==NULL))return OP_EFAULT;
+  _sb->cbuf=new_cbuf;
+  _sb->buf=new_buf;
+  return 0;
+}
+
+/*Append _len characters from _s and re-terminate the buffer.
+  Return: 0 on success, or a negative value if the result would not fit in an
+   int or the buffer could not be enlarged.*/
+static int op_sb_append(OpusStringBuf *_sb,const char *_s,int _len){
+  int old_len;
+  int new_len;
+  int status;
+  old_len=_sb->nbuf;
+  if(OP_UNLIKELY(old_len>INT_MAX-_len))return OP_EFAULT;
+  new_len=old_len+_len;
+  status=op_sb_ensure_capacity(_sb,new_len);
+  if(OP_UNLIKELY(status<0))return status;
+  /*The storage may have moved, so only look at _sb->buf from here on.*/
+  memcpy(_sb->buf+old_len,_s,sizeof(*_sb->buf)*_len);
+  _sb->buf[new_len]='\0';
+  _sb->nbuf=new_len;
+  return 0;
+}
+
+/*Append a whole NUL-terminated string.
+  Return: The result of op_sb_append().*/
+static int op_sb_append_string(OpusStringBuf *_sb,const char *_s){
+  int len;
+  len=strlen(_s);
+  return op_sb_append(_sb,_s,len);
+}
+
+/*Append ":<port>" in decimal.
+  Return: The result of op_sb_append().*/
+static int op_sb_append_port(OpusStringBuf *_sb,unsigned _port){
+  /*A ':' plus at most five digits plus the terminating NUL.*/
+  char suffix[7];
+  int  suffix_len;
+  OP_ASSERT(_port<=65535U);
+  /*sprintf() returns the number of characters written, not counting the NUL,
+     which is exactly the length to append.*/
+  suffix_len=sprintf(suffix,":%u",_port);
+  return op_sb_append(_sb,suffix,suffix_len);
+}
+
+/*Append the decimal representation of a non-negative 64-bit integer.
+  Return: 0 on success, or a negative value if any append failed.*/
+static int op_sb_append_nonnegative_int64(OpusStringBuf *_sb,opus_int64 _i){
+  char ch;
+  int  first;
+  int  last;
+  int  status;
+  OP_ASSERT(_i>=0);
+  first=_sb->nbuf;
+  status=0;
+  /*Emit the digits least-significant first; there is always at least one.*/
+  do{
+    ch='0'+_i%10;
+    status|=op_sb_append(_sb,&ch,1);
+    _i/=10;
+  }
+  while(_i>0);
+  if(OP_UNLIKELY(status<0))return status;
+  /*The digits came out backwards, so swap them into place, working inwards
+     from both ends.*/
+  {
+    char *digits;
+    digits=_sb->buf;
+    for(last=_sb->nbuf-1;first<last;first++,last--){
+      ch=digits[first];
+      digits[first]=digits[last];
+      digits[last]=ch;
+    }
+  }
+  return status;
+}
+
+/*Look up the stream-socket addresses for a host and numeric port.
+  Return: The list produced by getaddrinfo(), or NULL if the lookup failed.*/
+static struct addrinfo *op_resolve(const char *_host,unsigned _port){
+  struct addrinfo  criteria;
+  struct addrinfo *results;
+  /*At most five digits plus the terminating NUL.*/
+  char             port_str[6];
+  OP_ASSERT(_port<=65535U);
+  sprintf(port_str,"%u",_port);
+  memset(&criteria,0,sizeof(criteria));
+  criteria.ai_socktype=SOCK_STREAM;
+#if !defined(_WIN32)
+  criteria.ai_flags=AI_NUMERICSERV;
+#endif
+  if(OP_UNLIKELY(getaddrinfo(_host,port_str,&criteria,&results)!=0)){
+    return NULL;
+  }
+  return results;
+}
+
+/*Switch a socket between blocking and non-blocking mode.
+  Return: The result of the underlying ioctl()/fcntl() call.*/
+static int op_sock_set_nonblocking(op_sock _fd,int _nonblocking){
+#if defined(_WIN32)
+  return ioctl(_fd,FIONBIO,&_nonblocking);
+#else
+  int mode;
+  mode=fcntl(_fd,F_GETFL);
+  if(OP_UNLIKELY(mode<0))return mode;
+  mode=_nonblocking?mode|O_NONBLOCK:mode&~O_NONBLOCK;
+  return fcntl(_fd,F_SETFL,mode);
+#endif
+}
+
+/*Disable/enable write coalescing if we can.
+ We always send whole requests at once and always parse the response headers
+ before sending another one, so normally write coalescing just causes added
+ delay.
+ _fd: The socket to configure.
+ _nodelay: The value to set for the TCP_NODELAY socket option.
+ If TCP_NODELAY, or both IPPROTO_TCP and SOL_TCP, are not defined, this does
+ nothing at all.*/
+static void op_sock_set_tcp_nodelay(op_sock _fd,int _nodelay){
+# if defined(TCP_NODELAY)&&(defined(IPPROTO_TCP)||defined(SOL_TCP))
+# if defined(IPPROTO_TCP)
+# define OP_SO_LEVEL IPPROTO_TCP
+# else
+# define OP_SO_LEVEL SOL_TCP
+# endif
+ /*It doesn't really matter if this call fails, but it would be interesting
+ to hit a case where it does.
+ The result is only checked through OP_ALWAYS_TRUE(); nothing is reported to
+ the caller.*/
+ OP_ALWAYS_TRUE(!setsockopt(_fd,OP_SO_LEVEL,TCP_NODELAY,
+ &_nodelay,sizeof(_nodelay)));
+# endif
+}
+
+#if defined(_WIN32)
+/*Start up Winsock 2.2 the first time this is called.
+  Later calls only bump the counter.*/
+static void op_init_winsock(){
+  static WSADATA winsock_data;
+  static LONG    ncalls;
+  if(InterlockedIncrement(&ncalls)!=1)return;
+  WSAStartup(0x0202,&winsock_data);
+}
+#endif
+
+/*A single physical connection to an HTTP server.
+ We may have several of these open at once.
+ An OpusHTTPStream embeds OP_NCONNS_MAX of them.*/
+struct OpusHTTPConn{
+ /*The current position indicator for this connection.*/
+ opus_int64 pos;
+ /*The position where the current request will end, or -1 if we're reading
+ until EOF (an unseekable stream or the initial HTTP/1.0 request).*/
+ opus_int64 end_pos;
+ /*The position where the next request we've sent will start, or -1 if we
+ haven't sent the next request yet.
+ This is -1 for a freshly initialized or closed connection.*/
+ opus_int64 next_pos;
+ /*The end of the next request or -1 if we requested the rest of the resource.
+ This is only set to a meaningful value if next_pos is not -1.*/
+ opus_int64 next_end;
+ /*The SSL connection, if this is https.
+ This is NULL for a freshly initialized or closed connection.*/
+ SSL *ssl_conn;
+ /*The next connection in either the LRU or free list.*/
+ OpusHTTPConn *next;
+ /*The last time we blocked for reading from this connection.
+ op_http_conn_read_rate_update() refreshes this with ftime().*/
+ struct timeb read_time;
+ /*The number of bytes we've read since the last time we blocked.*/
+ opus_int64 read_bytes;
+ /*The estimated throughput of this connection, in bytes/s.
+ op_http_conn_read_rate_update() moves this 1/8th of the way towards each
+ new measurement.*/
+ opus_int64 read_rate;
+ /*The socket we're reading from.
+ This is OP_INVALID_SOCKET for a freshly initialized or closed connection.*/
+ op_sock fd;
+ /*The number of remaining requests we are allowed on this connection.*/
+ int nrequests_left;
+ /*The chunk size to use for pipelining requests.*/
+ opus_int32 chunk_size;
+};
+
+/*Mark a connection as unused: no socket, no SSL object, no pending request,
+   and not linked into any list.
+  The remaining fields are left untouched.*/
+static void op_http_conn_init(OpusHTTPConn *_conn){
+  _conn->fd=OP_INVALID_SOCKET;
+  _conn->ssl_conn=NULL;
+  _conn->next=NULL;
+  _conn->next_pos=-1;
+}
+
+/*Free the SSL object (if any) and close the socket (if open).
+  The fields themselves are not reset here.*/
+static void op_http_conn_clear(OpusHTTPConn *_conn){
+  SSL     *ssl;
+  op_sock  sock;
+  ssl=_conn->ssl_conn;
+  sock=_conn->fd;
+  /*SSL frees the BIO for us.*/
+  if(ssl!=NULL)SSL_free(ssl);
+  if(sock!=OP_INVALID_SOCKET)close(sock);
+}
+
+/*The global stream state.*/
+struct OpusHTTPStream{
+ /*The list of connections.
+ Each one is linked into either the LRU list or the free list below.*/
+ OpusHTTPConn conns[OP_NCONNS_MAX];
+ /*The context object used as a framework for TLS/SSL functions.
+ This is NULL until one is created.*/
+ SSL_CTX *ssl_ctx;
+ /*The cached session to reuse for future connections.
+ This is NULL until one is cached.*/
+ SSL_SESSION *ssl_session;
+ /*The LRU list (ordered from MRU to LRU) of currently connected
+ connections.*/
+ OpusHTTPConn *lru_head;
+ /*The free list.
+ op_http_stream_init() puts every connection on it.*/
+ OpusHTTPConn *free_head;
+ /*The URL to connect to.*/
+ OpusParsedURL url;
+ /*Information about the address we connected to.*/
+ struct addrinfo addr_info;
+ /*The address we connected to.*/
+ union{
+ struct sockaddr s;
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } addr;
+ /*The last time we re-resolved the host.*/
+ struct timeb resolve_time;
+ /*A buffer used to build HTTP requests.*/
+ OpusStringBuf request;
+ /*A buffer used to build proxy CONNECT requests.*/
+ OpusStringBuf proxy_connect;
+ /*A buffer used to receive the response headers.*/
+ OpusStringBuf response;
+ /*The Content-Length, if specified, or -1 otherwise.
+ This will always be specified for seekable streams.*/
+ opus_int64 content_length;
+ /*The position indicator used when no connection is active.*/
+ opus_int64 pos;
+ /*The host we actually connected to.
+ This may be the same pointer as url.host, in which case
+ op_http_stream_clear() does not free it separately.*/
+ char *connect_host;
+ /*The port we actually connected to.*/
+ unsigned connect_port;
+ /*The connection we're currently reading from.
+ This can be -1 if no connection is active.*/
+ int cur_conni;
+ /*Whether or not the server supports range requests.*/
+ int seekable;
+ /*Whether or not the server supports HTTP/1.1 with persistent connections.*/
+ int pipeline;
+ /*Whether or not we should skip certificate checks.*/
+ int skip_certificate_check;
+ /*The offset of the tail of the request.
+ Only the offset in the Range: header appears after this, allowing us to
+ quickly edit the request to ask for a new range.*/
+ int request_tail;
+ /*The estimated time required to open a new connection, in milliseconds.*/
+ opus_int32 connect_rate;
+};
+
+/*Put a stream into its initial state: every connection on the free list (in
+   array order), nothing on the LRU list, no SSL state, and empty buffers.
+  Fields not mentioned here are left untouched.*/
+static void op_http_stream_init(OpusHTTPStream *_stream){
+  OpusHTTPConn *free_list;
+  int           ci;
+  /*Build the free list back to front, so that it ends up starting with
+     conns[0] and ending with the last connection.*/
+  free_list=NULL;
+  for(ci=OP_NCONNS_MAX;ci-->0;){
+    OpusHTTPConn *conn;
+    conn=_stream->conns+ci;
+    op_http_conn_init(conn);
+    conn->next=free_list;
+    free_list=conn;
+  }
+  _stream->free_head=free_list;
+  _stream->lru_head=NULL;
+  _stream->ssl_ctx=NULL;
+  _stream->ssl_session=NULL;
+  op_parsed_url_init(&_stream->url);
+  op_sb_init(&_stream->request);
+  op_sb_init(&_stream->proxy_connect);
+  op_sb_init(&_stream->response);
+  _stream->connect_host=NULL;
+  _stream->seekable=0;
+}
+
+/*Close the connection and move it to the free list.
+  _stream:     The stream containing the free list.
+  _conn:       The connection to close.
+  _pnext:      The linked-list pointer currently pointing to this connection.
+  _gracefully: Whether or not to shut down cleanly.*/
+static void op_http_conn_close(OpusHTTPStream *_stream,OpusHTTPConn *_conn,
+ OpusHTTPConn **_pnext,int _gracefully){
+  /*If we don't shut down gracefully, the server MUST NOT re-use our session
+     according to RFC 2246, because it can't tell the difference between an
+     abrupt close and a truncation attack.
+    So we shut down gracefully if we can.
+    However, we will not wait if this would block (it's not worth the savings
+     from session resumption to do so).
+    Clients (that's us) MAY resume a TLS session that ended with an incomplete
+     close, according to RFC 2818, so there's no reason to make sure the server
+     shut things down gracefully.*/
+  if(_gracefully){
+    SSL *ssl;
+    ssl=_conn->ssl_conn;
+    if(ssl!=NULL)SSL_shutdown(ssl);
+  }
+  op_http_conn_clear(_conn);
+  /*Return the connection to its freshly-initialized state.*/
+  _conn->fd=OP_INVALID_SOCKET;
+  _conn->ssl_conn=NULL;
+  _conn->next_pos=-1;
+  /*Unlink it from the list it was on, then push it onto the free list.*/
+  OP_ASSERT(*_pnext==_conn);
+  *_pnext=_conn->next;
+  _conn->next=_stream->free_head;
+  _stream->free_head=_conn;
+}
+
+/*Tear down a stream: close every connected connection (without a graceful
+   shutdown) and free the SSL state, the buffers, and the URL.*/
+static void op_http_stream_clear(OpusHTTPStream *_stream){
+  OpusHTTPConn *conn;
+  SSL_SESSION  *session;
+  SSL_CTX      *ctx;
+  /*Each close unlinks the head, so this walks the whole LRU list.*/
+  while((conn=_stream->lru_head)!=NULL){
+    op_http_conn_close(_stream,conn,&_stream->lru_head,0);
+  }
+  session=_stream->ssl_session;
+  if(session!=NULL)SSL_SESSION_free(session);
+  ctx=_stream->ssl_ctx;
+  if(ctx!=NULL)SSL_CTX_free(ctx);
+  op_sb_clear(&_stream->response);
+  op_sb_clear(&_stream->proxy_connect);
+  op_sb_clear(&_stream->request);
+  /*connect_host may just be an alias for url.host; only free it separately
+     when it is not, and do so before the URL itself is cleared.*/
+  if(_stream->connect_host!=_stream->url.host){
+    _ogg_free(_stream->connect_host);
+  }
+  op_parsed_url_clear(&_stream->url);
+}
+
+/*Write an entire buffer to a connection, waiting (for up to
+   OP_POLL_TIMEOUT_MS at a time) whenever the write would block.
+  _buf:      The data to send.
+  _buf_size: The number of bytes to send.
+  Return: 0 if everything was written, or OP_FALSE if the connection was
+   closed, an error occurred, or the wait timed out.*/
+static int op_http_conn_write_fully(OpusHTTPConn *_conn,
+ const char *_buf,int _buf_size){
+  struct pollfd  pfd;
+  SSL           *ssl;
+  pfd.fd=_conn->fd;
+  ssl=_conn->ssl_conn;
+  for(;_buf_size>0;){
+    if(ssl!=NULL){
+      int nwritten;
+      nwritten=SSL_write(ssl,_buf,_buf_size);
+      if(nwritten>0){
+        /*Wrote some data.*/
+        _buf+=nwritten;
+        _buf_size-=nwritten;
+        continue;
+      }
+      /*Connection closed.*/
+      if(nwritten==0)return OP_FALSE;
+      switch(SSL_get_error(ssl,nwritten)){
+        /*Yes, renegotiations can cause SSL_write() to block for reading.*/
+        case SSL_ERROR_WANT_READ:pfd.events=POLLIN;break;
+        case SSL_ERROR_WANT_WRITE:pfd.events=POLLOUT;break;
+        default:return OP_FALSE;
+      }
+    }
+    else{
+      ssize_t nsent;
+      int     sock_err;
+      op_reset_errno();
+      nsent=send(pfd.fd,_buf,_buf_size,0);
+      if(nsent>0){
+        _buf+=nsent;
+        _buf_size-=nsent;
+        continue;
+      }
+      /*Anything other than "try again later" is fatal.*/
+      sock_err=op_errno();
+      if(sock_err!=EAGAIN&&sock_err!=EWOULDBLOCK)return OP_FALSE;
+      pfd.events=POLLOUT;
+    }
+    /*Wait until the socket is ready before trying again.*/
+    if(poll(&pfd,1,OP_POLL_TIMEOUT_MS)<=0)return OP_FALSE;
+  }
+  return 0;
+}
+
+/*Estimate how many bytes are already waiting to be read on a connection.
+  Return: The byte count reported by FIONREAD (or 0 if that query fails),
+   plus whatever SSL_pending() reports for an SSL connection.*/
+static int op_http_conn_estimate_available(OpusHTTPConn *_conn){
+  int nbytes;
+  if(ioctl(_conn->fd,FIONREAD,&nbytes)<0)nbytes=0;
+  /*This requires the SSL read_ahead flag to be unset to work.
+    We ignore partial records as well as the protocol overhead for any pending
+     bytes.
+    This means we might return somewhat less than can truly be read without
+     blocking (if there's a partial record).
+    This is okay, because we're using this value to estimate network transfer
+     time, and we _have_ already received those bytes.
+    We also might return slightly more (due to protocol overhead), but that's
+     small enough that it probably doesn't matter.*/
+  if(_conn->ssl_conn!=NULL)nbytes+=SSL_pending(_conn->ssl_conn);
+  return nbytes;
+}
+
+/*Compute _end-_start in milliseconds.
+  Return: The difference, or OP_INT32_MAX/OP_INT32_MIN if the two times are
+   so far apart that the result might not fit in 32 bits.*/
+static opus_int32 op_time_diff_ms(const struct timeb *_end,
+ const struct timeb *_start){
+  opus_int64 dsec;
+  OP_ASSERT(_end->millitm<1000);
+  OP_ASSERT(_start->millitm<1000);
+  dsec=_end->time-(opus_int64)_start->time;
+  /*Saturate, leaving a full second of headroom for the millisecond terms.*/
+  if(OP_UNLIKELY(dsec>(OP_INT32_MAX-1000)/1000))return OP_INT32_MAX;
+  if(OP_UNLIKELY(dsec<(OP_INT32_MIN+1000)/1000))return OP_INT32_MIN;
+  return (opus_int32)dsec*1000+_end->millitm-_start->millitm;
+}
+
+/*Update the read rate estimate for this connection.
+  Nothing happens unless some bytes have been read since the last update.
+  Otherwise the estimate moves 1/8th of the way (with rounding) towards the
+   rate measured since read_time, and the byte counter and timestamp are
+   reset.*/
+static void op_http_conn_read_rate_update(OpusHTTPConn *_conn){
+  struct timeb now;
+  opus_int64   nbytes;
+  opus_int64   sample;
+  opus_int64   rate;
+  opus_int32   elapsed_ms;
+  nbytes=_conn->read_bytes;
+  if(nbytes<=0)return;
+  ftime(&now);
+  elapsed_ms=op_time_diff_ms(&now,&_conn->read_time);
+  /*Never divide by zero (or by a negative interval).*/
+  elapsed_ms=OP_MAX(elapsed_ms,1);
+  sample=nbytes*1000/elapsed_ms;
+  rate=_conn->read_rate;
+  rate+=(sample-rate+4)>>3;
+  _conn->read_rate=rate;
+  _conn->read_bytes=0;
+  _conn->read_time=now;
+}
+
+/*Tries to read from the given connection.
+ Data comes from SSL_read() if the connection has an SSL object, and from
+ recv() otherwise.
+ Reading continues until the buffer is full or no more data is immediately
+ available.
+ Bytes obtained are added to _conn->read_bytes, and the read rate estimate is
+ updated whenever the read would have to wait.
+ [out] _buf: Returns the data read.
+ _buf_size: The size of the buffer.
+ _blocking: Whether or not to block until some data is retrieved.
+ Return: A positive number of bytes read on success.
+ 0: The read would block, or the connection was closed.
+ OP_EREAD: There was a fatal read error.
+ This includes poll() failing or timing out after OP_POLL_TIMEOUT_MS when
+ _blocking is set.*/
+static int op_http_conn_read(OpusHTTPConn *_conn,
+ char *_buf,int _buf_size,int _blocking){
+ struct pollfd fd;
+ SSL *ssl_conn;
+ int nread;
+ int nread_unblocked;
+ fd.fd=_conn->fd;
+ ssl_conn=_conn->ssl_conn;
+ nread=nread_unblocked=0;
+ /*RFC 2818 says "client implementations MUST treat any premature closes as
+ errors and the data received as potentially truncated," so we make very
+ sure to report read errors upwards.*/
+ do{
+ int err;
+ if(ssl_conn!=NULL){
+ int ret;
+ ret=SSL_read(ssl_conn,_buf+nread,_buf_size-nread);
+ OP_ASSERT(ret<=_buf_size-nread);
+ if(ret>0){
+ /*Read some data.
+ Keep going to see if there's more.*/
+ nread+=ret;
+ nread_unblocked+=ret;
+ continue;
+ }
+ /*If we already read some data, return it right now.
+ Any error or close will be seen again by the next call.*/
+ if(nread>0)break;
+ err=SSL_get_error(ssl_conn,ret);
+ if(ret==0){
+ /*Connection close.
+ Check for a clean shutdown to prevent truncation attacks.
+ This check always succeeds for SSLv2, as it has no "close notify"
+ message and thus can't verify an orderly shutdown.*/
+ return err==SSL_ERROR_ZERO_RETURN?0:OP_EREAD;
+ }
+ if(err==SSL_ERROR_WANT_READ)fd.events=POLLIN;
+ /*Yes, renegotiations can cause SSL_read() to block for writing.*/
+ else if(err==SSL_ERROR_WANT_WRITE)fd.events=POLLOUT;
+ /*Some other error.*/
+ else return OP_EREAD;
+ }
+ else{
+ ssize_t ret;
+ op_reset_errno();
+ ret=recv(fd.fd,_buf+nread,_buf_size-nread,0);
+ OP_ASSERT(ret<=_buf_size-nread);
+ if(ret>0){
+ /*Read some data.
+ Keep going to see if there's more.*/
+ nread+=ret;
+ nread_unblocked+=ret;
+ continue;
+ }
+ /*If we already read some data or the connection was closed, return
+ right now.*/
+ if(ret==0||nread>0)break;
+ err=op_errno();
+ if(err!=EAGAIN&&err!=EWOULDBLOCK)return OP_EREAD;
+ fd.events=POLLIN;
+ }
+ _conn->read_bytes+=nread_unblocked;
+ op_http_conn_read_rate_update(_conn);
+ nread_unblocked=0;
+ if(!_blocking)break;
+ /*Need to wait to get any data at all.
+ We only get here with nothing read yet, so a timeout loses no data.*/
+ if(poll(&fd,1,OP_POLL_TIMEOUT_MS)<=0)return OP_EREAD;
+ }
+ while(nread<_buf_size);
+ _conn->read_bytes+=nread_unblocked;
+ return nread;
+}
+
+/*Tries to look at the pending data for a connection without consuming it.
+  This waits (for up to OP_POLL_TIMEOUT_MS at a time) until there is something
+   to look at.
+  [out] _buf: Returns the data at which we're peeking.
+  _buf_size:  The size of the buffer.
+  Return: The number of bytes placed in _buf, or 0 if the connection was
+   closed, an error occurred, or the wait timed out.*/
+static int op_http_conn_peek(OpusHTTPConn *_conn,char *_buf,int _buf_size){
+  struct pollfd  pfd;
+  SSL           *ssl;
+  pfd.fd=_conn->fd;
+  ssl=_conn->ssl_conn;
+  do{
+    int npeeked;
+    if(ssl!=NULL){
+      npeeked=SSL_peek(ssl,_buf,_buf_size);
+      /*Either saw some data or the connection was closed.*/
+      if(npeeked>=0)return npeeked;
+      switch(SSL_get_error(ssl,npeeked)){
+        case SSL_ERROR_WANT_READ:pfd.events=POLLIN;break;
+        /*Yes, renegotiations can cause SSL_peek() to block for writing.*/
+        case SSL_ERROR_WANT_WRITE:pfd.events=POLLOUT;break;
+        default:return 0;
+      }
+    }
+    else{
+      int sock_err;
+      op_reset_errno();
+      npeeked=(int)recv(pfd.fd,_buf,_buf_size,MSG_PEEK);
+      /*Either saw some data or the connection was closed.*/
+      if(npeeked>=0)return npeeked;
+      sock_err=op_errno();
+      if(sock_err!=EAGAIN&&sock_err!=EWOULDBLOCK)return 0;
+      pfd.events=POLLIN;
+    }
+    /*Need to wait to get any data at all.*/
+  }
+  while(poll(&pfd,1,OP_POLL_TIMEOUT_MS)>0);
+  return 0;
+}
+
+/*When parsing response headers, RFC 2616 mandates that all lines end in CR LF.
+ However, even in the year 2012, I have seen broken servers use just a LF.
+ This is the evil that Postel's advice from RFC 761 breeds.*/
+
+/*Reads the entirety of a response to an HTTP request into the response buffer.
+ Actual parsing and validation is done later.
+ The data is peeked at before it is consumed, so that nothing beyond the blank
+ line that ends the headers is taken off the connection.
+ _conn: The connection to read from.
+ [out] _response: Returns the NUL-terminated response headers.
+ Any previous contents are discarded.
+ Return: The number of bytes in the response on success, OP_EREAD if the
+ connection was closed before reading any data, or another negative
+ value on any other error.
+ In particular, OP_EIMPL means the response did not fit in
+ OP_RESPONSE_SIZE_MAX bytes.*/
+static int op_http_conn_read_response(OpusHTTPConn *_conn,
+ OpusStringBuf *_response){
+ int ret;
+ _response->nbuf=0;
+ ret=op_sb_ensure_capacity(_response,OP_RESPONSE_SIZE_MIN);
+ if(OP_UNLIKELY(ret<0))return ret;
+ for(;;){
+ char *buf;
+ int size;
+ int capacity;
+ int read_limit;
+ int terminated;
+ size=_response->nbuf;
+ capacity=_response->cbuf-1;
+ if(OP_UNLIKELY(size>=capacity)){
+ ret=op_sb_grow(_response,OP_RESPONSE_SIZE_MAX);
+ if(OP_UNLIKELY(ret<0))return ret;
+ capacity=_response->cbuf-1;
+ /*The response was too large.
+ This prevents a bad server from running us out of memory.*/
+ if(OP_UNLIKELY(size>=capacity))return OP_EIMPL;
+ }
+ buf=_response->buf;
+ ret=op_http_conn_peek(_conn,buf+size,capacity-size);
+ if(OP_UNLIKELY(ret<=0))return size<=0?OP_EREAD:OP_FALSE;
+ /*We read some data.*/
+ /*Make sure the starting characters are "HTTP".
+ Otherwise we could wind up waiting forever for a response from
+ something that is not an HTTP server.
+ Only as many characters as we have so far (up to 4) are compared.*/
+ if(size<4&&op_strncasecmp(buf,"HTTP",OP_MIN(size+ret,4))!=0){
+ return OP_FALSE;
+ }
+ /*How far can we read without passing the "\r\n\r\n" terminator?
+ The scan starts up to 3 bytes before the new data, in case the terminator
+ straddles the data from a previous pass.
+ The NUL stored here keeps the look-ahead below from running off the end of
+ the data.*/
+ buf[size+ret]='\0';
+ terminated=0;
+ for(read_limit=OP_MAX(size-3,0);read_limit<size+ret;read_limit++){
+ /*We don't look for the leading '\r' thanks to broken servers.*/
+ if(buf[read_limit]=='\n'){
+ if(buf[read_limit+1]=='\r'&&OP_LIKELY(buf[read_limit+2]=='\n')){
+ terminated=3;
+ break;
+ }
+ /*This case is for broken servers.*/
+ else if(OP_UNLIKELY(buf[read_limit+1]=='\n')){
+ terminated=2;
+ break;
+ }
+ }
+ }
+ read_limit+=terminated;
+ OP_ASSERT(size<=read_limit);
+ OP_ASSERT(read_limit<=size+ret);
+ /*Actually consume that data.
+ This overwrites the peeked bytes with the same bytes.*/
+ ret=op_http_conn_read(_conn,buf+size,read_limit-size,1);
+ if(OP_UNLIKELY(ret<=0))return OP_FALSE;
+ size+=ret;
+ buf[size]='\0';
+ _response->nbuf=size;
+ /*We found the terminator and read all the data up to and including it.*/
+ if(terminated&&OP_LIKELY(size>=read_limit))return size;
+ }
+ return OP_EIMPL;
+}
+
+/*The decimal digits, for use with strspn().*/
+# define OP_HTTP_DIGIT "0123456789"
+
+/*The Reason-Phrase is not allowed to contain control characters, except
+ horizontal tab (HT: \011).*/
+/*This is the set of characters that ends a Reason-Phrase: every control
+ character in OP_HTTP_CTLS except \011.*/
+# define OP_HTTP_CREASON_PHRASE \
+ "\001\002\003\004\005\006\007\010\012\013\014\015\016\017\020\021" \
+ "\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177"
+
+/*The control characters \001 through \037, plus \177.
+ NUL is not listed, since it terminates the string literal (and any string
+ being scanned).*/
+# define OP_HTTP_CTLS \
+ "\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020" \
+ "\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177"
+
+/*This also includes '\t', but we get that from OP_HTTP_CTLS.*/
+# define OP_HTTP_SEPARATORS " \"(),/:;<=>?@[\\]{}"
+
+/*TEXT can also include LWS, but that has structure, so we parse it
+ separately.*/
+/*The set of characters that ends a token: any control character or
+ separator (the two string literals are concatenated).*/
+# define OP_HTTP_CTOKEN OP_HTTP_CTLS OP_HTTP_SEPARATORS
+
+/*Measure the linear white space (LWS) at the start of a string.
+  LWS is a run of spaces and horizontal tabs, each of which may be preceded by
+   a line break (CR LF, or a bare LF from broken servers), i.e., a folded
+   header line.
+  A line break that is not followed by a space or tab is not LWS.
+  _s: The NUL-terminated string to examine.
+  Return: The amount of linear white space (LWS) at the start of _s.*/
+static int op_http_lwsspn(const char *_s){
+  int i;
+  for(i=0;;){
+    /*Every test must be made at the current offset, i.
+      Testing _s[0] for the folded cases would never get past a fold at the
+       start of the string (i would grow without bound), and would miss a
+       fold that follows other white space.
+      The short-circuit evaluation keeps us from reading past the NUL.*/
+    if(_s[i]=='\r'&&_s[i+1]=='\n'&&(_s[i+2]=='\t'||_s[i+2]==' '))i+=3;
+    /*This case is for broken servers.*/
+    else if(_s[i]=='\n'&&(_s[i+1]=='\t'||_s[i+1]==' '))i+=2;
+    else if(_s[i]=='\t'||_s[i]==' ')i++;
+    else return i;
+  }
+}
+
+/*Parse the Status-Line at the start of an HTTP response.
+  [out] _v1_1_compat: If non-NULL, this is set to a non-zero value if the
+                       minor version number is not zero, and to 0 if it is.
+  [out] _status_code: Set to point at the three-digit Status-Code inside
+                       _response (it is not NUL-terminated).
+  _response: The start of the response, which must begin with "HTTP".
+  Return: A pointer to the first character following the Status-Line, or NULL
+           if the line is malformed or the major version is not 1.
+          The output parameters are only written on success.*/
+static char *op_http_parse_status_line(int *_v1_1_compat,
+ char **_status_code,char *_response){
+  char   *cur;
+  char   *code;
+  size_t  ndigits;
+  int     minor_is_nonzero;
+  /*RFC 2616 Section 6.1 does not say that the tokens in the Status-Line cannot
+     be separated by optional LWS, but since it specifically calls out where
+     spaces are to be placed and that CR and LF are not allowed except at the
+     end, I am assuming this to be true.*/
+  /*We already validated that this starts with "HTTP"*/
+  OP_ASSERT(op_strncasecmp(_response,"HTTP",4)==0);
+  cur=_response+4;
+  if(OP_UNLIKELY(*cur!='/'))return NULL;
+  cur++;
+  /*The major version number.
+    "Leading zeros MUST be ignored by recipients."*/
+  ndigits=strspn(cur,OP_HTTP_DIGIT);
+  for(;*cur=='0';cur++){
+    OP_ASSERT(ndigits>0);
+    ndigits--;
+  }
+  /*We only support version 1.x*/
+  if(OP_UNLIKELY(ndigits!=1))return NULL;
+  if(OP_UNLIKELY(*cur!='1'))return NULL;
+  cur++;
+  if(OP_UNLIKELY(*cur!='.'))return NULL;
+  cur++;
+  /*The minor version number, which must have at least one digit.*/
+  ndigits=strspn(cur,OP_HTTP_DIGIT);
+  if(OP_UNLIKELY(ndigits<=0))return NULL;
+  /*"Leading zeros MUST be ignored by recipients."*/
+  for(;*cur=='0';cur++){
+    OP_ASSERT(ndigits>0);
+    ndigits--;
+  }
+  /*We don't need to parse the version number.
+    Any digits left after the leading zeros mean it is at least 1.*/
+  minor_is_nonzero=ndigits>0;
+  cur+=ndigits;
+  if(OP_UNLIKELY(*cur!=' '))return NULL;
+  cur++;
+  /*The Status-Code is exactly three digits.*/
+  code=cur;
+  ndigits=strspn(cur,OP_HTTP_DIGIT);
+  if(OP_UNLIKELY(ndigits!=3))return NULL;
+  cur+=ndigits;
+  /*The Reason-Phrase can be empty, but the space must be here.*/
+  if(OP_UNLIKELY(*cur!=' '))return NULL;
+  cur++;
+  cur+=strcspn(cur,OP_HTTP_CREASON_PHRASE);
+  /*We are not mandating this be present thanks to broken servers.*/
+  if(OP_LIKELY(*cur=='\r'))cur++;
+  if(OP_UNLIKELY(*cur!='\n'))return NULL;
+  cur++;
+  if(_v1_1_compat!=NULL)*_v1_1_compat=minor_is_nonzero;
+  *_status_code=code;
+  return cur;
+}
+
+/*Split the next response header into its name and its value.
+  The response buffer is modified in place: NULs are written over the end of
+   the name and the end of the value, and the name is converted to lower case.
+  [out] _header: The header token, NUL-terminated, with leading and trailing
+                  whitespace stripped, and converted to lower case (to simplify
+                  case-insensitive comparisons), or NULL if there are no more
+                  response headers.
+  [out] _cdr:    The remaining contents of the header, excluding the initial
+                  colon (':') and the terminating CRLF ("\r\n"),
+                  NUL-terminated, and with leading and trailing whitespace
+                  stripped, or NULL if there are no more response headers.
+  [inout] _s:    On input, this points to the start of the current line of the
+                  response headers.
+                 On output, it points to the start of the first line following
+                  this header, or NULL if there are no more response headers.
+  Return: 0 on success, or a negative value on failure.
+          Nothing is written on failure.*/
+static int op_http_get_next_header(char **_header,char **_cdr,char **_s){
+  char   *line;
+  char   *name;
+  char   *name_end;
+  char   *value;
+  char   *value_end;
+  char   *cur;
+  size_t  name_len;
+  line=*_s;
+  /*A blank line ends the headers.
+    The bare LF case is for broken servers.*/
+  if(OP_UNLIKELY(line[0]=='\n')||(line[0]=='\r'&&line[1]=='\n')){
+    /*No more headers.*/
+    *_header=NULL;
+    *_cdr=NULL;
+    *_s=NULL;
+    return 0;
+  }
+  /*The field name: a non-empty token, optionally surrounded by LWS.*/
+  name=line+op_http_lwsspn(line);
+  name_len=strcspn(name,OP_HTTP_CTOKEN);
+  if(OP_UNLIKELY(name_len<=0))return OP_FALSE;
+  name_end=name+name_len;
+  cur=name_end+op_http_lwsspn(name_end);
+  if(OP_UNLIKELY(*cur!=':'))return OP_FALSE;
+  cur++;
+  /*The field value starts after any LWS following the colon.*/
+  cur+=op_http_lwsspn(cur);
+  value=cur;
+  /*Alternate between runs of non-control characters and runs of LWS until
+     there is no more LWS to skip.
+    value_end is left pointing at the end of the last non-LWS run, which
+     strips any trailing whitespace.*/
+  for(;;){
+    value_end=cur+strcspn(cur,OP_HTTP_CTLS);
+    cur=value_end+op_http_lwsspn(value_end);
+    if(cur<=value_end)break;
+  }
+  /*We are not mandating this be present thanks to broken servers.*/
+  if(OP_LIKELY(*cur=='\r'))cur++;
+  if(OP_UNLIKELY(*cur!='\n'))return OP_FALSE;
+  cur++;
+  /*Only now that the whole line is known to be valid do we modify it.*/
+  *name_end='\0';
+  *value_end='\0';
+  /*Field names are case-insensitive.*/
+  op_string_tolower(name);
+  *_header=name;
+  *_cdr=value;
+  *_s=cur;
+  return 0;
+}
+
+/*Parse a run of decimal digits as a non-negative 64-bit integer.
+  [out] _next: Set to point just past the last digit (or to _cdr itself, if
+                there were no digits).
+               This is set even when an error is returned.
+  _cdr: The NUL-terminated string to parse.
+  Return: The parsed value, OP_FALSE if _cdr does not start with a digit, or
+           OP_EIMPL if the value is too large.*/
+static opus_int64 op_http_parse_nonnegative_int64(const char **_next,
+ const char *_cdr){
+  const char *digits_end;
+  const char *pos;
+  opus_int64  value;
+  digits_end=_cdr+strspn(_cdr,OP_HTTP_DIGIT);
+  *_next=digits_end;
+  if(OP_UNLIKELY(digits_end<=_cdr))return OP_FALSE;
+  /*Leading zeros do not count against the length limit below.*/
+  while(*_cdr=='0')_cdr++;
+  /*Reject anything with more than 19 significant digits outright.*/
+  if(OP_UNLIKELY(digits_end-_cdr>19))return OP_EIMPL;
+  value=0;
+  for(pos=_cdr;pos<digits_end;pos++){
+    int digit;
+    digit=*pos-'0';
+    /*Check for overflow.*/
+    if(OP_UNLIKELY(value>(OP_INT64_MAX-9)/10+(digit<=7)))return OP_EIMPL;
+    value=value*10+digit;
+  }
+  return value;
+}
+
+/*Parse the value of a Content-Length header.
+  _cdr: The NUL-terminated header value.
+  Return: The content length, or a negative value if _cdr is not made up
+           entirely of digits or the number could not be parsed.*/
+static opus_int64 op_http_parse_content_length(const char *_cdr){
+  const char *digits_end;
+  opus_int64  length;
+  length=op_http_parse_nonnegative_int64(&digits_end,_cdr);
+  /*Nothing is allowed to follow the digits.*/
+  return OP_UNLIKELY(*digits_end!='\0')?OP_FALSE:length;
+}
+
+/*Parse the value of a Content-Range header, which has the form
+   "bytes" LWS (first "-" last | "*") "/" (length | "*").
+  [out] _first:  The offset of the first byte, or -1 if the range was "*".
+  [out] _last:   The offset of the last byte, or -1 if the range was "*".
+  [out] _length: The total length, or -1 if it was given as "*".
+  _cdr: The NUL-terminated header value.
+  Return: 0 on success, or a negative value on failure.
+          The output parameters are only written on success.*/
+static int op_http_parse_content_range(opus_int64 *_first,opus_int64 *_last,
+ opus_int64 *_length,const char *_cdr){
+  const char *cur;
+  opus_int64  range_first;
+  opus_int64  range_last;
+  opus_int64  total;
+  size_t      nlws;
+  if(OP_UNLIKELY(op_strncasecmp(_cdr,"bytes",5)!=0))return OP_FALSE;
+  cur=_cdr+5;
+  /*At least some white space must follow the unit.*/
+  nlws=op_http_lwsspn(cur);
+  if(OP_UNLIKELY(nlws<=0))return OP_FALSE;
+  cur+=nlws;
+  if(*cur=='*'){
+    /*This is for a 416 response (Requested range not satisfiable).*/
+    range_first=range_last=-1;
+    cur++;
+  }
+  else{
+    range_first=op_http_parse_nonnegative_int64(&cur,cur);
+    if(OP_UNLIKELY(range_first<0))return (int)range_first;
+    cur+=op_http_lwsspn(cur);
+    if(*cur!='-')return OP_FALSE;
+    cur++;
+    cur+=op_http_lwsspn(cur);
+    range_last=op_http_parse_nonnegative_int64(&cur,cur);
+    if(OP_UNLIKELY(range_last<0))return (int)range_last;
+    cur+=op_http_lwsspn(cur);
+  }
+  if(OP_UNLIKELY(*cur!='/'))return OP_FALSE;
+  cur++;
+  if(*cur=='*'){
+    /*The total length is unspecified.*/
+    cur++;
+    total=-1;
+  }
+  else{
+    total=op_http_parse_nonnegative_int64(&cur,cur);
+    if(OP_UNLIKELY(total<0))return (int)total;
+  }
+  /*Nothing is allowed to follow the length.*/
+  if(OP_UNLIKELY(*cur!='\0'))return OP_FALSE;
+  /*Sanity-check the values against each other.*/
+  if(OP_UNLIKELY(range_last<range_first))return OP_FALSE;
+  if(total>=0&&OP_UNLIKELY(range_last>=total))return OP_FALSE;
+  *_first=range_first;
+  *_last=range_last;
+  *_length=total;
+  return 0;
+}
+
+/*Parse the Connection response header and look for a "close" token.
+  The header value is a list of tokens.
+  The tokens may be separated by commas (with optional white space around
+   them, and empty list elements allowed) or by white space alone.
+  _cdr: The NUL-terminated header value, with leading and trailing white space
+         already stripped.
+  Return: 1 if a "close" token is found, 0 if it's not found, and a negative
+   value on error.*/
+static int op_http_parse_connection(char *_cdr){
+  size_t d;
+  int    ret;
+  ret=0;
+  for(;;){
+    int separated;
+    d=strcspn(_cdr,OP_HTTP_CTOKEN);
+    if(OP_UNLIKELY(d<=0))return OP_FALSE;
+    /*The whole token has to match.
+      Comparing just the first d characters would also accept any prefix of
+       "close", such as "c" or "clo".*/
+    if(d==5&&op_strncasecmp(_cdr,"close",5)==0)ret=1;
+    /*We're supposed to strip and ignore any headers mentioned in the
+       Connection header if this response is from an HTTP/1.0 server (to
+       work around forwarding of hop-by-hop headers by old proxies), but the
+       only hop-by-hop header we look at is Connection itself.
+      Everything else is a well-defined end-to-end header, and going back and
+       undoing the things we did based on already-examined headers would be
+       hard (since we only scan them once, in a destructive manner).
+      Therefore we just ignore all the other tokens.*/
+    _cdr+=d;
+    d=op_http_lwsspn(_cdr);
+    separated=d>0;
+    _cdr+=d;
+    /*Skip the list separators.
+      ',' is one of the characters that ends a token, so without this any
+       header with more than one comma-separated token would be rejected.*/
+    if(*_cdr==','){
+      do{
+        _cdr++;
+        _cdr+=op_http_lwsspn(_cdr);
+      }
+      while(*_cdr==',');
+      /*Trailing empty list elements are allowed.*/
+      if(*_cdr=='\0')break;
+      separated=1;
+    }
+    if(!separated)break;
+  }
+  return OP_UNLIKELY(*_cdr!='\0')?OP_FALSE:ret;
+}
+
+/*A single step of an SSL operation, such as SSL_connect() or
+   SSL_do_handshake().*/
+typedef int (*op_ssl_step_func)(SSL *_ssl_conn);
+
+/*Try to run an SSL function to completion (blocking if necessary).
+  _ssl_conn: The SSL connection to operate on.
+  _fd:       The socket underlying _ssl_conn, which is polled whenever the
+              step cannot make progress.
+  _step:     The function to run.
+  Return: The (non-negative) return value of _step once it stops returning
+           a negative value, or OP_FALSE if it failed for any reason other
+           than needing to wait for the socket, or if the wait itself failed
+           or timed out.*/
+static int op_do_ssl_step(SSL *_ssl_conn,op_sock _fd,op_ssl_step_func _step){
+  struct pollfd pfd;
+  int           step_ret;
+  pfd.fd=_fd;
+  while((step_ret=(*_step)(_ssl_conn))<0){
+    /*Figure out which direction we need to wait for, if any.*/
+    switch(SSL_get_error(_ssl_conn,step_ret)){
+      case SSL_ERROR_WANT_READ:pfd.events=POLLIN;break;
+      case SSL_ERROR_WANT_WRITE:pfd.events=POLLOUT;break;
+      default:return OP_FALSE;
+    }
+    if(poll(&pfd,1,OP_POLL_TIMEOUT_MS)<=0)return OP_FALSE;
+  }
+  return step_ret;
+}
+
+/*Implement a BIO type that just indicates every operation should be retried.
+  We use this when initializing an SSL connection via a proxy to allow the
+   initial handshake to proceed all the way up to the first read attempt, and
+   then return.
+  This allows the TLS client hello message to be pipelined with the HTTP
+   CONNECT request.*/
+
+/*Refuse a write, asking the caller to retry it later.*/
+static int op_bio_retry_write(BIO *_b,const char *_buf,int _num){
+  (void)_num;
+  (void)_buf;
+  BIO_clear_retry_flags(_b);
+  BIO_set_retry_write(_b);
+  return -1;
+}
+
+/*Refuse a read, asking the caller to retry it later.*/
+static int op_bio_retry_read(BIO *_b,char *_buf,int _num){
+  (void)_num;
+  (void)_buf;
+  BIO_clear_retry_flags(_b);
+  BIO_set_retry_read(_b);
+  return -1;
+}
+
+/*Writing a string is refused in exactly the same way as any other write.*/
+static int op_bio_retry_puts(BIO *_b,const char *_str){
+  (void)_str;
+  BIO_clear_retry_flags(_b);
+  BIO_set_retry_write(_b);
+  return -1;
+}
+
+/*Handle control requests.
+  Return: 1 for the handful of requests we accept, and 0 for everything
+           else.*/
+static long op_bio_retry_ctrl(BIO *_b,int _cmd,long _num,void *_ptr){
+  (void)_num;
+  (void)_ptr;
+  switch(_cmd){
+    /*The two reset requests also clear any pending retry flags.*/
+    case BIO_CTRL_RESET:
+    case BIO_C_RESET_READ_REQUEST:
+      BIO_clear_retry_flags(_b);
+      return 1;
+    case BIO_CTRL_EOF:
+    case BIO_CTRL_SET:
+    case BIO_CTRL_SET_CLOSE:
+    case BIO_CTRL_FLUSH:
+    case BIO_CTRL_DUP:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/*Mark a new BIO as initialized; there is no per-BIO state to allocate.*/
+static int op_bio_retry_new(BIO *_b){
+  _b->ptr=NULL;
+  _b->num=0;
+  _b->init=1;
+  return 1;
+}
+
+/*There is nothing to release; just report whether we were given a BIO.*/
+static int op_bio_retry_free(BIO *_b){
+  if(_b==NULL)return 0;
+  return 1;
+}
+
+/*This is not const because OpenSSL doesn't allow it, even though it won't
+   write to it.
+  The two NULL entries are presumably the "gets" and "callback_ctrl" slots,
+   going by their position; confirm against the BIO_METHOD declaration of the
+   OpenSSL version in use.*/
+static BIO_METHOD op_bio_retry_method={
+  BIO_TYPE_NULL,
+  "retry",
+  op_bio_retry_write,
+  op_bio_retry_read,
+  op_bio_retry_puts,
+  NULL,
+  op_bio_retry_ctrl,
+  op_bio_retry_new,
+  op_bio_retry_free,
+  NULL
+};
+
+/*Establish a CONNECT tunnel and pipeline the start of the TLS handshake for
+   proxying https URL requests.
+  _stream:   The stream, which supplies the CONNECT request to send
+              (proxy_connect) and the buffer that receives the proxy's
+              response.
+  _conn:     The connection to use.
+             It is set up as a plain (non-SSL) connection on _fd, so the
+              CONNECT exchange itself is done in the clear.
+  _fd:       The socket connected to the proxy.
+  _ssl_conn: The SSL connection to start the handshake on.
+  _ssl_bio:  The socket BIO for _fd.
+             This function takes ownership of it: once it has been attached
+              to _ssl_conn it is released along with _ssl_conn, and if we
+              fail before that point it is freed here (the caller cannot
+              tell those two cases apart from the return value, and would
+              otherwise leak it).
+  Return: 0 on success, or a negative value on failure.*/
+static int op_http_conn_establish_tunnel(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn,op_sock _fd,SSL *_ssl_conn,BIO *_ssl_bio){
+  BIO  *retry_bio;
+  char *status_code;
+  char *next;
+  int   ret;
+  _conn->ssl_conn=NULL;
+  _conn->fd=_fd;
+  OP_ASSERT(_stream->proxy_connect.nbuf>0);
+  ret=op_http_conn_write_fully(_conn,
+   _stream->proxy_connect.buf,_stream->proxy_connect.nbuf);
+  if(OP_UNLIKELY(ret<0)){
+    /*_ssl_bio has not been attached to _ssl_conn yet.*/
+    BIO_free(_ssl_bio);
+    return ret;
+  }
+  retry_bio=BIO_new(&op_bio_retry_method);
+  if(OP_UNLIKELY(retry_bio==NULL)){
+    /*_ssl_bio has not been attached to _ssl_conn yet.*/
+    BIO_free(_ssl_bio);
+    return OP_EFAULT;
+  }
+  /*Reads go to the retry BIO, so the handshake runs until it has written the
+     client hello and then stops at its first attempt to read.*/
+  SSL_set_bio(_ssl_conn,retry_bio,_ssl_bio);
+  SSL_set_connect_state(_ssl_conn);
+  /*This shouldn't succeed, since we can't read yet.*/
+  OP_ALWAYS_TRUE(SSL_connect(_ssl_conn)<0);
+  /*Now switch reads over to the real socket as well.*/
+  SSL_set_bio(_ssl_conn,_ssl_bio,_ssl_bio);
+  /*Only now do we disable write coalescing, to allow the CONNECT
+     request and the start of the TLS handshake to be combined.*/
+  op_sock_set_tcp_nodelay(_fd,1);
+  ret=op_http_conn_read_response(_conn,&_stream->response);
+  if(OP_UNLIKELY(ret<0))return ret;
+  next=op_http_parse_status_line(NULL,&status_code,_stream->response.buf);
+  /*According to RFC 2817, "Any successful (2xx) response to a
+     CONNECT request indicates that the proxy has established a
+     connection to the requested host and port."*/
+  if(OP_UNLIKELY(next==NULL)||OP_UNLIKELY(status_code[0]!='2'))return OP_FALSE;
+  return 0;
+}
+
+/*Match a host name against a host with a possible wildcard pattern according
+ to the rules of RFC 6125 Section 6.4.3.
+ _host: The NUL-terminated host name we are connecting to.
+ _host_len: The length of _host, in bytes.
+ _pattern: The name presented in the certificate, which may contain a
+ wildcard ('*') in its left-most label.
+ Return: 0 if the pattern doesn't match, and a non-zero value if it does.*/
+static int op_http_hostname_match(const char *_host,size_t _host_len,
+ ASN1_STRING *_pattern){
+ const char *pattern;
+ size_t host_label_len;
+ size_t host_suffix_len;
+ size_t pattern_len;
+ size_t pattern_label_len;
+ size_t pattern_prefix_len;
+ size_t pattern_suffix_len;
+ pattern=(const char *)ASN1_STRING_data(_pattern);
+ pattern_len=strlen(pattern);
+ /*Check the pattern for embedded NULs.*/
+ if(OP_UNLIKELY(pattern_len!=(size_t)ASN1_STRING_length(_pattern)))return 0;
+ /*The length of the left-most label, and the number of characters before the
+ first wildcard (which is the whole pattern, if there is no wildcard).*/
+ pattern_label_len=strcspn(pattern,".");
+ OP_ASSERT(pattern_label_len<=pattern_len);
+ pattern_prefix_len=strcspn(pattern,"*");
+ if(pattern_prefix_len>=pattern_label_len){
+ /*"The client SHOULD NOT attempt to match a presented identifier in which
+ the wildcard character comprises a label other than the left-most label
+ (e.g., do not match bar.*.example.net)." [RFC 6125 Section 6.4.3]*/
+ if(pattern_prefix_len<pattern_len)return 0;
+ /*If the pattern does not contain a wildcard in the first element, do an
+ exact match.
+ Don't use the system strcasecmp here, as that uses the locale and
+ RFC 4343 makes clear that DNS's case-insensitivity only applies to
+ the ASCII range.*/
+ return _host_len==pattern_len&&op_strncasecmp(_host,pattern,_host_len)==0;
+ }
+ /*"However, the client SHOULD NOT attempt to match a presented identifier
+ where the wildcard character is embedded within an A-label or U-label of
+ an internationalized domain name." [RFC 6125 Section 6.4.3]*/
+ if(op_strncasecmp(pattern,"xn--",4)==0)return 0;
+ host_label_len=strcspn(_host,".");
+ /*Make sure the host has at least two dots, to prevent the wildcard match
+ from being ridiculously wide.
+ We should have already checked to ensure it had at least one.*/
+ if(OP_UNLIKELY(_host[host_label_len]!='.')
+ ||strchr(_host+host_label_len+1,'.')==NULL){
+ return 0;
+ }
+ OP_ASSERT(host_label_len<_host_len);
+ /*"If the wildcard character is the only character of the left-most label in
+ the presented identifier, the client SHOULD NOT compare against anything
+ but the left-most label of the reference identifier (e.g., *.example.com
+ would match foo.example.com but not bar.foo.example.com)." [RFC 6125
+ Section 6.4.3]
+ This is really confusingly worded, as we check this by actually comparing
+ the rest of the pattern for an exact match.
+ We also use the fact that the wildcard must match at least one character,
+ so the left-most label of the hostname must be at least as large as the
+ left-most label of the pattern.*/
+ if(host_label_len<pattern_label_len)return 0;
+ OP_ASSERT(pattern[pattern_prefix_len]=='*');
+ /*"The client MAY match a presented identifier in which the wildcard
+ character is not the only character of the label (e.g., baz*.example.net
+ and *baz.example.net and b*z.example.net would be taken to match
+ baz1.example.net and foobaz.example.net and buzz.example.net,
+ respectively)." [RFC 6125 Section 6.4.3]*/
+ /*Everything in the pattern after the wildcard must match the end of the
+ host exactly.
+ The expected host suffix is everything after the host's left-most label
+ (including the dot), plus as many characters as follow the wildcard in
+ the pattern's left-most label.*/
+ pattern_suffix_len=pattern_len-pattern_prefix_len-1;
+ host_suffix_len=_host_len-host_label_len
+ +pattern_label_len-pattern_prefix_len-1;
+ return pattern_suffix_len==host_suffix_len
+ &&op_strncasecmp(_host,pattern,pattern_prefix_len)==0
+ &&op_strncasecmp(_host+_host_len-host_suffix_len,
+ pattern+pattern_prefix_len+1,host_suffix_len)==0;
+}
+
+/*Convert a host to a numeric address, if possible.
+  No name resolution is attempted: AI_NUMERICHOST restricts getaddrinfo() to
+   numeric address strings.
+  _host: The NUL-terminated host string.
+  Return: A struct addrinfo containing the address, if it was numeric, and NULL
+           otherwise.
+          A non-NULL result must be released with freeaddrinfo().*/
+static struct addrinfo *op_inet_pton(const char *_host){
+  struct addrinfo  hints;
+  struct addrinfo *result;
+  memset(&hints,0,sizeof(hints));
+  hints.ai_flags=AI_NUMERICHOST;
+  hints.ai_socktype=SOCK_STREAM;
+  if(getaddrinfo(_host,NULL,&hints,&result)!=0)return NULL;
+  return result;
+}
+
+/*Verify the server's hostname matches the certificate they presented using
+ the procedure from Section 6 of RFC 6125.
+ If the certificate has a subjectAltName extension, only the names in it are
+ considered; otherwise the last commonName in the subject is used.
+ _stream: The stream, which supplies the host name from the URL.
+ _ssl_conn: The SSL connection to get the peer certificate from.
+ Return: 0 if the certificate doesn't match, and a non-zero value if it does.*/
+static int op_http_verify_hostname(OpusHTTPStream *_stream,SSL *_ssl_conn){
+ X509 *peer_cert;
+ STACK_OF(GENERAL_NAME) *san_names;
+ char *host;
+ size_t host_len;
+ int ret;
+ host=_stream->url.host;
+ host_len=strlen(host);
+ peer_cert=SSL_get_peer_certificate(_ssl_conn);
+ /*We set VERIFY_PEER, so we shouldn't get here without a certificate.*/
+ if(OP_UNLIKELY(peer_cert==NULL))return 0;
+ ret=0;
+ OP_ASSERT(host_len<INT_MAX);
+ /*RFC 2818 says (after correcting for Errata 1077): "If a subjectAltName
+ extension of type dNSName is present, that MUST be used as the identity.
+ Otherwise, the (most specific) Common Name field in the Subject field of
+ the certificate MUST be used.
+ Although the use of the Common Name is existing practice, it is deprecated
+ and Certification Authorities are encouraged to use the dNSName
+ instead."
+ "Matching is performed using the matching rules specified by RFC 2459.
+ If more than one identity of a given type is present in the certificate
+ (e.g., more than one dNSName name), a match in any one of the set is
+ considered acceptable.
+ Names may contain the wildcard character * which is considered to match any
+ single domain name component or component fragment.
+ E.g., *.a.com matches foo.a.com but not bar.foo.a.com.
+ f*.com matches foo.com but not bar.com."
+ "In some cases, the URI is specified as an IP address rather than a
+ hostname.
+ In this case, the iPAddress subjectAltName must be present in the
+ certificate and must exactly match the IP in the URI."*/
+ san_names=X509_get_ext_d2i(peer_cert,NID_subject_alt_name,NULL,NULL);
+ if(san_names!=NULL){
+ struct addrinfo *addr;
+ unsigned char *ip;
+ int ip_len;
+ int nsan_names;
+ int sni;
+ /*Check to see if the host was specified as a simple IP address.*/
+ addr=op_inet_pton(host);
+ ip=NULL;
+ ip_len=0;
+ if(addr!=NULL){
+ /*Point ip at the raw address bytes inside addr, which stays allocated
+ until the freeaddrinfo() call below.*/
+ switch(addr->ai_family){
+ case AF_INET:{
+ struct sockaddr_in *s;
+ s=(struct sockaddr_in *)addr->ai_addr;
+ OP_ASSERT(addr->ai_addrlen>=sizeof(*s));
+ ip=(unsigned char *)&s->sin_addr;
+ ip_len=sizeof(s->sin_addr);
+ }break;
+ case AF_INET6:{
+ struct sockaddr_in6 *s;
+ s=(struct sockaddr_in6 *)addr->ai_addr;
+ OP_ASSERT(addr->ai_addrlen>=sizeof(*s));
+ ip=(unsigned char *)&s->sin6_addr;
+ ip_len=sizeof(s->sin6_addr);
+ }break;
+ }
+ }
+ /*We can only verify fully-qualified domain names.
+ To quote RFC 6125: "The extracted data MUST include only information that
+ can be securely parsed out of the inputs (e.g., parsing the fully
+ qualified DNS domain name out of the "host" component (or its
+ equivalent) of a URI or deriving the application service type from the
+ scheme of a URI) ..."
+ We don't have a way to check (without relying on DNS records, which might
+ be subverted) if this address is fully-qualified.
+ This is particularly problematic when using a CONNECT tunnel, as it is
+ the server that does DNS lookup, not us.
+ However, we are certain that if the hostname has no '.', it is definitely
+ not a fully-qualified domain name (with the exception of crazy TLDs that
+ actually resolve, like "uz", but I am willing to ignore those).
+ RFC 1535 says "...in any event where a '.' exists in a specified name it
+ should be assumed to be a fully qualified domain name (FQDN) and SHOULD
+ be tried as a rooted name first."
+ That doesn't give us any security guarantees, of course (a subverted DNS
+ could fail the original query and our resolver might still retry with a
+ local domain appended).
+ If we don't have a FQDN, just set the number of names to 0, so we'll fail
+ and clean up any resources we allocated.*/
+ if(ip==NULL&&strchr(host,'.')==NULL)nsan_names=0;
+ /*RFC 2459 says there MUST be at least one, but we don't depend on it.*/
+ else nsan_names=sk_GENERAL_NAME_num(san_names);
+ /*A host name is only compared against dNSName entries, and an IP address
+ is only compared against iPAddress entries.*/
+ for(sni=0;sni<nsan_names;sni++){
+ const GENERAL_NAME *name;
+ name=sk_GENERAL_NAME_value(san_names,sni);
+ if(ip==NULL){
+ if(name->type==GEN_DNS
+ &&op_http_hostname_match(host,host_len,name->d.dNSName)){
+ ret=1;
+ break;
+ }
+ }
+ else if(name->type==GEN_IPADD){
+ unsigned char *cert_ip;
+ /*If we do have an IP address, compare it directly.
+ RFC 6125: "When the reference identity is an IP address, the identity
+ MUST be converted to the 'network byte order' octet string
+ representation.
+ For IP Version 4, as specified in RFC 791, the octet string will
+ contain exactly four octets.
+ For IP Version 6, as specified in RFC 2460, the octet string will
+ contain exactly sixteen octets.
+ This octet string is then compared against subjectAltName values of
+ type iPAddress.
+ A match occurs if the reference identity octet string and the value
+ octet strings are identical."*/
+ cert_ip=ASN1_STRING_data(name->d.iPAddress);
+ if(ip_len==ASN1_STRING_length(name->d.iPAddress)
+ &&memcmp(ip,cert_ip,ip_len)==0){
+ ret=1;
+ break;
+ }
+ }
+ }
+ sk_GENERAL_NAME_pop_free(san_names,GENERAL_NAME_free);
+ if(addr!=NULL)freeaddrinfo(addr);
+ }
+ /*Do the same FQDN check we did above.
+ We don't do this once in advance for both cases, because in the
+ subjectAltName case we might have an IPv6 address without a dot.*/
+ else if(strchr(host,'.')!=NULL){
+ int last_cn_loc;
+ int cn_loc;
+ /*If there is no subjectAltName, match against commonName.
+ RFC 6125 says that at least one significant CA is known to issue certs
+ with multiple CNs, although it SHOULD NOT.
+ It also says: "The server's identity may also be verified by comparing
+ the reference identity to the Common Name (CN) value in the last
+ Relative Distinguished Name (RDN) of the subject field of the server's
+ certificate (where "last" refers to the DER-encoded order...)."
+ So find the last one and check it.*/
+ cn_loc=-1;
+ /*Keep searching from the previous hit until the search fails;
+ last_cn_loc is then the index of the last commonName (or -1 if there
+ is none).*/
+ do{
+ last_cn_loc=cn_loc;
+ cn_loc=X509_NAME_get_index_by_NID(X509_get_subject_name(peer_cert),
+ NID_commonName,last_cn_loc);
+ }
+ while(cn_loc>=0);
+ ret=last_cn_loc>=0
+ &&op_http_hostname_match(host,host_len,
+ X509_NAME_ENTRY_get_data(
+ X509_NAME_get_entry(X509_get_subject_name(peer_cert),last_cn_loc)));
+ }
+ X509_free(peer_cert);
+ return ret;
+}
+
+/*Perform the TLS handshake on a new connection.
+  If the stream has a saved session, we try to resume it, and if the stream
+   is configured with a proxy, the CONNECT tunnel is established first.
+  _stream:   The stream this connection belongs to.
+             It supplies the host name, the proxy CONNECT request (if any),
+              the skip_certificate_check flag, and the saved SSL session.
+             If it has no saved session yet, the new one is saved in it.
+  _conn:     The connection to initialize.
+             On success its ssl_conn, fd, and nrequests_left fields are set.
+  _fd:       The connected socket.
+  _ssl_conn: The SSL connection to use.
+  Return: 0 on success, or a negative value on failure.*/
+static int op_http_conn_start_tls(OpusHTTPStream *_stream,OpusHTTPConn *_conn,
+ op_sock _fd,SSL *_ssl_conn){
+  SSL_SESSION *ssl_session;
+  BIO         *ssl_bio;
+  int          skip_certificate_check;
+  int          ret;
+  ssl_bio=BIO_new_socket(_fd,BIO_NOCLOSE);
+  /*Failing to allocate the BIO is the rare case, so hint it as such, like
+     every other failure check in this function.*/
+  if(OP_UNLIKELY(ssl_bio==NULL))return OP_FALSE;
+# if !defined(OPENSSL_NO_TLSEXT)
+  /*Support for RFC 6066 Server Name Indication.*/
+  SSL_set_tlsext_host_name(_ssl_conn,_stream->url.host);
+# endif
+  /*Resume a previous session if available.*/
+  if(_stream->ssl_session!=NULL){
+    SSL_set_session(_ssl_conn,_stream->ssl_session);
+  }
+  /*If we're proxying, establish the CONNECT tunnel.*/
+  if(_stream->proxy_connect.nbuf>0){
+    ret=op_http_conn_establish_tunnel(_stream,_conn,
+     _fd,_ssl_conn,ssl_bio);
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+  else{
+    /*Otherwise, just use this socket directly.*/
+    op_sock_set_tcp_nodelay(_fd,1);
+    SSL_set_bio(_ssl_conn,ssl_bio,ssl_bio);
+    SSL_set_connect_state(_ssl_conn);
+  }
+  ret=op_do_ssl_step(_ssl_conn,_fd,SSL_connect);
+  if(OP_UNLIKELY(ret<=0))return OP_FALSE;
+  ssl_session=_stream->ssl_session;
+  skip_certificate_check=_stream->skip_certificate_check;
+  if(ssl_session==NULL||!skip_certificate_check){
+    ret=op_do_ssl_step(_ssl_conn,_fd,SSL_do_handshake);
+    if(OP_UNLIKELY(ret<=0))return OP_FALSE;
+    /*OpenSSL does not do hostname verification on its own, even though we
+       passed it the hostname above in the call to
+       SSL_set_tlsext_host_name(), so we have to do it ourselves.*/
+    if(!skip_certificate_check&&!op_http_verify_hostname(_stream,_ssl_conn)){
+      return OP_FALSE;
+    }
+    if(ssl_session==NULL){
+      /*Save the session for later resumption.*/
+      _stream->ssl_session=SSL_get1_session(_ssl_conn);
+    }
+  }
+  _conn->ssl_conn=_ssl_conn;
+  _conn->fd=_fd;
+  _conn->nrequests_left=OP_PIPELINE_MAX_REQUESTS;
+  return 0;
+}
+
+/*Try to start a connection to the next address in the given list of a given
+   type.
+  Addresses of other families are skipped, and addresses whose connection
+   attempt fails immediately are passed over in favor of the next one.
+  _fd:           The socket to connect with.
+  [inout] _addr: A pointer to the list of addresses.
+                 This will be advanced to the first one that matches the given
+                  address family (possibly the current one).
+  _ai_family:    The address family to connect to.
+  Return: 1        If the connection was successful.
+          0        If the connection is in progress.
+          OP_FALSE If the connection failed and there were no more addresses
+                    left to try.
+                   *_addr will be set to NULL in this case.*/
+static int op_sock_connect_next(op_sock _fd,
+ const struct addrinfo **_addr,int _ai_family){
+  const struct addrinfo *cand;
+  for(cand=*_addr;;cand=cand->ai_next){
+    int err;
+    /*Move to the next address of the requested type.*/
+    while(cand!=NULL&&cand->ai_family!=_ai_family)cand=cand->ai_next;
+    *_addr=cand;
+    /*No more: failure.*/
+    if(cand==NULL)return OP_FALSE;
+    if(connect(_fd,cand->ai_addr,cand->ai_addrlen)>=0)return 1;
+    err=op_errno();
+    /*Winsock will set WSAEWOULDBLOCK.*/
+    if(OP_LIKELY(err==EINPROGRESS||err==EWOULDBLOCK))return 0;
+    /*A hard failure: go around again with the address after this one.*/
+  }
+}
+
+/*The number of address families to try connecting to simultaneously.*/
+# define OP_NPROTOS (2)
+
+/*Open a connection to one of the given addresses.
+  One non-blocking connection attempt is started for each address family
+   (IPv4 and IPv6) in the list, and the first one to succeed is kept.
+  When an attempt fails, the next address of the same family is tried on the
+   same socket.
+  _stream:     The stream the connection belongs to.
+               The address that was finally used is saved in it for future
+                connection attempts.
+  _conn:       The connection to set up.
+               It must be at the head of the stream's free list, and is moved
+                to the head of the LRU list (even if the connection fails).
+  _addrs:      The list of addresses to try.
+  _start_time: Set to the time the connection attempt was started.
+  Return: 0 on success, or a negative value on failure.*/
+static int op_http_connect_impl(OpusHTTPStream *_stream,OpusHTTPConn *_conn,
+ const struct addrinfo *_addrs,struct timeb *_start_time){
+  const struct addrinfo *addr;
+  const struct addrinfo *addrs[OP_NPROTOS];
+  struct pollfd          fds[OP_NPROTOS];
+  int                    ai_family;
+  int                    nprotos;
+  int                    ret;
+  int                    pi;
+  int                    pj;
+  for(pi=0;pi<OP_NPROTOS;pi++)addrs[pi]=NULL;
+  /*Try connecting via both IPv4 and IPv6 simultaneously, and keep the first
+     one that succeeds.
+    Start by finding the first address from each family.
+    We order the first connection attempts in the same order the address
+     families were returned in the DNS records in accordance with RFC 6555.*/
+  for(addr=_addrs,nprotos=0;addr!=NULL&&nprotos<OP_NPROTOS;addr=addr->ai_next){
+    if(addr->ai_family==AF_INET6||addr->ai_family==AF_INET){
+      /*The address we wind up using gets copied into _stream->addr below, so
+         that is the size it must not exceed.
+        Comparing against sizeof(struct sockaddr_in) would reject every IPv6
+         address.*/
+      OP_ASSERT(addr->ai_addrlen<=sizeof(_stream->addr));
+      /*If we've seen this address family before, skip this address for now.*/
+      for(pi=0;pi<nprotos;pi++)if(addrs[pi]->ai_family==addr->ai_family)break;
+      if(pi<nprotos)continue;
+      addrs[nprotos++]=addr;
+    }
+  }
+  /*Pop the connection off the free list and put it on the LRU list.*/
+  OP_ASSERT(_stream->free_head==_conn);
+  _stream->free_head=_conn->next;
+  _conn->next=_stream->lru_head;
+  _stream->lru_head=_conn;
+  ftime(_start_time);
+  *&_conn->read_time=*_start_time;
+  _conn->read_bytes=0;
+  _conn->read_rate=0;
+  /*Try to start a connection to each protocol.
+    RFC 6555 says it is RECOMMENDED that connection attempts be paced
+     150...250 ms apart "to balance human factors against network load", but
+     that "stateful algorithms" (that's us) "are expected to be more
+     aggressive".
+    We are definitely more aggressive: we don't pace at all.*/
+  for(pi=0;pi<nprotos;pi++){
+    ai_family=addrs[pi]->ai_family;
+    fds[pi].fd=socket(ai_family,SOCK_STREAM,addrs[pi]->ai_protocol);
+    fds[pi].events=POLLOUT;
+    if(OP_LIKELY(fds[pi].fd!=OP_INVALID_SOCKET)){
+      if(OP_LIKELY(op_sock_set_nonblocking(fds[pi].fd,1)>=0)){
+        ret=op_sock_connect_next(fds[pi].fd,addrs+pi,ai_family);
+        if(OP_UNLIKELY(ret>0)){
+          /*It succeeded right away (technically possible), so stop.*/
+          nprotos=pi+1;
+          break;
+        }
+        /*Otherwise go on to the next protocol, and skip the clean-up below.*/
+        else if(ret==0)continue;
+        /*Tried all the addresses for this protocol.*/
+      }
+      /*Clean up the socket.*/
+      close(fds[pi].fd);
+    }
+    /*Remove this protocol from the list.
+      There is no need to move fds as well: the entry for the protocol that
+       takes this slot is filled in on the next iteration.*/
+    memmove(addrs+pi,addrs+pi+1,sizeof(*addrs)*(nprotos-pi-1));
+    nprotos--;
+    pi--;
+  }
+  /*Wait for one of the connections to finish.
+    From here on, pi<nprotos means that connection pi succeeded, and
+     pi>=nprotos means that nothing has succeeded (yet).*/
+  while(pi>=nprotos&&nprotos>0&&poll(fds,nprotos,OP_POLL_TIMEOUT_MS)>0){
+    for(pi=0;pi<nprotos;pi++){
+      socklen_t errlen;
+      int       err;
+      /*Still waiting...*/
+      if(!fds[pi].revents)continue;
+      errlen=sizeof(err);
+      /*Some platforms will return the pending error in &err and return 0.
+        Others will put it in errno and return -1.*/
+      ret=getsockopt(fds[pi].fd,SOL_SOCKET,SO_ERROR,&err,&errlen);
+      if(ret<0)err=op_errno();
+      /*Success!*/
+      if(err==0||err==EISCONN)break;
+      /*Move on to the next address for this protocol.*/
+      ai_family=addrs[pi]->ai_family;
+      addrs[pi]=addrs[pi]->ai_next;
+      ret=op_sock_connect_next(fds[pi].fd,addrs+pi,ai_family);
+      /*It succeeded right away, so stop.*/
+      if(ret>0)break;
+      /*Otherwise go on to the next protocol, and skip the clean-up below.*/
+      else if(ret==0)continue;
+      /*Tried all the addresses for this protocol.
+        Remove it from the list.*/
+      close(fds[pi].fd);
+      memmove(fds+pi,fds+pi+1,sizeof(*fds)*(nprotos-pi-1));
+      memmove(addrs+pi,addrs+pi+1,sizeof(*addrs)*(nprotos-pi-1));
+      nprotos--;
+      pi--;
+    }
+  }
+  /*Close all the other sockets.*/
+  for(pj=0;pj<nprotos;pj++)if(pi!=pj)close(fds[pj].fd);
+  /*If none of them succeeded, we're done.*/
+  if(pi>=nprotos)return OP_FALSE;
+  /*Save this address for future connection attempts.*/
+  if(addrs[pi]!=&_stream->addr_info){
+    memcpy(&_stream->addr_info,addrs[pi],sizeof(_stream->addr_info));
+    /*The saved copy must not point into a list that the caller may free.*/
+    _stream->addr_info.ai_addr=&_stream->addr.s;
+    _stream->addr_info.ai_next=NULL;
+    memcpy(&_stream->addr,addrs[pi]->ai_addr,addrs[pi]->ai_addrlen);
+  }
+  if(OP_URL_IS_SSL(&_stream->url)){
+    SSL *ssl_conn;
+    /*Start the SSL connection.*/
+    OP_ASSERT(_stream->ssl_ctx!=NULL);
+    ssl_conn=SSL_new(_stream->ssl_ctx);
+    if(OP_LIKELY(ssl_conn!=NULL)){
+      ret=op_http_conn_start_tls(_stream,_conn,fds[pi].fd,ssl_conn);
+      if(OP_LIKELY(ret>=0))return ret;
+      SSL_free(ssl_conn);
+    }
+    close(fds[pi].fd);
+    _conn->fd=OP_INVALID_SOCKET;
+    return OP_FALSE;
+  }
+  /*Just a normal non-SSL connection.*/
+  _conn->ssl_conn=NULL;
+  _conn->fd=fds[pi].fd;
+  _conn->nrequests_left=OP_PIPELINE_MAX_REQUESTS;
+  /*Disable write coalescing.
+    We always send whole requests at once and always parse the response headers
+     before sending another one.*/
+  op_sock_set_tcp_nodelay(fds[pi].fd,1);
+  return 0;
+}
+
+/*Open a connection, re-resolving the host name first if necessary.
+  The host is resolved again if we were not handed the stream's cached
+   address, or if the cached address is too old.
+  If that lookup fails, we fall back to the addresses we were given, if any.
+  _stream:     The stream the connection belongs to.
+  _conn:       The connection to set up.
+  _addrs:      The list of addresses to try (may be NULL to force a lookup).
+  _start_time: Set to the time the connection attempt was started.
+  Return: 0 on success, or a negative value on failure.*/
+static int op_http_connect(OpusHTTPStream *_stream,OpusHTTPConn *_conn,
+ const struct addrinfo *_addrs,struct timeb *_start_time){
+  struct addrinfo *resolved;
+  struct timeb     now;
+  int              need_resolve;
+  int              ret;
+  resolved=NULL;
+  ftime(&now);
+  /*Re-resolve the host if we need to (RFC 6555 says we MUST do so
+     occasionally).*/
+  need_resolve=_addrs!=&_stream->addr_info
+   ||op_time_diff_ms(&now,&_stream->resolve_time)
+   >=OP_RESOLVE_CACHE_TIMEOUT_MS;
+  if(need_resolve){
+    resolved=op_resolve(_stream->connect_host,_stream->connect_port);
+    if(OP_LIKELY(resolved!=NULL)){
+      _addrs=resolved;
+      *&_stream->resolve_time=*&now;
+    }
+    /*The lookup failed and we have nothing to fall back on.*/
+    else if(OP_LIKELY(_addrs==NULL))return OP_FALSE;
+  }
+  ret=op_http_connect_impl(_stream,_conn,_addrs,_start_time);
+  /*The address that was used has been copied into the stream by now.*/
+  if(resolved!=NULL)freeaddrinfo(resolved);
+  return ret;
+}
+
+/*The length of the Base64 encoding of _len bytes, including padding, but not
+   including the terminating NUL.*/
+# define OP_BASE64_LENGTH(_len) (((_len)+2)/3*4)
+
+/*The standard Base64 alphabet, indexed by 6-bit value.*/
+static const char BASE64_TABLE[64]={
+  'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
+  'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
+  'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
+  'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
+};
+
+/*Base64 encode a buffer.
+  Each group of input bytes is read in full before the corresponding output
+   is written, so the encoding may be done in place, so long as the input is
+   stored at the very end of the space the output will occupy.
+  _dst: The destination buffer.
+        It must have room for OP_BASE64_LENGTH(_len) characters, plus a
+         terminating NUL.
+  _src: The bytes to encode.
+  _len: The number of bytes to encode.
+  Return: A pointer to the terminating NUL written to _dst.*/
+static char *op_base64_encode(char *_dst,const char *_src,int _len){
+  const unsigned char *src;
+  unsigned             s0;
+  unsigned             s1;
+  unsigned             s2;
+  int                  ngroups;
+  int                  i;
+  /*Read the input as unsigned bytes.
+    Where plain char is signed, a byte with the high bit set would otherwise
+     be sign-extended into a huge unsigned value, and indexing BASE64_TABLE
+     with it would read far out of bounds.*/
+  src=(const unsigned char *)_src;
+  ngroups=_len/3;
+  for(i=0;i<ngroups;i++){
+    s0=src[3*i+0];
+    s1=src[3*i+1];
+    s2=src[3*i+2];
+    _dst[4*i+0]=BASE64_TABLE[s0>>2];
+    _dst[4*i+1]=BASE64_TABLE[(s0&3)<<4|s1>>4];
+    _dst[4*i+2]=BASE64_TABLE[(s1&15)<<2|s2>>6];
+    _dst[4*i+3]=BASE64_TABLE[s2&63];
+  }
+  /*Encode the one or two bytes left over (if any), with padding.*/
+  _len-=3*i;
+  if(_len==1){
+    s0=src[3*i+0];
+    _dst[4*i+0]=BASE64_TABLE[s0>>2];
+    _dst[4*i+1]=BASE64_TABLE[(s0&3)<<4];
+    _dst[4*i+2]='=';
+    _dst[4*i+3]='=';
+    i++;
+  }
+  else if(_len==2){
+    s0=src[3*i+0];
+    s1=src[3*i+1];
+    _dst[4*i+0]=BASE64_TABLE[s0>>2];
+    _dst[4*i+1]=BASE64_TABLE[(s0&3)<<4|s1>>4];
+    _dst[4*i+2]=BASE64_TABLE[(s1&15)<<2];
+    _dst[4*i+3]='=';
+    i++;
+  }
+  _dst[4*i]='\0';
+  return _dst+4*i;
+}
+
+ /*Construct an HTTP authorization header using RFC 2617's Basic Authentication
+    Scheme and append it to the given string buffer.
+   The appended text is "<_header>: Basic <base64(_user:_pass)>\r\n".
+   _sb:     The string buffer to append to.
+   _header: The name of the header field (without the trailing colon).
+   _user:   The NUL-terminated user name.
+   _pass:   The NUL-terminated password.
+   Return: The result of the final op_sb_append() on success, OP_EFAULT if the
+            combined lengths cannot be represented in an int, or a negative
+            value if the buffer could not be extended.*/
+ static int op_sb_append_basic_auth_header(OpusStringBuf *_sb,
+  const char *_header,const char *_user,const char *_pass){
+   size_t user_size;
+   size_t pass_size;
+   int    user_len;
+   int    pass_len;
+   int    user_pass_len;
+   int    base64_len;
+   int    nbuf_total;
+   int    ret;
+   ret=op_sb_append_string(_sb,_header);
+   ret|=op_sb_append(_sb,": Basic ",8);
+   /*strlen() returns a size_t.
+     Reject anything that does not fit in an int before narrowing, so the
+      overflow checks below operate on the real lengths.*/
+   user_size=strlen(_user);
+   pass_size=strlen(_pass);
+   if(OP_UNLIKELY(user_size>(size_t)INT_MAX)
+    ||OP_UNLIKELY(pass_size>(size_t)INT_MAX)){
+     return OP_EFAULT;
+   }
+   user_len=(int)user_size;
+   pass_len=(int)pass_size;
+   if(OP_UNLIKELY(pass_len>INT_MAX-user_len))return OP_EFAULT;
+   /*Make sure the Base64 length computation cannot overflow either.*/
+   if(OP_UNLIKELY(user_len+pass_len>(INT_MAX>>2)*3-3))return OP_EFAULT;
+   user_pass_len=user_len+1+pass_len;
+   base64_len=OP_BASE64_LENGTH(user_pass_len);
+   /*Stick "user:pass" at the end of the buffer so we can Base64 encode it
+      in-place.*/
+   nbuf_total=_sb->nbuf;
+   if(OP_UNLIKELY(base64_len>INT_MAX-nbuf_total))return OP_EFAULT;
+   nbuf_total+=base64_len;
+   ret|=op_sb_ensure_capacity(_sb,nbuf_total);
+   if(OP_UNLIKELY(ret<0))return ret;
+   /*Capacity is already reserved, so these appends are not expected to fail.*/
+   _sb->nbuf=nbuf_total-user_pass_len;
+   OP_ALWAYS_TRUE(!op_sb_append(_sb,_user,user_len));
+   OP_ALWAYS_TRUE(!op_sb_append(_sb,":",1));
+   OP_ALWAYS_TRUE(!op_sb_append(_sb,_pass,pass_len));
+   /*The encoded text starts before the raw text and ends at the same place.*/
+   op_base64_encode(_sb->buf+nbuf_total-base64_len,
+    _sb->buf+nbuf_total-user_pass_len,user_pass_len);
+   return op_sb_append(_sb,"\r\n",2);
+ }
+
+ /*Decide whether pipelined requests may be sent to a server, based on the
+    value of its Server response header.
+   _server: The NUL-terminated value of the Server header.
+   Return: 0 if _server starts with the name of a server known to do bad things
+            with pipelined requests, and 1 otherwise.*/
+ static int op_http_allow_pipelining(const char *_server){
+   /*This list is taken from Gecko's nsHttpConnection::SupportsPipelining() (in
+      netwerk/protocol/http/nsHttpConnection.cpp).*/
+   static const char *BAD_SERVERS[]={
+     "EFAServer/",
+     "Microsoft-IIS/4.",
+     "Microsoft-IIS/5.",
+     "Netscape-Enterprise/3.",
+     "Netscape-Enterprise/4.",
+     "Netscape-Enterprise/5.",
+     "Netscape-Enterprise/6.",
+     "WebLogic 3.",
+     "WebLogic 4.",
+     "WebLogic 5.",
+     "WebLogic 6.",
+     "Winstone Servlet Engine v0."
+   };
+   const int nbad=(int)(sizeof(BAD_SERVERS)/sizeof(*BAD_SERVERS));
+   size_t    prefix_len;
+   int       idx;
+   /*Every entry above starts with a letter between 'E' and 'W', so anything
+      outside that range cannot match.*/
+   if(*_server<'E'||*_server>'W')return 1;
+   for(idx=0;idx<nbad;idx++){
+     prefix_len=strlen(BAD_SERVERS[idx]);
+     if(strncmp(_server,BAD_SERVERS[idx],prefix_len)==0)return 0;
+   }
+   return 1;
+ }
+
+ /*Open the resource at _url over HTTP or HTTPS, following redirects.
+ The URL is parsed, a connection is made (directly, or via the given proxy),
+ a GET request with a "Range: bytes=0-" header is sent, and the response
+ status line and headers are interpreted.
+ At most OP_REDIRECT_LIMIT requests are made while following 30x responses.
+ _stream: The stream state to fill in.
+ _url: The URL to open.
+ _skip_certificate_check: If zero, the default verify paths are loaded and
+ peer verification is requested on the SSL context.
+ _proxy_host: The proxy to connect through, or NULL to connect directly.
+ _proxy_port: The proxy port; it must not exceed 65535 when a proxy host is
+ given.
+ _proxy_user: With _proxy_pass, the credentials for a Proxy-Authorization
+ _proxy_pass: header; it is only sent if both are non-NULL.
+ _info: If non-NULL, receives copies of selected response headers
+ (Server, Content-Type, and the ice-/icy- station headers).
+ Return: 0 on success, or a negative value on error.
+ OP_FALSE is also returned when the redirection limit is reached.*/
+ static int op_http_stream_open(OpusHTTPStream *_stream,const char *_url,
+ int _skip_certificate_check,const char *_proxy_host,unsigned _proxy_port,
+ const char *_proxy_user,const char *_proxy_pass,OpusServerInfo *_info){
+ struct addrinfo *addrs;
+ int nredirs;
+ int ret;
+ #if defined(_WIN32)
+ op_init_winsock();
+ #endif
+ ret=op_parse_url(&_stream->url,_url);
+ if(OP_UNLIKELY(ret<0))return ret;
+ if(_proxy_host!=NULL){
+ if(OP_UNLIKELY(_proxy_port>65535U))return OP_EINVAL;
+ _stream->connect_host=op_string_dup(_proxy_host);
+ /*Fail now rather than carrying a NULL host into the resolver.
+ NOTE(review): this assumes op_string_dup() returns NULL when it cannot
+ allocate the copy; confirm against its definition.*/
+ if(OP_UNLIKELY(_stream->connect_host==NULL))return OP_EFAULT;
+ _stream->connect_port=_proxy_port;
+ }
+ else{
+ _stream->connect_host=_stream->url.host;
+ _stream->connect_port=_stream->url.port;
+ }
+ addrs=NULL;
+ for(nredirs=0;nredirs<OP_REDIRECT_LIMIT;nredirs++){
+ OpusParsedURL next_url;
+ struct timeb start_time;
+ struct timeb end_time;
+ char *next;
+ char *status_code;
+ int minor_version_pos;
+ int v1_1_compat;
+ /*Initialize the SSL library if necessary.*/
+ if(OP_URL_IS_SSL(&_stream->url)&&_stream->ssl_ctx==NULL){
+ SSL_CTX *ssl_ctx;
+ # if !defined(OPENSSL_NO_LOCKING)
+ /*The documentation says SSL_library_init() is not reentrant.
+ We don't want to add our own dependencies on a threading library, and it
+ appears that it's safe to call OpenSSL's locking functions before the
+ library is initialized, so that's what we'll do (really OpenSSL should
+ do this for us).
+ This doesn't guarantee that _other_ threads in the application aren't
+ calling SSL_library_init() at the same time, but there's not much we
+ can do about that.*/
+ CRYPTO_w_lock(CRYPTO_LOCK_SSL);
+ # endif
+ SSL_library_init();
+ /*Needed to get SHA2 algorithms with old OpenSSL versions.*/
+ OpenSSL_add_ssl_algorithms();
+ # if !defined(OPENSSL_NO_LOCKING)
+ CRYPTO_w_unlock(CRYPTO_LOCK_SSL);
+ # endif
+ ssl_ctx=SSL_CTX_new(SSLv23_client_method());
+ if(ssl_ctx==NULL)return OP_EFAULT;
+ if(!_skip_certificate_check){
+ /*We don't do anything if this fails, since it just means we won't load
+ any certificates (and thus all checks will fail).
+ However, as that is probably the result of a system
+ mis-configuration, assert here to make it easier to identify.*/
+ OP_ALWAYS_TRUE(SSL_CTX_set_default_verify_paths(ssl_ctx));
+ SSL_CTX_set_verify(ssl_ctx,SSL_VERIFY_PEER,NULL);
+ }
+ _stream->ssl_ctx=ssl_ctx;
+ _stream->skip_certificate_check=_skip_certificate_check;
+ if(_proxy_host!=NULL){
+ /*We need to establish a CONNECT tunnel to handle https proxying.
+ Build the request we'll send to do so.*/
+ _stream->proxy_connect.nbuf=0;
+ ret=op_sb_append(&_stream->proxy_connect,"CONNECT ",8);
+ ret|=op_sb_append_string(&_stream->proxy_connect,_stream->url.host);
+ ret|=op_sb_append_port(&_stream->proxy_connect,_stream->url.port);
+ /*CONNECT requires at least HTTP 1.1.*/
+ ret|=op_sb_append(&_stream->proxy_connect," HTTP/1.1\r\n",11);
+ ret|=op_sb_append(&_stream->proxy_connect,"Host: ",6);
+ ret|=op_sb_append_string(&_stream->proxy_connect,_stream->url.host);
+ /*The example in RFC 2817 Section 5.2 specifies an explicit port even
+ when connecting to the default port.
+ Given that the proxy doesn't know whether we're trying to connect to
+ an http or an https URL except by the port number, this seems like a
+ good idea.*/
+ ret|=op_sb_append_port(&_stream->proxy_connect,_stream->url.port);
+ ret|=op_sb_append(&_stream->proxy_connect,"\r\n",2);
+ ret|=op_sb_append(&_stream->proxy_connect,"User-Agent: .\r\n",15);
+ if(_proxy_user!=NULL&&_proxy_pass!=NULL){
+ ret|=op_sb_append_basic_auth_header(&_stream->proxy_connect,
+ "Proxy-Authorization",_proxy_user,_proxy_pass);
+ }
+ /*For backwards compatibility.*/
+ ret|=op_sb_append(&_stream->proxy_connect,
+ "Proxy-Connection: keep-alive\r\n",30);
+ ret|=op_sb_append(&_stream->proxy_connect,"\r\n",2);
+ if(OP_UNLIKELY(ret<0))return ret;
+ }
+ }
+ /*Actually make the connection.*/
+ ret=op_http_connect(_stream,_stream->conns+0,addrs,&start_time);
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*Build the request to send.*/
+ _stream->request.nbuf=0;
+ ret=op_sb_append(&_stream->request,"GET ",4);
+ ret|=op_sb_append_string(&_stream->request,
+ _proxy_host!=NULL?_url:_stream->url.path);
+ /*Send HTTP/1.0 by default for maximum compatibility (so we don't have to
+ re-try if HTTP/1.1 fails, though it shouldn't, even for a 1.0 server).
+ This means we aren't conditionally compliant with RFC 2145, because we
+ violate the requirement that "An HTTP client SHOULD send a request
+ version equal to the highest version for which the client is at least
+ conditionally compliant...".
+ According to RFC 2145, that means we can't claim any compliance with any
+ IETF HTTP specification.*/
+ ret|=op_sb_append(&_stream->request," HTTP/1.0\r\n",11);
+ /*Remember where this is so we can upgrade to HTTP/1.1 if the server
+ supports it.*/
+ minor_version_pos=_stream->request.nbuf-3;
+ ret|=op_sb_append(&_stream->request,"Host: ",6);
+ ret|=op_sb_append_string(&_stream->request,_stream->url.host);
+ if(!OP_URL_IS_DEFAULT_PORT(&_stream->url)){
+ ret|=op_sb_append_port(&_stream->request,_stream->url.port);
+ }
+ ret|=op_sb_append(&_stream->request,"\r\n",2);
+ /*User-Agents have been a bad idea, so send as little as possible.
+ RFC 2616 requires at least one token in the User-Agent, which must have
+ at least one character.*/
+ ret|=op_sb_append(&_stream->request,"User-Agent: .\r\n",15);
+ if(_proxy_host!=NULL&&!OP_URL_IS_SSL(&_stream->url)
+ &&_proxy_user!=NULL&&_proxy_pass!=NULL){
+ ret|=op_sb_append_basic_auth_header(&_stream->request,
+ "Proxy-Authorization",_proxy_user,_proxy_pass);
+ }
+ if(_stream->url.user!=NULL&&_stream->url.pass!=NULL){
+ ret|=op_sb_append_basic_auth_header(&_stream->request,
+ "Authorization",_stream->url.user,_stream->url.pass);
+ }
+ /*Always send a Referer [sic] header.
+ It's common to refuse to serve a resource unless one is present.
+ We just use the relative "/" URI to suggest we came from the same domain,
+ as this is the most common check.
+ This might violate RFC 2616's mandate that the field "MUST NOT be sent if
+ the Request-URI was obtained from a source that does not have its own
+ URI, such as input from the user keyboard," but we don't really have any
+ way to know.*/
+ /*TODO: Should we update this on redirects?*/
+ ret|=op_sb_append(&_stream->request,"Referer: /\r\n",12);
+ /*Always send a Range request header to find out if we're seekable.
+ This requires an HTTP/1.1 server to succeed, but we'll still get what we
+ want with an HTTP/1.0 server that ignores this request header.*/
+ ret|=op_sb_append(&_stream->request,"Range: bytes=0-\r\n",17);
+ /*Remember where this is so we can append offsets to it later.*/
+ _stream->request_tail=_stream->request.nbuf-4;
+ ret|=op_sb_append(&_stream->request,"\r\n",2);
+ if(OP_UNLIKELY(ret<0))return ret;
+ ret=op_http_conn_write_fully(_stream->conns+0,
+ _stream->request.buf,_stream->request.nbuf);
+ if(OP_UNLIKELY(ret<0))return ret;
+ ret=op_http_conn_read_response(_stream->conns+0,&_stream->response);
+ if(OP_UNLIKELY(ret<0))return ret;
+ ftime(&end_time);
+ next=op_http_parse_status_line(&v1_1_compat,&status_code,
+ _stream->response.buf);
+ if(OP_UNLIKELY(next==NULL))return OP_FALSE;
+ if(status_code[0]=='2'){
+ opus_int64 content_length;
+ opus_int64 range_length;
+ int pipeline_supported;
+ int pipeline_disabled;
+ /*We only understand 20x codes.*/
+ if(status_code[1]!='0')return OP_FALSE;
+ content_length=-1;
+ range_length=-1;
+ /*Pipelining must be explicitly enabled.*/
+ pipeline_supported=0;
+ pipeline_disabled=0;
+ for(;;){
+ char *header;
+ char *cdr;
+ ret=op_http_get_next_header(&header,&cdr,&next);
+ if(OP_UNLIKELY(ret<0))return ret;
+ if(header==NULL)break;
+ if(strcmp(header,"content-length")==0){
+ /*Two Content-Length headers?*/
+ if(OP_UNLIKELY(content_length>=0))return OP_FALSE;
+ content_length=op_http_parse_content_length(cdr);
+ if(OP_UNLIKELY(content_length<0))return (int)content_length;
+ /*Make sure the Content-Length and Content-Range headers match.*/
+ if(range_length>=0&&OP_UNLIKELY(content_length!=range_length)){
+ return OP_FALSE;
+ }
+ }
+ else if(strcmp(header,"content-range")==0){
+ opus_int64 range_first;
+ opus_int64 range_last;
+ /*Two Content-Range headers?*/
+ if(OP_UNLIKELY(range_length>=0))return OP_FALSE;
+ ret=op_http_parse_content_range(&range_first,&range_last,
+ &range_length,cdr);
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*"A response with status code 206 (Partial Content) MUST NOT
+ include a Content-Range field with a byte-range-resp-spec of
+ '*'."*/
+ if(status_code[2]=='6'
+ &&(OP_UNLIKELY(range_first<0)||OP_UNLIKELY(range_last<0))){
+ return OP_FALSE;
+ }
+ /*We asked for the entire resource.*/
+ if(range_length>=0){
+ /*Quit if we didn't get it.*/
+ if(range_last>=0&&OP_UNLIKELY(range_last!=range_length-1)){
+ return OP_FALSE;
+ }
+ }
+ /*If there was no length, use the end of the range.*/
+ else if(range_last>=0)range_length=range_last+1;
+ /*Make sure the Content-Length and Content-Range headers match.*/
+ if(content_length>=0&&OP_UNLIKELY(content_length!=range_length)){
+ return OP_FALSE;
+ }
+ }
+ else if(strcmp(header,"connection")==0){
+ /*According to RFC 2616, if an HTTP/1.1 application does not support
+ pipelining, it "MUST include the 'close' connection option in
+ every message."
+ Therefore, if we receive one in the initial response, disable
+ pipelining entirely.
+ The server still might support it (e.g., we might just have hit the
+ request limit for a temporary child process), but if it doesn't
+ and we assume it does, every time we cross a chunk boundary we'll
+ error out and reconnect, adding lots of latency.*/
+ ret=op_http_parse_connection(cdr);
+ if(OP_UNLIKELY(ret<0))return ret;
+ pipeline_disabled|=ret;
+ }
+ else if(strcmp(header,"server")==0){
+ /*If we got a Server response header, and it wasn't from a known-bad
+ server, enable pipelining, as long as it's at least HTTP/1.1.
+ According to RFC 2145, the server is supposed to respond with the
+ highest minor version number it supports unless it is known or
+ suspected that we incorrectly implement the HTTP specification.
+ So it should send back at least HTTP/1.1, despite our HTTP/1.0
+ request.*/
+ pipeline_supported=v1_1_compat;
+ if(v1_1_compat)pipeline_disabled|=!op_http_allow_pipelining(cdr);
+ if(_info!=NULL&&_info->server==NULL)_info->server=op_string_dup(cdr);
+ }
+ /*Collect station information headers if the caller requested it.
+ If there's more than one copy of a header, the first one wins.*/
+ else if(_info!=NULL){
+ if(strcmp(header,"content-type")==0){
+ if(_info->content_type==NULL){
+ _info->content_type=op_string_dup(cdr);
+ }
+ }
+ else if(header[0]=='i'&&header[1]=='c'
+ &&(header[2]=='e'||header[2]=='y')&&header[3]=='-'){
+ if(strcmp(header+4,"name")==0){
+ if(_info->name==NULL)_info->name=op_string_dup(cdr);
+ }
+ else if(strcmp(header+4,"description")==0){
+ if(_info->description==NULL)_info->description=op_string_dup(cdr);
+ }
+ else if(strcmp(header+4,"genre")==0){
+ if(_info->genre==NULL)_info->genre=op_string_dup(cdr);
+ }
+ else if(strcmp(header+4,"url")==0){
+ if(_info->url==NULL)_info->url=op_string_dup(cdr);
+ }
+ else if(strcmp(header,"icy-br")==0
+ ||strcmp(header,"ice-bitrate")==0){
+ if(_info->bitrate_kbps<0){
+ opus_int64 bitrate_kbps;
+ /*Just re-using this function to parse a random unsigned
+ integer field.*/
+ bitrate_kbps=op_http_parse_content_length(cdr);
+ if(bitrate_kbps>=0&&bitrate_kbps<=OP_INT32_MAX){
+ _info->bitrate_kbps=(opus_int32)bitrate_kbps;
+ }
+ }
+ }
+ else if(strcmp(header,"icy-pub")==0
+ ||strcmp(header,"ice-public")==0){
+ if(_info->is_public<0&&(cdr[0]=='0'||cdr[0]=='1')&&cdr[1]=='\0'){
+ _info->is_public=cdr[0]-'0';
+ }
+ }
+ }
+ }
+ }
+ switch(status_code[2]){
+ /*200 OK*/
+ case '0':break;
+ /*203 Non-Authoritative Information*/
+ case '3':break;
+ /*204 No Content*/
+ case '4':{
+ if(content_length>=0&&OP_UNLIKELY(content_length!=0)){
+ return OP_FALSE;
+ }
+ }break;
+ /*206 Partial Content*/
+ case '6':{
+ /*No Content-Range header.*/
+ if(OP_UNLIKELY(range_length<0))return OP_FALSE;
+ content_length=range_length;
+ /*The server supports range requests for this resource.
+ We can seek.*/
+ _stream->seekable=1;
+ }break;
+ /*201 Created: the response "SHOULD include an entity containing a list
+ of resource characteristics and location(s)," but not an Opus file.
+ 202 Accepted: the response "SHOULD include an indication of request's
+ current status and either a pointer to a status monitor or some
+ estimate of when the user can expect the request to be fulfilled,"
+ but not an Opus file.
+ 205 Reset Content: this "MUST NOT include an entity," meaning no Opus
+ file.
+ 207...209 are not yet defined, so we don't know how to handle them.*/
+ default:return OP_FALSE;
+ }
+ _stream->content_length=content_length;
+ _stream->pipeline=pipeline_supported&&!pipeline_disabled;
+ /*Pipelining requires HTTP/1.1 persistent connections.*/
+ if(_stream->pipeline)_stream->request.buf[minor_version_pos]='1';
+ _stream->conns[0].pos=0;
+ _stream->conns[0].end_pos=_stream->seekable?content_length:-1;
+ _stream->conns[0].chunk_size=-1;
+ _stream->cur_conni=0;
+ _stream->connect_rate=op_time_diff_ms(&end_time,&start_time);
+ _stream->connect_rate=OP_MAX(_stream->connect_rate,1);
+ if(_info!=NULL)_info->is_ssl=OP_URL_IS_SSL(&_stream->url);
+ /*The URL has been successfully opened.*/
+ return 0;
+ }
+ /*Shouldn't get 1xx; 4xx and 5xx are both failures (and we don't retry).
+ Everything else is undefined.*/
+ else if(status_code[0]!='3')return OP_FALSE;
+ /*We have some form of redirect request.*/
+ /*We only understand 30x codes.*/
+ if(status_code[1]!='0')return OP_FALSE;
+ switch(status_code[2]){
+ /*300 Multiple Choices: "If the server has a preferred choice of
+ representation, it SHOULD include the specific URI for that
+ representation in the Location field," otherwise we'll fail.*/
+ case '0':
+ /*301 Moved Permanently*/
+ case '1':
+ /*302 Found*/
+ case '2':
+ /*307 Temporary Redirect*/
+ case '7':
+ /*308 Permanent Redirect (defined by draft-reschke-http-status-308-07).*/
+ case '8':break;
+ /*305 Use Proxy: "The Location field gives the URI of the proxy."
+ TODO: This shouldn't actually be that hard to do.*/
+ case '5':return OP_EIMPL;
+ /*303 See Other: "The new URI is not a substitute reference for the
+ originally requested resource."
+ 304 Not Modified: "The 304 response MUST NOT contain a message-body."
+ 306 (Unused)
+ 309 is not yet defined, so we don't know how to handle it.*/
+ default:return OP_FALSE;
+ }
+ /*Find the redirect target; the first Location header wins.
+ Note that _url now points into the response buffer.*/
+ _url=NULL;
+ for(;;){
+ char *header;
+ char *cdr;
+ ret=op_http_get_next_header(&header,&cdr,&next);
+ if(OP_UNLIKELY(ret<0))return ret;
+ if(header==NULL)break;
+ if(strcmp(header,"location")==0&&OP_LIKELY(_url==NULL))_url=cdr;
+ }
+ if(OP_UNLIKELY(_url==NULL))return OP_FALSE;
+ ret=op_parse_url(&next_url,_url);
+ if(OP_UNLIKELY(ret<0))return ret;
+ if(_proxy_host==NULL||_stream->ssl_session!=NULL){
+ if(strcmp(_stream->url.host,next_url.host)==0
+ &&_stream->url.port==next_url.port){
+ /*Try to skip re-resolve when connecting to the same host.*/
+ addrs=&_stream->addr_info;
+ }
+ else{
+ /*We're moving to a different host.
+ An earlier same-host redirect may have pointed addrs at the cached
+ address list; leaving it there would let op_http_connect() skip the
+ lookup and connect to the previous host's address.
+ Drop it to force a fresh resolve (proxy connections set addrs again
+ below, since they always connect to the proxy).*/
+ addrs=NULL;
+ if(_stream->ssl_session!=NULL){
+ /*Forget any cached SSL session from the last host.*/
+ SSL_SESSION_free(_stream->ssl_session);
+ _stream->ssl_session=NULL;
+ }
+ }
+ }
+ if(_proxy_host==NULL){
+ OP_ASSERT(_stream->connect_host==_stream->url.host);
+ _stream->connect_host=next_url.host;
+ _stream->connect_port=next_url.port;
+ }
+ /*Always try to skip re-resolve for proxy connections.*/
+ else addrs=&_stream->addr_info;
+ op_parsed_url_clear(&_stream->url);
+ *&_stream->url=*&next_url;
+ /*TODO: On servers/proxies that support pipelining, we might be able to
+ re-use this connection.*/
+ op_http_conn_close(_stream,_stream->conns+0,&_stream->lru_head,1);
+ }
+ /*Redirection limit reached.*/
+ return OP_FALSE;
+ }
+
+ /*Send a range request for the bytes starting at _pos on _conn.
+   The request is built by truncating the stream's saved request at
+    request_tail and appending the new byte range.
+   _pos:              The first byte to ask for.
+   _chunk_size:       The number of bytes to ask for, or a non-positive value to
+                       ask for the rest of the resource.
+   _try_not_to_block: If set (and FIONSPACE is available), give up instead of
+                       sending when the send queue looks too full.
+   Return: A negative value on error, 1 if the request was not sent because it
+            might have blocked, or else the non-negative result of
+            op_http_conn_write_fully().*/
+ static int op_http_conn_send_request(OpusHTTPStream *_stream,
+  OpusHTTPConn *_conn,opus_int64 _pos,opus_int32 _chunk_size,
+  int _try_not_to_block){
+   OpusStringBuf *req;
+   opus_int64     range_end;
+   opus_int32     following_chunk_size;
+   int            err;
+   /*We shouldn't have another request outstanding.*/
+   OP_ASSERT(_conn->next_pos<0);
+   /*Build the request to send.*/
+   req=&_stream->request;
+   OP_ASSERT(req->nbuf>=_stream->request_tail);
+   req->nbuf=_stream->request_tail;
+   err=op_sb_append_nonnegative_int64(req,_pos);
+   err|=op_sb_append(req,"-",1);
+   /*By default, ask for everything that's left (an open-ended range).
+     This covers both non-pipelined requests and ones close enough to the end
+      of the resource.*/
+   range_end=-1;
+   following_chunk_size=-1;
+   if(_chunk_size>0
+    &&OP_ADV_OFFSET(_pos,2*_chunk_size)<_stream->content_length){
+     /*We shouldn't be pipelining requests with non-HTTP/1.1 servers.*/
+     OP_ASSERT(_stream->pipeline);
+     range_end=_pos+_chunk_size;
+     err|=op_sb_append_nonnegative_int64(req,range_end-1);
+     /*Use a larger chunk size for our next request, but after a while, just
+        request the rest of the resource.*/
+     following_chunk_size=_chunk_size<<1;
+     if(following_chunk_size>OP_PIPELINE_CHUNK_SIZE_MAX){
+       following_chunk_size=-1;
+     }
+   }
+   err|=op_sb_append(req,"\r\n\r\n",4);
+   if(OP_UNLIKELY(err<0))return err;
+   /*If we don't want to block, check to see if there's enough space in the send
+      queue.
+     There's still a chance we might block, even if there is enough space, but
+      it's a much slimmer one.
+     If FIONSPACE isn't available (e.g., on Linux), just skip the test.*/
+ # if defined(FIONSPACE)
+   if(_try_not_to_block){
+     int available;
+     err=ioctl(_conn->fd,FIONSPACE,&available);
+     if(err<0||available<req->nbuf)return 1;
+   }
+ # else
+   (void)_try_not_to_block;
+ # endif
+   err=op_http_conn_write_fully(_conn,req->buf,req->nbuf);
+   if(OP_UNLIKELY(err<0))return err;
+   /*Record what we asked for, so the response can be checked against it.*/
+   _conn->next_pos=_pos;
+   _conn->next_end=range_end;
+   /*Save the chunk size to use for the next request.*/
+   _conn->chunk_size=following_chunk_size;
+   _conn->nrequests_left--;
+   return err;
+ }
+
+ /*Handles the response to all requests after the first one.
+ The response headers are read into _stream->response, the status must be
+ 206 Partial Content, and the Content-Range/Content-Length headers are
+ checked against the request recorded in _conn->next_pos and
+ _conn->next_end.
+ On success, _stream->content_length, _conn->pos, and _conn->end_pos are
+ updated, and _conn->next_pos is reset to -1 (no request outstanding).
+ Return: 1 if the connection was closed or timed out, 0 on success, or a
+ negative value on any other error.*/
+ static int op_http_conn_handle_response(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn){
+ char *next;
+ char *status_code;
+ opus_int64 range_length;
+ opus_int64 next_pos;
+ opus_int64 next_end;
+ int ret;
+ ret=op_http_conn_read_response(_conn,&_stream->response);
+ /*If the server just closed the connection on us, we may have just hit a
+ connection re-use limit, so we might want to retry.*/
+ if(OP_UNLIKELY(ret<0))return ret==OP_EREAD?1:ret;
+ next=op_http_parse_status_line(NULL,&status_code,_stream->response.buf);
+ if(OP_UNLIKELY(next==NULL))return OP_FALSE;
+ /*We _need_ a 206 Partial Content response.
+ Nothing else will do.*/
+ if(strncmp(status_code,"206",3)!=0){
+ /*But on a 408 Request Timeout, we might want to re-try.*/
+ return strncmp(status_code,"408",3)==0?1:OP_FALSE;
+ }
+ /*The range we asked for; next_end is negative if we asked for the rest of
+ the resource.*/
+ next_pos=_conn->next_pos;
+ next_end=_conn->next_end;
+ /*A negative range_length means we have not seen a Content-Range header.*/
+ range_length=-1;
+ for(;;){
+ char *header;
+ char *cdr;
+ ret=op_http_get_next_header(&header,&cdr,&next);
+ if(OP_UNLIKELY(ret<0))return ret;
+ if(header==NULL)break;
+ if(strcmp(header,"content-range")==0){
+ opus_int64 range_first;
+ opus_int64 range_last;
+ /*Two Content-Range headers?*/
+ if(OP_UNLIKELY(range_length>=0))return OP_FALSE;
+ ret=op_http_parse_content_range(&range_first,&range_last,
+ &range_length,cdr);
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*"A response with status code 206 (Partial Content) MUST NOT
+ include a Content-Range field with a byte-range-resp-spec of
+ '*'."*/
+ if(OP_UNLIKELY(range_first<0)||OP_UNLIKELY(range_last<0))return OP_FALSE;
+ /*We also don't want range_last to overflow.*/
+ if(OP_UNLIKELY(range_last>=OP_INT64_MAX))return OP_FALSE;
+ /*Convert the inclusive last byte into an exclusive end offset.*/
+ range_last++;
+ /*Quit if we didn't get the offset we asked for.*/
+ if(range_first!=next_pos)return OP_FALSE;
+ if(next_end<0){
+ /*We asked for the rest of the resource.*/
+ if(range_length>=0){
+ /*Quit if we didn't get it.*/
+ if(OP_UNLIKELY(range_last!=range_length))return OP_FALSE;
+ }
+ /*If there was no length, use the end of the range.*/
+ else range_length=range_last;
+ next_end=range_last;
+ }
+ else{
+ /*Either we asked for a specific end, or an earlier Content-Length
+ header already fixed next_end; the range must agree with it.*/
+ if(range_last!=next_end)return OP_FALSE;
+ /*If there was no length, use the larger of the content length or the
+ end of this chunk.*/
+ if(range_length<0){
+ range_length=OP_MAX(range_last,_stream->content_length);
+ }
+ }
+ }
+ else if(strcmp(header,"content-length")==0){
+ opus_int64 content_length;
+ /*Validate the Content-Length header, if present, against the request we
+ made.*/
+ content_length=op_http_parse_content_length(cdr);
+ if(OP_UNLIKELY(content_length<0))return (int)content_length;
+ if(next_end<0){
+ /*If we haven't seen the Content-Range header yet and we asked for the
+ rest of the resource, set next_end, so we can make sure they match
+ when we do find the Content-Range header.*/
+ if(OP_UNLIKELY(next_pos>OP_INT64_MAX-content_length))return OP_FALSE;
+ next_end=next_pos+content_length;
+ }
+ /*Otherwise, make sure they match now.*/
+ else if(OP_UNLIKELY(next_end-next_pos!=content_length))return OP_FALSE;
+ }
+ else if(strcmp(header,"connection")==0){
+ ret=op_http_parse_connection(cdr);
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*If the server told us it was going to close the connection, don't make
+ any more requests.*/
+ if(OP_UNLIKELY(ret>0))_conn->nrequests_left=0;
+ }
+ }
+ /*No Content-Range header.*/
+ if(OP_UNLIKELY(range_length<0))return OP_FALSE;
+ /*Update the content_length if necessary.*/
+ _stream->content_length=range_length;
+ /*The connection is now positioned at the start of this response's body.*/
+ _conn->pos=next_pos;
+ _conn->end_pos=next_end;
+ _conn->next_pos=-1;
+ return 0;
+ }
+
+ /*Connect _conn and start reading the resource at byte offset _pos.
+   On success, _conn becomes the stream's current connection and the stream's
+    connection time estimate is updated.
+   _pos:        The byte offset to start reading from.
+   _chunk_size: The number of bytes to ask for in the initial request, or -1 to
+                 request the rest of the resource.
+                This may be more bytes than remain, in which case it will be
+                 converted into a request for the rest.
+   Return: 0 on success, a negative value if connecting or sending the request
+            failed, or OP_FALSE if the response could not be used.*/
+ static int op_http_conn_open_pos(OpusHTTPStream *_stream,
+  OpusHTTPConn *_conn,opus_int64 _pos,opus_int32 _chunk_size){
+   struct timeb t_begin;
+   struct timeb t_done;
+   opus_int32   elapsed_ms;
+   opus_int32   rate;
+   int          err;
+   err=op_http_connect(_stream,_conn,&_stream->addr_info,&t_begin);
+   if(OP_UNLIKELY(err<0))return err;
+   err=op_http_conn_send_request(_stream,_conn,_pos,_chunk_size,0);
+   if(OP_UNLIKELY(err<0))return err;
+   /*Any non-zero result here (including "closed or timed out") is a failure:
+      we do not retry a connection we just opened.*/
+   err=op_http_conn_handle_response(_stream,_conn);
+   if(OP_UNLIKELY(err!=0))return OP_FALSE;
+   ftime(&t_done);
+   _stream->cur_conni=_conn-_stream->conns;
+   OP_ASSERT(_stream->cur_conni>=0&&_stream->cur_conni<OP_NCONNS_MAX);
+   /*The connection has been successfully opened.
+     Move the connection time estimate 1/16th of the way towards the new
+      sample (with rounding), treating the sample as at least 1 ms.*/
+   elapsed_ms=op_time_diff_ms(&t_done,&t_begin);
+   rate=_stream->connect_rate;
+   rate+=(OP_MAX(elapsed_ms,1)-rate+8)>>4;
+   _stream->connect_rate=rate;
+   return 0;
+ }
+
+ /*Read data from the current response body.
+ If we're pipelining and we get close to the end of this response, queue
+ another request.
+ If we've reached the end of this response body, parse the next response and
+ keep going.
+ _stream: The stream that owns _conn.
+ _conn: The connection to read from; it must be the stream's LRU head.
+ [out] _buf: Returns the data read.
+ _buf_size: The size of the buffer.
+ Return: A positive number of bytes read on success.
+ 0: The connection was closed.
+ OP_EREAD: There was a fatal read error.*/
+ static int op_http_conn_read_body(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn,unsigned char *_buf,int _buf_size){
+ opus_int64 pos;
+ opus_int64 end_pos;
+ opus_int64 next_pos;
+ opus_int64 content_length;
+ int nread;
+ int pipeline;
+ int ret;
+ /*Currently this function can only be called on the LRU head.
+ Otherwise, we'd need a _pnext pointer if we needed to close the connection,
+ and re-opening it would re-organize the lists.*/
+ OP_ASSERT(_stream->lru_head==_conn);
+ /*We should have filtered out empty reads by this point.*/
+ OP_ASSERT(_buf_size>0);
+ /*Work on local copies; they are refreshed below whenever a new response is
+ parsed.*/
+ pos=_conn->pos;
+ end_pos=_conn->end_pos;
+ next_pos=_conn->next_pos;
+ pipeline=_stream->pipeline;
+ content_length=_stream->content_length;
+ /*A negative end_pos means the end of this response body is unknown, so we
+ just read until the connection gives us nothing more.*/
+ if(end_pos>=0){
+ /*Have we reached the end of the current response body?*/
+ if(pos>=end_pos){
+ OP_ASSERT(content_length>=0);
+ /*If this was the end of the stream, we're done.
+ Also return early if a non-blocking read was requested (regardless of
+ whether we might be able to parse the next response without
+ blocking).*/
+ if(content_length<=end_pos)return 0;
+ /*Otherwise, start on the next response.*/
+ if(next_pos<0){
+ /*We haven't issued another request yet.*/
+ if(!pipeline||_conn->nrequests_left<=0){
+ /*There are two ways to get here: either the server told us it was
+ going to close the connection after the last request, or we
+ thought we were reading the whole resource, but it grew while we
+ were reading it.
+ The only way the latter could have happened is if content_length
+ changed while seeking.
+ Open a new request to read the rest.*/
+ OP_ASSERT(_stream->seekable);
+ /*Try to open a new connection to read another chunk.*/
+ op_http_conn_close(_stream,_conn,&_stream->lru_head,1);
+ /*If we're not pipelining, we should be requesting the rest.*/
+ OP_ASSERT(pipeline||_conn->chunk_size==-1);
+ ret=op_http_conn_open_pos(_stream,_conn,end_pos,_conn->chunk_size);
+ if(OP_UNLIKELY(ret<0))return OP_EREAD;
+ /*next_pos is still negative here, so the response handling below is
+ skipped: op_http_conn_open_pos() already parsed the response.*/
+ }
+ else{
+ /*Issue the request now (better late than never).*/
+ ret=op_http_conn_send_request(_stream,_conn,pos,_conn->chunk_size,0);
+ if(OP_UNLIKELY(ret<0))return OP_EREAD;
+ next_pos=_conn->next_pos;
+ OP_ASSERT(next_pos>=0);
+ }
+ }
+ if(next_pos>=0){
+ /*We shouldn't be trying to read past the current request body if we're
+ seeking somewhere else.*/
+ OP_ASSERT(next_pos==end_pos);
+ ret=op_http_conn_handle_response(_stream,_conn);
+ if(OP_UNLIKELY(ret<0))return OP_EREAD;
+ if(OP_UNLIKELY(ret>0)&&pipeline){
+ opus_int64 next_end;
+ next_end=_conn->next_end;
+ /*Our request timed out or the server closed the connection.
+ Try re-connecting.*/
+ op_http_conn_close(_stream,_conn,&_stream->lru_head,1);
+ /*Unless there's a bug, we should be able to convert
+ (next_pos,next_end) into valid (_pos,_chunk_size) parameters.*/
+ OP_ASSERT(next_end<0
+ ||next_end-next_pos>=0&&next_end-next_pos<=OP_INT32_MAX);
+ ret=op_http_conn_open_pos(_stream,_conn,next_pos,
+ next_end<0?-1:(opus_int32)(next_end-next_pos));
+ if(OP_UNLIKELY(ret<0))return OP_EREAD;
+ }
+ /*Without pipelining, a closed connection or timeout is fatal.*/
+ else if(OP_UNLIKELY(ret!=0))return OP_EREAD;
+ }
+ /*Pick up the state of the response we just started on.*/
+ pos=_conn->pos;
+ end_pos=_conn->end_pos;
+ content_length=_stream->content_length;
+ }
+ OP_ASSERT(end_pos>pos);
+ /*Never read past the end of the current response body.*/
+ _buf_size=OP_MIN(_buf_size,end_pos-pos);
+ }
+ nread=op_http_conn_read(_conn,(char *)_buf,_buf_size,1);
+ if(OP_UNLIKELY(nread<0))return nread;
+ pos+=nread;
+ _conn->pos=pos;
+ OP_ASSERT(end_pos<0||content_length>=0);
+ /*TODO: If nrequests_left<=0, we can't make a new request, and there will be
+ a big pause after we hit the end of the chunk while we open a new
+ connection.
+ It would be nice to be able to start that process now, but we have no way
+ to do it in the background without blocking (even if we could start it, we
+ have no guarantee the application will return control to us in a
+ sufficiently timely manner to allow us to complete it, and this is
+ uncommon enough that it's not worth using threads just for this).*/
+ /*Only queue another request if this body ends before the resource does, no
+ request is outstanding, and the connection can still take one.*/
+ if(end_pos>=0&&end_pos<content_length&&next_pos<0
+ &&pipeline&&OP_LIKELY(_conn->nrequests_left>0)){
+ opus_int64 request_thresh;
+ opus_int32 chunk_size;
+ /*Are we getting close to the end of the current response body?
+ If so, we should request more data.*/
+ request_thresh=_stream->connect_rate*_conn->read_rate>>12;
+ /*But don't commit ourselves too quickly.*/
+ chunk_size=_conn->chunk_size;
+ if(chunk_size>=0)request_thresh=OP_MIN(chunk_size>>2,request_thresh);
+ if(end_pos-pos<request_thresh){
+ /*A positive return means the request was not sent because it might have
+ blocked; that is not an error here.*/
+ ret=op_http_conn_send_request(_stream,_conn,end_pos,_conn->chunk_size,1);
+ if(OP_UNLIKELY(ret<0))return OP_EREAD;
+ }
+ }
+ return nread;
+ }
+
+ /*Read up to _buf_size bytes from the stream's current connection.
+   _stream:   The OpusHTTPStream to read from.
+   _ptr:      The buffer to store the data in.
+   _buf_size: The size of the buffer.
+   Return: The number of bytes read, 0 on EOF (or an empty read), or a negative
+            value on error.
+           After EOF or an error, the connection is closed and the stream no
+            longer has a current connection.*/
+ static int op_http_stream_read(void *_stream,
+  unsigned char *_ptr,int _buf_size){
+   OpusHTTPStream *http;
+   OpusHTTPConn   *conn;
+   opus_int64      total;
+   opus_int64      offset;
+   int             conni;
+   int             ngot;
+   http=(OpusHTTPStream *)_stream;
+   /*Check for an empty read.*/
+   if(_buf_size<=0)return 0;
+   /*No current connection => EOF.*/
+   conni=http->cur_conni;
+   if(conni<0)return 0;
+   conn=http->conns+conni;
+   offset=conn->pos;
+   total=http->content_length;
+   if(total>=0){
+     opus_int64 remaining;
+     /*Check for EOF.*/
+     if(offset>=total)return 0;
+     /*Check for a short read.*/
+     remaining=total-offset;
+     if(_buf_size>remaining)_buf_size=(int)remaining;
+   }
+   ngot=op_http_conn_read_body(http,conn,_ptr,_buf_size);
+   if(OP_UNLIKELY(ngot<=0)){
+     /*We hit an error or EOF.
+       Either way, we're done with this connection.
+       Remember where we were before the failed read.*/
+     op_http_conn_close(http,conn,&http->lru_head,1);
+     http->cur_conni=-1;
+     http->pos=offset;
+   }
+   return ngot;
+ }
+
+ /*Discard data until we reach the _target position.
+ This destroys the contents of _stream->response.buf, as we need somewhere to
+ read this data, and that is a convenient place.
+ _stream: The stream that owns _conn.
+ _conn: The connection to read from.
+ _just_read_ahead: Whether or not this is a plain fast-forward.
+ If 0, we need to issue a new request for a chunk at _target
+ and discard all the data from our current request(s).
+ Otherwise, we should be able to reach _target without
+ issuing any new requests.
+ _target: The stream position to which to read ahead.
+ Return: 0 on success, a negative value if the new request could not be sent,
+ or OP_FALSE if reading or parsing a response failed.*/
+ static int op_http_conn_read_ahead(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn,int _just_read_ahead,opus_int64 _target){
+ opus_int64 pos;
+ opus_int64 end_pos;
+ opus_int64 next_pos;
+ opus_int64 next_end;
+ ptrdiff_t nread;
+ int ret;
+ /*Snapshot the connection state before we (possibly) queue a new request,
+ which overwrites next_pos/next_end.*/
+ pos=_conn->pos;
+ end_pos=_conn->end_pos;
+ next_pos=_conn->next_pos;
+ next_end=_conn->next_end;
+ if(!_just_read_ahead){
+ /*We need to issue a new pipelined request.
+ This is the only case where we allow more than one outstanding request
+ at a time, so we need to reset next_pos (we'll restore it below if we
+ did have an outstanding request).*/
+ OP_ASSERT(_stream->pipeline);
+ _conn->next_pos=-1;
+ ret=op_http_conn_send_request(_stream,_conn,_target,
+ OP_PIPELINE_CHUNK_SIZE,0);
+ if(OP_UNLIKELY(ret<0))return ret;
+ }
+ /*We can reach the target position by reading forward in the current chunk.*/
+ if(_just_read_ahead&&(end_pos<0||_target<end_pos))end_pos=_target;
+ else if(next_pos>=0){
+ opus_int64 next_next_pos;
+ opus_int64 next_next_end;
+ /*We already have a request outstanding.
+ Finish off the current chunk.*/
+ while(pos<end_pos){
+ nread=op_http_conn_read(_conn,_stream->response.buf,
+ (int)OP_MIN(end_pos-pos,_stream->response.cbuf),1);
+ /*We failed to read ahead.*/
+ if(nread<=0)return OP_FALSE;
+ pos+=nread;
+ }
+ OP_ASSERT(pos==end_pos);
+ if(_just_read_ahead){
+ /*The target lies in the response that was already requested; nothing
+ will be outstanding once that response has been parsed.*/
+ next_next_pos=next_next_end=-1;
+ end_pos=_target;
+ }
+ else{
+ /*Put the older request back in place so its response is checked against
+ the right range, and remember the request for _target for afterwards.*/
+ OP_ASSERT(_conn->next_pos==_target);
+ next_next_pos=_target;
+ next_next_end=_conn->next_end;
+ _conn->next_pos=next_pos;
+ _conn->next_end=next_end;
+ end_pos=next_end;
+ }
+ ret=op_http_conn_handle_response(_stream,_conn);
+ if(OP_UNLIKELY(ret!=0))return OP_FALSE;
+ _conn->next_pos=next_next_pos;
+ _conn->next_end=next_next_end;
+ }
+ /*Discard everything up to end_pos (the target for a plain read-ahead, or
+ else the end of the data that precedes the response for _target).*/
+ while(pos<end_pos){
+ nread=op_http_conn_read(_conn,_stream->response.buf,
+ (int)OP_MIN(end_pos-pos,_stream->response.cbuf),1);
+ /*We failed to read ahead.*/
+ if(nread<=0)return OP_FALSE;
+ pos+=nread;
+ }
+ OP_ASSERT(pos==end_pos);
+ if(!_just_read_ahead){
+ /*Parse the response to the request for _target; this sets _conn->pos.*/
+ ret=op_http_conn_handle_response(_stream,_conn);
+ if(OP_UNLIKELY(ret!=0))return OP_FALSE;
+ }
+ else _conn->pos=end_pos;
+ OP_ASSERT(_conn->pos==_target);
+ return 0;
+ }
+
+/*Seek callback for an HTTP stream.
+  The new position is obtained by adding _offset to the start of the stream
+   (SEEK_SET), to the current position (SEEK_CUR), or to the end of the
+   stream (SEEK_END).
+  We first try to re-use an existing connection that can reach the target
+   cheaply, and only open a new connection if none is suitable.
+  Return: 0 on success, or -1 if the stream is not seekable, the arguments
+           are invalid or would overflow, or a new connection could not be
+           opened.*/
+static int op_http_stream_seek(void *_stream,opus_int64 _offset,int _whence){
+  struct timeb     seek_time;
+  OpusHTTPStream  *stream;
+  OpusHTTPConn    *conn;
+  OpusHTTPConn   **pnext;
+  OpusHTTPConn    *close_conn;
+  OpusHTTPConn   **close_pnext;
+  opus_int64       content_length;
+  opus_int64       pos;
+  int              pipeline;
+  int              ci;
+  int              ret;
+  stream=(OpusHTTPStream *)_stream;
+  if(!stream->seekable)return -1;
+  content_length=stream->content_length;
+  /*If we're seekable, we should have gotten a Content-Length.*/
+  OP_ASSERT(content_length>=0);
+  ci=stream->cur_conni;
+  pos=ci<0?content_length:stream->conns[ci].pos;
+  switch(_whence){
+    case SEEK_SET:{
+      /*Check for overflow:*/
+      if(_offset<0)return -1;
+      pos=_offset;
+    }break;
+    case SEEK_CUR:{
+      /*Check for overflow:*/
+      if(_offset<-pos||_offset>OP_INT64_MAX-pos)return -1;
+      pos+=_offset;
+    }break;
+    case SEEK_END:{
+      /*Check for overflow.
+        Like SEEK_CUR, the offset is added to the reference position, so a
+         negative offset moves backwards from the end of the stream.*/
+      if(_offset<-content_length||_offset>OP_INT64_MAX-content_length){
+        return -1;
+      }
+      pos=content_length+_offset;
+    }break;
+    default:return -1;
+  }
+  /*Mark when we deactivated the active connection.*/
+  if(ci>=0){
+    op_http_conn_read_rate_update(stream->conns+ci);
+    *&seek_time=*&stream->conns[ci].read_time;
+  }
+  else ftime(&seek_time);
+  /*If we seeked past the end of the stream, just disable the active
+     connection.*/
+  if(pos>=content_length){
+    stream->cur_conni=-1;
+    stream->pos=pos;
+    return 0;
+  }
+  /*First try to find a connection we can use without waiting.*/
+  pnext=&stream->lru_head;
+  conn=stream->lru_head;
+  while(conn!=NULL){
+    opus_int64 conn_pos;
+    opus_int64 end_pos;
+    int        available;
+    /*If this connection has been dormant too long or has made too many
+       requests, close it.
+      This is to prevent us from hitting server limits/firewall timeouts.*/
+    if(op_time_diff_ms(&seek_time,&conn->read_time)>
+     OP_CONNECTION_IDLE_TIMEOUT_MS
+     ||conn->nrequests_left<OP_PIPELINE_MIN_REQUESTS){
+      op_http_conn_close(stream,conn,pnext,1);
+      conn=*pnext;
+      continue;
+    }
+    available=op_http_conn_estimate_available(conn);
+    conn_pos=conn->pos;
+    end_pos=conn->end_pos;
+    if(conn->next_pos>=0){
+      OP_ASSERT(end_pos>=0);
+      OP_ASSERT(conn->next_pos==end_pos);
+      end_pos=conn->next_end;
+    }
+    OP_ASSERT(end_pos<0||conn_pos<=end_pos);
+    /*Can we quickly read ahead without issuing a new request or waiting for
+       any more data?
+      If we have an outstanding request, we'll over-estimate the amount of
+       data it has available (because we'll count the response headers,
+       too), but that probably doesn't matter.*/
+    if(conn_pos<=pos&&pos-conn_pos<=available&&(end_pos<0||pos<end_pos)){
+      /*Found a suitable connection to re-use.*/
+      ret=op_http_conn_read_ahead(stream,conn,1,pos);
+      if(OP_UNLIKELY(ret<0)){
+        /*The connection might have become stale, so close it and keep
+           going.*/
+        op_http_conn_close(stream,conn,pnext,1);
+        conn=*pnext;
+        continue;
+      }
+      /*Successfully resurrected this connection: move it to the head of the
+         LRU list and make it current.*/
+      *pnext=conn->next;
+      conn->next=stream->lru_head;
+      stream->lru_head=conn;
+      stream->cur_conni=conn-stream->conns;
+      return 0;
+    }
+    pnext=&conn->next;
+    conn=conn->next;
+  }
+  /*Chances are that didn't work, so now try to find one we can use by reading
+     ahead a reasonable amount and/or by issuing a new request.*/
+  close_pnext=NULL;
+  close_conn=NULL;
+  pnext=&stream->lru_head;
+  conn=stream->lru_head;
+  pipeline=stream->pipeline;
+  while(conn!=NULL){
+    opus_int64 conn_pos;
+    opus_int64 end_pos;
+    opus_int64 read_ahead_thresh;
+    int        available;
+    int        just_read_ahead;
+    /*Dividing by 2048 instead of 1000 scales this by nearly 1/2, biasing away
+       from connection re-use (and roughly compensating for the lag required
+       to reopen the TCP window of a connection that's been idle).
+      There's no overflow checking here, because it's vanishingly unlikely,
+       and all it would do is cause us to make poor decisions.*/
+    read_ahead_thresh=OP_MAX(OP_READAHEAD_THRESH_MIN,
+     stream->connect_rate*conn->read_rate>>11);
+    available=op_http_conn_estimate_available(conn);
+    conn_pos=conn->pos;
+    end_pos=conn->end_pos;
+    if(conn->next_pos>=0){
+      OP_ASSERT(end_pos>=0);
+      OP_ASSERT(conn->next_pos==end_pos);
+      end_pos=conn->next_end;
+    }
+    OP_ASSERT(end_pos<0||conn_pos<=end_pos);
+    /*Can we quickly read ahead without issuing a new request?*/
+    just_read_ahead=conn_pos<=pos&&pos-conn_pos-available<=read_ahead_thresh
+     &&(end_pos<0||pos<end_pos);
+    if(just_read_ahead||pipeline&&end_pos>=0
+     &&end_pos-conn_pos-available<=read_ahead_thresh){
+      /*Found a suitable connection to re-use.*/
+      ret=op_http_conn_read_ahead(stream,conn,just_read_ahead,pos);
+      if(OP_UNLIKELY(ret<0)){
+        /*The connection might have become stale, so close it and keep
+           going.*/
+        op_http_conn_close(stream,conn,pnext,1);
+        conn=*pnext;
+        continue;
+      }
+      /*Successfully resurrected this connection: move it to the head of the
+         LRU list and make it current.*/
+      *pnext=conn->next;
+      conn->next=stream->lru_head;
+      stream->lru_head=conn;
+      stream->cur_conni=conn-stream->conns;
+      return 0;
+    }
+    /*Remember the last connection in the list as the one to expire if we
+       run out of free slots.*/
+    close_pnext=pnext;
+    close_conn=conn;
+    pnext=&conn->next;
+    conn=conn->next;
+  }
+  /*No suitable connections.
+    Open a new one.*/
+  if(stream->free_head==NULL){
+    /*All connections in use.
+      Expire one of them (we should have already picked which one when
+       scanning the list).*/
+    OP_ASSERT(close_conn!=NULL);
+    OP_ASSERT(close_pnext!=NULL);
+    op_http_conn_close(stream,close_conn,close_pnext,1);
+  }
+  OP_ASSERT(stream->free_head!=NULL);
+  conn=stream->free_head;
+  /*If we can pipeline, only request a chunk of data.
+    If we're seeking now, there's a good chance we will want to seek again
+     soon, and this avoids committing this connection to reading the rest of
+     the stream.
+    Particularly with SSL or proxies, issuing a new request on the same
+     connection can be substantially faster than opening a new one.
+    This also limits the amount of data the server will blast at us on this
+     connection if we later seek elsewhere and start reading from a different
+     connection.*/
+  ret=op_http_conn_open_pos(stream,conn,pos,
+   pipeline?OP_PIPELINE_CHUNK_SIZE:-1);
+  if(OP_UNLIKELY(ret<0)){
+    op_http_conn_close(stream,conn,&stream->lru_head,1);
+    return -1;
+  }
+  return 0;
+}
+
+/*Tell callback for an HTTP stream.
+  Return: The position of the current connection, or the position saved in
+           the stream when no connection is current.*/
+static opus_int64 op_http_stream_tell(void *_stream){
+  OpusHTTPStream *http;
+  http=(OpusHTTPStream *)_stream;
+  if(http->cur_conni<0)return http->pos;
+  return http->conns[http->cur_conni].pos;
+}
+
+/*Close callback for an HTTP stream.
+  Clears and frees the stream object; a NULL stream is ignored.
+  Return: Always 0.*/
+static int op_http_stream_close(void *_stream){
+  OpusHTTPStream *http;
+  http=(OpusHTTPStream *)_stream;
+  if(OP_UNLIKELY(http==NULL))return 0;
+  op_http_stream_clear(http);
+  _ogg_free(http);
+  return 0;
+}
+
+/*The callback table for HTTP streams, listing the read, seek, tell, and
+ close functions defined above, in that order.
+ op_url_stream_create_impl() copies it into the caller's OpusFileCallbacks.*/
+static const OpusFileCallbacks OP_HTTP_CALLBACKS={
+ op_http_stream_read,
+ op_http_stream_seek,
+ op_http_stream_tell,
+ op_http_stream_close
+};
+#endif
+
+/*Reset every field of _info to its "not available" value: NULL for the
+   strings, -1 for the bitrate and the public flag, and 0 for the SSL flag.*/
+void opus_server_info_init(OpusServerInfo *_info){
+  /*String fields.*/
+  _info->name=_info->description=_info->genre=NULL;
+  _info->url=_info->server=_info->content_type=NULL;
+  /*Numeric fields.*/
+  _info->bitrate_kbps=-1;
+  _info->is_public=-1;
+  _info->is_ssl=0;
+}
+
+/*Free all of the strings held by _info.
+  The structure itself is neither freed nor reset.*/
+void opus_server_info_clear(OpusServerInfo *_info){
+  _ogg_free(_info->name);
+  _ogg_free(_info->description);
+  _ogg_free(_info->genre);
+  _ogg_free(_info->url);
+  _ogg_free(_info->server);
+  _ogg_free(_info->content_type);
+}
+
+/*The actual URL stream creation function.
+ This one isn't extensible like the application-level interface, but because
+ it isn't public, we're free to change it in the future.
+ A file: URL is unescaped and opened with op_fopen().
+ Anything else is opened as an HTTP stream when OP_ENABLE_HTTP is defined;
+ without it, every non-file URL fails.
+ _cb: Receives the callbacks to use with the returned stream.
+ _info: Passed through to op_http_stream_open(); may be NULL.
+ Return: The new stream, or NULL on failure.*/
+static void *op_url_stream_create_impl(OpusFileCallbacks *_cb,const char *_url,
+ int _skip_certificate_check,const char *_proxy_host,unsigned _proxy_port,
+ const char *_proxy_user,const char *_proxy_pass,OpusServerInfo *_info){
+ const char *path;
+ /*Check to see if this is a valid file: URL.*/
+ path=op_parse_file_url(_url);
+ if(path!=NULL){
+ char *unescaped_path;
+ void *ret;
+ /*Unescape a private copy of the path.*/
+ unescaped_path=op_string_dup(path);
+ if(OP_UNLIKELY(unescaped_path==NULL))return NULL;
+ ret=op_fopen(_cb,op_unescape_url_component(unescaped_path),"rb");
+ _ogg_free(unescaped_path);
+ return ret;
+ }
+#if defined(OP_ENABLE_HTTP)
+ /*If not, try http/https.*/
+ else{
+ OpusHTTPStream *stream;
+ int ret;
+ stream=(OpusHTTPStream *)_ogg_malloc(sizeof(*stream));
+ if(OP_UNLIKELY(stream==NULL))return NULL;
+ op_http_stream_init(stream);
+ ret=op_http_stream_open(stream,_url,_skip_certificate_check,
+ _proxy_host,_proxy_port,_proxy_user,_proxy_pass,_info);
+ if(OP_UNLIKELY(ret<0)){
+ /*Opening failed: release everything we set up.*/
+ op_http_stream_clear(stream);
+ _ogg_free(stream);
+ return NULL;
+ }
+ *_cb=*&OP_HTTP_CALLBACKS;
+ return stream;
+ }
+#else
+ /*HTTP support is compiled out: silence unused-parameter warnings and fail.*/
+ (void)_skip_certificate_check;
+ (void)_proxy_host;
+ (void)_proxy_port;
+ (void)_proxy_user;
+ (void)_proxy_pass;
+ (void)_info;
+ return NULL;
+#endif
+}
+
+/*Create a URL stream from a va_list of optional (request, value) pairs.
+  The list is terminated by a NULL request.
+  Return: The new stream, or NULL on failure, including an unknown request or
+           a proxy port outside [0,65535].*/
+void *op_url_stream_vcreate(OpusFileCallbacks *_cb,
+ const char *_url,va_list _ap){
+  OpusServerInfo  local_info;
+  OpusServerInfo *user_info;
+  const char     *phost;
+  const char     *puser;
+  const char     *ppass;
+  opus_int32      pport;
+  int             skip_check;
+  void           *created;
+  /*Defaults used when the corresponding option is not supplied.*/
+  skip_check=0;
+  phost=puser=ppass=NULL;
+  pport=8080;
+  user_info=NULL;
+  for(;;){
+    ptrdiff_t opt;
+    opt=va_arg(_ap,char *)-(char *)NULL;
+    /*A NULL request marks the end of the option list.*/
+    if(opt==0)break;
+    if(opt==OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST){
+      skip_check=!!va_arg(_ap,opus_int32);
+    }
+    else if(opt==OP_HTTP_PROXY_HOST_REQUEST){
+      phost=va_arg(_ap,const char *);
+    }
+    else if(opt==OP_HTTP_PROXY_PORT_REQUEST){
+      pport=va_arg(_ap,opus_int32);
+      if(pport<0||pport>(opus_int32)65535)return NULL;
+    }
+    else if(opt==OP_HTTP_PROXY_USER_REQUEST){
+      puser=va_arg(_ap,const char *);
+    }
+    else if(opt==OP_HTTP_PROXY_PASS_REQUEST){
+      ppass=va_arg(_ap,const char *);
+    }
+    else if(opt==OP_GET_SERVER_INFO_REQUEST){
+      user_info=va_arg(_ap,OpusServerInfo *);
+    }
+    /*Some unknown option.*/
+    else return NULL;
+  }
+  if(user_info==NULL){
+    return op_url_stream_create_impl(_cb,_url,skip_check,
+     phost,pport,puser,ppass,NULL);
+  }
+  /*The caller wants server information: collect it in a local copy so the
+     caller's structure is only written on success.*/
+  opus_server_info_init(&local_info);
+  created=op_url_stream_create_impl(_cb,_url,skip_check,
+   phost,pport,puser,ppass,&local_info);
+  if(created==NULL)opus_server_info_clear(&local_info);
+  else *user_info=*&local_info;
+  return created;
+}
+
+/*Variadic front-end for op_url_stream_vcreate().
+  The arguments after _url are the optional requests, terminated by NULL.*/
+void *op_url_stream_create(OpusFileCallbacks *_cb,
+ const char *_url,...){
+  va_list  options;
+  void    *created;
+  va_start(options,_url);
+  created=op_url_stream_vcreate(_cb,_url,options);
+  va_end(options);
+  return created;
+}
+
+/*Convenience routines to open/test URLs in a single step.*/
+
+/*Create a URL stream and open it with op_open_callbacks().
+  Return: The opened file, or NULL on failure.
+          If the stream could not be created and _error is not NULL, *_error
+           is set to OP_EFAULT.
+          If opening fails, the stream is closed again.*/
+OggOpusFile *op_vopen_url(const char *_url,int *_error,va_list _ap){
+  OpusFileCallbacks  callbacks;
+  OggOpusFile       *opened;
+  void              *stream;
+  stream=op_url_stream_vcreate(&callbacks,_url,_ap);
+  if(OP_LIKELY(stream!=NULL)){
+    opened=op_open_callbacks(stream,&callbacks,NULL,0,_error);
+    /*We still own the stream if opening failed.*/
+    if(OP_UNLIKELY(opened==NULL))(*callbacks.close)(stream);
+    return opened;
+  }
+  if(_error!=NULL)*_error=OP_EFAULT;
+  return NULL;
+}
+
+/*Variadic front-end for op_vopen_url().
+  The arguments after _error are the optional requests, terminated by NULL.*/
+OggOpusFile *op_open_url(const char *_url,int *_error,...){
+  va_list      options;
+  OggOpusFile *opened;
+  va_start(options,_error);
+  opened=op_vopen_url(_url,_error,options);
+  va_end(options);
+  return opened;
+}
+
+/*Create a URL stream and partially open it with op_test_callbacks().
+  Return: The file handle, or NULL on failure.
+          If the stream could not be created and _error is not NULL, *_error
+           is set to OP_EFAULT.
+          If the test fails, the stream is closed again.*/
+OggOpusFile *op_vtest_url(const char *_url,int *_error,va_list _ap){
+  OpusFileCallbacks  callbacks;
+  OggOpusFile       *tested;
+  void              *stream;
+  stream=op_url_stream_vcreate(&callbacks,_url,_ap);
+  if(OP_LIKELY(stream!=NULL)){
+    tested=op_test_callbacks(stream,&callbacks,NULL,0,_error);
+    /*We still own the stream if the test failed.*/
+    if(OP_UNLIKELY(tested==NULL))(*callbacks.close)(stream);
+    return tested;
+  }
+  if(_error!=NULL)*_error=OP_EFAULT;
+  return NULL;
+}
+
+/*Variadic front-end for op_vtest_url().
+  The arguments after _error are the optional requests, terminated by NULL.*/
+OggOpusFile *op_test_url(const char *_url,int *_error,...){
+  va_list      options;
+  OggOpusFile *tested;
+  va_start(options,_error);
+  tested=op_vtest_url(_url,_error,options);
+  va_end(options);
+  return tested;
+}
diff --git a/drivers/opus/info.c b/drivers/opus/info.c
new file mode 100644
index 0000000000..f5ad2110be
--- /dev/null
+++ b/drivers/opus/info.c
@@ -0,0 +1,687 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#include <limits.h>
+#include <string.h>
+
+/*Read an unsigned 16-bit little-endian value from the first two bytes of
+   _data.*/
+static unsigned op_parse_uint16le(const unsigned char *_data){
+  unsigned lo;
+  unsigned hi;
+  lo=_data[0];
+  hi=_data[1];
+  return hi<<8|lo;
+}
+
+/*Read a signed (two's complement) 16-bit little-endian value from the first
+   two bytes of _data.*/
+static int op_parse_int16le(const unsigned char *_data){
+  int raw;
+  raw=_data[1]<<8|_data[0];
+  /*Sign-extend from bit 15.*/
+  if(raw&0x8000)raw-=0x10000;
+  return raw;
+}
+
+/*Read an unsigned 32-bit little-endian value from the first four bytes of
+   _data.*/
+static opus_uint32 op_parse_uint32le(const unsigned char *_data){
+  opus_uint32 val;
+  int         bi;
+  val=0;
+  /*Accumulate from the most significant byte (the last one) down.*/
+  for(bi=4;bi-->0;)val=val<<8|_data[bi];
+  return val;
+}
+
+/*Read an unsigned 32-bit big-endian value from the first four bytes of
+   _data.*/
+static opus_uint32 op_parse_uint32be(const unsigned char *_data){
+  opus_uint32 val;
+  int         bi;
+  val=0;
+  /*Accumulate from the most significant byte (the first one) down.*/
+  for(bi=0;bi<4;bi++)val=val<<8|_data[bi];
+  return val;
+}
+
+/*Parse the contents of an "OpusHead" ID header packet.
+  _head: Receives the parsed header on success; may be NULL to only
+          validate the packet.
+  _data: The packet contents.
+  _len:  The number of bytes in _data.
+  Return: 0 on success, or OP_ENOTFORMAT, OP_EVERSION, OP_EBADHEADER, or
+           OP_EIMPL (for channel mapping family 255) on failure.*/
+int opus_head_parse(OpusHead *_head,const unsigned char *_data,size_t _len){
+  OpusHead parsed;
+  /*The magic signature comes first.*/
+  if(_len<8||memcmp(_data,"OpusHead",8)!=0)return OP_ENOTFORMAT;
+  if(_len<9)return OP_EBADHEADER;
+  parsed.version=_data[8];
+  if(parsed.version>15)return OP_EVERSION;
+  /*The fixed-size part of the header is 19 bytes long.*/
+  if(_len<19)return OP_EBADHEADER;
+  parsed.channel_count=_data[9];
+  parsed.pre_skip=op_parse_uint16le(_data+10);
+  parsed.input_sample_rate=op_parse_uint32le(_data+12);
+  parsed.output_gain=op_parse_int16le(_data+16);
+  parsed.mapping_family=_data[18];
+  switch(parsed.mapping_family){
+    case 0:{
+      /*Mono or stereo with an implicit mapping.*/
+      if(parsed.channel_count<1||parsed.channel_count>2)return OP_EBADHEADER;
+      if(parsed.version<=1&&_len>19)return OP_EBADHEADER;
+      parsed.stream_count=1;
+      parsed.coupled_count=parsed.channel_count-1;
+      if(_head!=NULL){
+        _head->mapping[0]=0;
+        _head->mapping[1]=1;
+      }
+    }break;
+    case 1:{
+      size_t expected;
+      int    ch;
+      /*One to eight channels with an explicit mapping table.*/
+      if(parsed.channel_count<1||parsed.channel_count>8)return OP_EBADHEADER;
+      expected=21+parsed.channel_count;
+      if(_len<expected)return OP_EBADHEADER;
+      if(parsed.version<=1&&_len>expected)return OP_EBADHEADER;
+      parsed.stream_count=_data[19];
+      if(parsed.stream_count<1)return OP_EBADHEADER;
+      parsed.coupled_count=_data[20];
+      if(parsed.coupled_count>parsed.stream_count)return OP_EBADHEADER;
+      /*Every mapping entry must name a decoded channel or be 255.*/
+      for(ch=0;ch<parsed.channel_count;ch++){
+        if(_data[21+ch]!=255
+         &&_data[21+ch]>=parsed.stream_count+parsed.coupled_count){
+          return OP_EBADHEADER;
+        }
+      }
+      if(_head!=NULL)memcpy(_head->mapping,_data+21,parsed.channel_count);
+    }break;
+    /*General purpose players should not attempt to play back content with
+       channel mapping family 255.*/
+    case 255:return OP_EIMPL;
+    /*No other channel mapping families are currently defined.*/
+    default:return OP_EBADHEADER;
+  }
+  /*Copy everything that precedes the mapping table, which was filled in
+     above.*/
+  if(_head!=NULL){
+    memcpy(_head,&parsed,parsed.mapping-(unsigned char *)&parsed);
+  }
+  return 0;
+}
+
+/*Initialize _tags to the empty state by zeroing the whole structure.*/
+void opus_tags_init(OpusTags *_tags){
+  memset(_tags,0,sizeof(OpusTags));
+}
+
+/*Free all storage owned by _tags: every comment string, the two arrays,
+   and the vendor string.
+  The structure itself is neither freed nor reset.*/
+void opus_tags_clear(OpusTags *_tags){
+  int remaining;
+  remaining=_tags->comments;
+  /*Release the comments from last to first.*/
+  while(remaining>0)_ogg_free(_tags->user_comments[--remaining]);
+  _ogg_free(_tags->user_comments);
+  _ogg_free(_tags->comment_lengths);
+  _ogg_free(_tags->vendor);
+}
+
+/*Ensure there's room for up to _ncomments comments.
+  Both arrays are resized to _ncomments+1 entries, and the extra final entry
+   is set to 0 (lengths) or NULL (comments).
+  Return: 0 on success, or OP_EFAULT if the size would overflow or the
+           allocation failed.*/
+static int op_tags_ensure_capacity(OpusTags *_tags,size_t _ncomments){
+  size_t   nslots;
+  size_t   nbytes;
+  int     *lengths;
+  char   **comments;
+  if(OP_UNLIKELY(_ncomments>=(size_t)INT_MAX))return OP_EFAULT;
+  nslots=_ncomments+1;
+  /*Grow the length array first.*/
+  nbytes=sizeof(*_tags->comment_lengths)*nslots;
+  if(nbytes/sizeof(*_tags->comment_lengths)!=nslots)return OP_EFAULT;
+  lengths=(int *)_ogg_realloc(_tags->comment_lengths,nbytes);
+  if(OP_UNLIKELY(lengths==NULL))return OP_EFAULT;
+  lengths[_ncomments]=0;
+  _tags->comment_lengths=lengths;
+  /*Then the array of comment pointers.*/
+  nbytes=sizeof(*_tags->user_comments)*nslots;
+  if(nbytes/sizeof(*_tags->user_comments)!=nslots)return OP_EFAULT;
+  comments=(char **)_ogg_realloc(_tags->user_comments,nbytes);
+  if(OP_UNLIKELY(comments==NULL))return OP_EFAULT;
+  comments[_ncomments]=NULL;
+  _tags->user_comments=comments;
+  return 0;
+}
+
+/*Duplicate a (possibly non-NUL terminated) string with a known length.
+  Return: A newly allocated, NUL-terminated copy of the first _len bytes of
+           _s, or NULL if the size overflows or the allocation fails.*/
+static char *op_strdup_with_len(const char *_s,size_t _len){
+  size_t  nbytes;
+  char   *copy;
+  /*One extra byte for the terminator.*/
+  nbytes=sizeof(*copy)*(_len+1);
+  if(OP_UNLIKELY(nbytes<_len))return NULL;
+  copy=(char *)_ogg_malloc(nbytes);
+  if(OP_UNLIKELY(copy==NULL))return NULL;
+  memcpy(copy,_s,sizeof(*copy)*_len);
+  copy[_len]='\0';
+  return copy;
+}
+
+/*The actual implementation of opus_tags_parse().
+ Unlike the public API, this function requires _tags to already be
+ initialized, modifies its contents before success is guaranteed, and assumes
+ the caller will clear it on error.
+ _tags may be NULL, in which case the packet is only validated.
+ Return: 0 on success, or OP_ENOTFORMAT, OP_EBADHEADER, or OP_EFAULT on
+ failure.*/
+static int opus_tags_parse_impl(OpusTags *_tags,
+ const unsigned char *_data,size_t _len){
+ opus_uint32 count;
+ size_t len;
+ int ncomments;
+ int ci;
+ len=_len;
+ /*The magic signature comes first.*/
+ if(len<8)return OP_ENOTFORMAT;
+ if(memcmp(_data,"OpusTags",8)!=0)return OP_ENOTFORMAT;
+ /*We need at least the signature and two 32-bit length fields.*/
+ if(len<16)return OP_EBADHEADER;
+ _data+=8;
+ len-=8;
+ /*The vendor string: a 32-bit length followed by that many bytes.*/
+ count=op_parse_uint32le(_data);
+ _data+=4;
+ len-=4;
+ if(count>len)return OP_EBADHEADER;
+ if(_tags!=NULL){
+ _tags->vendor=op_strdup_with_len((char *)_data,count);
+ if(_tags->vendor==NULL)return OP_EFAULT;
+ }
+ _data+=count;
+ len-=count;
+ /*The number of user comments.*/
+ if(len<4)return OP_EBADHEADER;
+ count=op_parse_uint32le(_data);
+ _data+=4;
+ len-=4;
+ /*Check to make sure there's minimally sufficient data left in the packet.*/
+ if(count>len>>2)return OP_EBADHEADER;
+ /*Check for overflow (the API limits this to an int).*/
+ if(count>(opus_uint32)INT_MAX-1)return OP_EFAULT;
+ if(_tags!=NULL){
+ int ret;
+ ret=op_tags_ensure_capacity(_tags,count);
+ if(ret<0)return ret;
+ }
+ ncomments=(int)count;
+ for(ci=0;ci<ncomments;ci++){
+ /*Check to make sure there's minimally sufficient data left in the packet.*/
+ if((size_t)(ncomments-ci)>len>>2)return OP_EBADHEADER;
+ /*Each comment: a 32-bit length followed by that many bytes.*/
+ count=op_parse_uint32le(_data);
+ _data+=4;
+ len-=4;
+ if(count>len)return OP_EBADHEADER;
+ /*Check for overflow (the API limits this to an int).*/
+ if(count>(opus_uint32)INT_MAX)return OP_EFAULT;
+ if(_tags!=NULL){
+ _tags->user_comments[ci]=op_strdup_with_len((char *)_data,count);
+ if(_tags->user_comments[ci]==NULL)return OP_EFAULT;
+ _tags->comment_lengths[ci]=(int)count;
+ /*Keep the count current so the caller can clear a partial result.*/
+ _tags->comments=ci+1;
+ }
+ _data+=count;
+ len-=count;
+ }
+ return 0;
+}
+
+/*Parse the contents of an "OpusTags" comment header packet.
+  _tags: Receives the parsed tags on success, and is left untouched on
+          failure; may be NULL to only validate the packet.
+  Return: The result of opus_tags_parse_impl().*/
+int opus_tags_parse(OpusTags *_tags,const unsigned char *_data,size_t _len){
+  OpusTags parsed;
+  int      ret;
+  if(_tags==NULL)return opus_tags_parse_impl(NULL,_data,_len);
+  /*Parse into a local copy so the caller's structure is only written on
+     success.*/
+  opus_tags_init(&parsed);
+  ret=opus_tags_parse_impl(&parsed,_data,_len);
+  if(ret>=0)*_tags=*&parsed;
+  else opus_tags_clear(&parsed);
+  return ret;
+}
+
+/*The actual implementation of opus_tags_copy().
+  Unlike the public API, this function requires _dst to already be
+   initialized, modifies its contents before success is guaranteed, and
+   assumes the caller will clear it on error.
+  Return: 0 on success, or a negative value (OP_EFAULT or the result of
+           op_tags_ensure_capacity()) on failure.*/
+static int opus_tags_copy_impl(OpusTags *_dst,const OpusTags *_src){
+  int total;
+  int idx;
+  int err;
+  _dst->vendor=op_strdup_with_len(_src->vendor,strlen(_src->vendor));
+  if(OP_UNLIKELY(_dst->vendor==NULL))return OP_EFAULT;
+  total=_src->comments;
+  err=op_tags_ensure_capacity(_dst,total);
+  if(OP_UNLIKELY(err<0))return err;
+  idx=0;
+  while(idx<total){
+    char *dup;
+    int   clen;
+    clen=_src->comment_lengths[idx];
+    OP_ASSERT(clen>=0);
+    dup=op_strdup_with_len(_src->user_comments[idx],clen);
+    _dst->user_comments[idx]=dup;
+    if(OP_UNLIKELY(dup==NULL))return OP_EFAULT;
+    _dst->comment_lengths[idx]=clen;
+    /*Keep the count current so the caller can clear a partial result.*/
+    _dst->comments=++idx;
+  }
+  return 0;
+}
+
+/*Perform a deep copy of _src into _dst.
+  The copy is built in a local structure, so _dst is only written on
+   success.
+  Return: 0 on success, or the negative error code from
+           opus_tags_copy_impl() on failure (in which case _dst is left
+           untouched).*/
+int opus_tags_copy(OpusTags *_dst,const OpusTags *_src){
+  OpusTags dst;
+  int      ret;
+  opus_tags_init(&dst);
+  ret=opus_tags_copy_impl(&dst,_src);
+  if(OP_UNLIKELY(ret<0))opus_tags_clear(&dst);
+  else *_dst=*&dst;
+  /*Report failures to the caller instead of always claiming success.*/
+  return ret;
+}
+
+/*Append a "_tag=_value" comment to _tags.
+  _tag:   The NUL-terminated tag name.
+  _value: The NUL-terminated tag value.
+  Return: 0 on success, or a negative value on failure: OP_EFAULT if the
+           combined length does not fit in an int or an allocation failed,
+           or the result of op_tags_ensure_capacity().*/
+int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value){
+  char   *comment;
+  size_t  tag_len;
+  size_t  value_len;
+  int     ncomments;
+  int     ret;
+  tag_len=strlen(_tag);
+  value_len=strlen(_value);
+  /*The comment length is stored in an int, and we need +2 for '=' and '\0'.
+    Reject anything that would overflow before touching _tags.*/
+  if(OP_UNLIKELY(tag_len>(size_t)INT_MAX-2))return OP_EFAULT;
+  if(OP_UNLIKELY(value_len>(size_t)INT_MAX-2-tag_len))return OP_EFAULT;
+  ncomments=_tags->comments;
+  ret=op_tags_ensure_capacity(_tags,ncomments+1);
+  if(OP_UNLIKELY(ret<0))return ret;
+  _tags->comment_lengths[ncomments]=0;
+  _tags->user_comments[ncomments]=comment=
+   (char *)_ogg_malloc(sizeof(*comment)*(tag_len+value_len+2));
+  if(OP_UNLIKELY(comment==NULL))return OP_EFAULT;
+  memcpy(comment,_tag,sizeof(*comment)*tag_len);
+  comment[tag_len]='=';
+  /*This also copies the terminating NUL of _value.*/
+  memcpy(comment+tag_len+1,_value,sizeof(*comment)*(value_len+1));
+  _tags->comment_lengths[ncomments]=(int)(tag_len+value_len+1);
+  _tags->comments=ncomments+1;
+  return 0;
+}
+
+/*Append a copy of the NUL-terminated string _comment to _tags as is.
+  Return: 0 on success, or a negative value on failure: OP_EFAULT if the
+           length does not fit in an int or an allocation failed, or the
+           result of op_tags_ensure_capacity().*/
+int opus_tags_add_comment(OpusTags *_tags,const char *_comment){
+  char   *comment;
+  size_t  comment_len;
+  int     ncomments;
+  int     ret;
+  comment_len=strlen(_comment);
+  /*The comment length is stored in an int; don't let it be truncated.*/
+  if(OP_UNLIKELY(comment_len>(size_t)INT_MAX))return OP_EFAULT;
+  ncomments=_tags->comments;
+  ret=op_tags_ensure_capacity(_tags,ncomments+1);
+  if(OP_UNLIKELY(ret<0))return ret;
+  _tags->comment_lengths[ncomments]=0;
+  comment=op_strdup_with_len(_comment,comment_len);
+  _tags->user_comments[ncomments]=comment;
+  if(OP_UNLIKELY(comment==NULL))return OP_EFAULT;
+  _tags->comment_lengths[ncomments]=(int)comment_len;
+  _tags->comments=ncomments+1;
+  return 0;
+}
+
+/*Check whether _comment is an instance of the tag _tag_name.
+  Return: 0 if _comment starts with _tag_name (ignoring ASCII case) followed
+           by '=', and a non-zero value otherwise.
+          A tag name too long to be described by an int never matches, and
+           yields -1.*/
+int opus_tagcompare(const char *_tag_name,const char *_comment){
+  size_t tag_len;
+  tag_len=strlen(_tag_name);
+  /*opus_tagncompare() takes an int length; don't let it be truncated.*/
+  if(OP_UNLIKELY(tag_len>(size_t)INT_MAX))return -1;
+  return opus_tagncompare(_tag_name,(int)tag_len,_comment);
+}
+
+/*Check whether _comment is an instance of the tag whose name is the first
+   _tag_len bytes of _tag_name.
+  Return: 0 if those bytes match the start of _comment (ignoring ASCII case)
+           and the next byte of _comment is '='; a non-zero value
+           otherwise.*/
+int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment){
+  int diff;
+  OP_ASSERT(_tag_len>=0);
+  diff=op_strncasecmp(_tag_name,_comment,_tag_len);
+  if(diff!=0)return diff;
+  /*The name matched; it must be followed by the separator.*/
+  return '='-_comment[_tag_len];
+}
+
+/*Look up the value of a comment by tag name.
+  _count: The zero-based index of the match to return, for tags that appear
+           more than once.
+  Return: A pointer to the value inside _tags (not a copy), or NULL if there
+           is no such match.*/
+const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count){
+  int tag_len;
+  int nmatches;
+  int idx;
+  tag_len=strlen(_tag);
+  nmatches=0;
+  for(idx=0;idx<_tags->comments;idx++){
+    const char *comment;
+    comment=_tags->user_comments[idx];
+    if(opus_tagncompare(_tag,tag_len,comment)!=0)continue;
+    /*Skip past the tag name and the '='.*/
+    if(nmatches==_count)return comment+tag_len+1;
+    nmatches++;
+  }
+  /*Didn't find anything.*/
+  return NULL;
+}
+
+/*Count how many comments in _tags are instances of the tag _tag.*/
+int opus_tags_query_count(const OpusTags *_tags,const char *_tag){
+  int tag_len;
+  int nmatches;
+  int idx;
+  tag_len=strlen(_tag);
+  nmatches=0;
+  for(idx=0;idx<_tags->comments;idx++){
+    if(opus_tagncompare(_tag,tag_len,_tags->user_comments[idx])==0){
+      nmatches++;
+    }
+  }
+  return nmatches;
+}
+
+/*Get the track gain from the first valid R128_TRACK_GAIN tag.
+  A valid value is an optional sign followed only by decimal digits, with a
+   result in the range [-32768,32767].
+  _gain_q8: Receives the gain on success, and is left untouched otherwise.
+  Return: 0 on success, or OP_FALSE if no valid tag was found.*/
+int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8){
+  int idx;
+  for(idx=0;idx<_tags->comments;idx++){
+    const char *digits;
+    opus_int32  magnitude;
+    opus_int32  limit;
+    int         is_negative;
+    if(opus_tagncompare("R128_TRACK_GAIN",15,_tags->user_comments[idx])!=0){
+      continue;
+    }
+    /*Skip the 15-character tag name and the '='.*/
+    digits=_tags->user_comments[idx]+16;
+    is_negative=*digits=='-';
+    if(*digits=='-'||*digits=='+')digits++;
+    /*A signed 16-bit value has one more negative value than positive.*/
+    limit=is_negative?32768:32767;
+    magnitude=0;
+    while(*digits>='0'&&*digits<='9'){
+      magnitude=10*magnitude+*digits-'0';
+      /*Out of range: stop on this digit so the check below rejects it.*/
+      if(magnitude>limit)break;
+      digits++;
+    }
+    /*This didn't look like a signed 16-bit decimal integer.
+      Not a valid R128_TRACK_GAIN tag.*/
+    if(*digits!='\0')continue;
+    *_gain_q8=(int)(is_negative?-magnitude:magnitude);
+    return 0;
+  }
+  return OP_FALSE;
+}
+
+/*Check whether a buffer starts like a JFIF JPEG file: an SOI marker, an
+   APP0 marker with a segment length of at least 16, and the "JFIF\0"
+   identifier.
+  Return: 1 if so, and 0 otherwise.*/
+static int op_is_jpeg(const unsigned char *_buf,size_t _buf_sz){
+  unsigned app0_len;
+  if(_buf_sz<11)return 0;
+  if(memcmp(_buf,"\xFF\xD8\xFF\xE0",4)!=0)return 0;
+  app0_len=_buf[4]<<8|_buf[5];
+  if(app0_len<16)return 0;
+  /*Comparing 5 bytes includes the terminating NUL of the identifier.*/
+  return memcmp(_buf+6,"JFIF",5)==0;
+}
+
+/*Tries to extract the width, height, bits per pixel, and palette size of a
+   JPEG.
+  The marker segments are scanned until the first SOFn (start of frame)
+   segment, from which the parameters are taken.
+  On failure, simply leaves its outputs unmodified.*/
+static void op_extract_jpeg_params(const unsigned char *_buf,size_t _buf_sz,
+ opus_uint32 *_width,opus_uint32 *_height,
+ opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){
+  size_t offs;
+  if(!op_is_jpeg(_buf,_buf_sz))return;
+  /*Start right after the SOI marker.*/
+  offs=2;
+  for(;;){
+    size_t segment_len;
+    int    marker;
+    /*Find the next marker: skip anything that is not 0xFF, then the 0xFF
+       prefix/fill bytes themselves.*/
+    while(offs<_buf_sz&&_buf[offs]!=0xFF)offs++;
+    while(offs<_buf_sz&&_buf[offs]==0xFF)offs++;
+    /*We ran off the end of the data: stop before reading past it.*/
+    if(offs>=_buf_sz)break;
+    marker=_buf[offs];
+    offs++;
+    /*If we hit EOI* (end of image), or another SOI* (start of image),
+       or SOS (start of scan), then stop now.*/
+    if(offs>=_buf_sz||(marker>=0xD8&&marker<=0xDA))break;
+    /*RST* (restart markers): skip (no segment length).*/
+    else if(marker>=0xD0&&marker<=0xD7)continue;
+    /*Read the length of the marker segment.*/
+    if(_buf_sz-offs<2)break;
+    segment_len=_buf[offs]<<8|_buf[offs+1];
+    if(segment_len<2||_buf_sz-offs<segment_len)break;
+    if(marker==0xC0||(marker>0xC0&&marker<0xD0&&(marker&3)!=0)){
+      /*Found a SOFn (start of frame) marker segment:*/
+      if(segment_len>=8){
+        *_height=_buf[offs+3]<<8|_buf[offs+4];
+        *_width=_buf[offs+5]<<8|_buf[offs+6];
+        /*Sample precision times the number of components.*/
+        *_depth=_buf[offs+2]*_buf[offs+7];
+        *_colors=0;
+        *_has_palette=0;
+      }
+      break;
+    }
+    /*Other markers: skip the whole marker segment.*/
+    offs+=segment_len;
+  }
+}
+
+/*Check whether a buffer starts with the 8-byte PNG signature.
+  Return: 1 if so, and 0 otherwise.*/
+static int op_is_png(const unsigned char *_buf,size_t _buf_sz){
+  if(_buf_sz<8)return 0;
+  return memcmp(_buf,"\x89PNG\x0D\x0A\x1A\x0A",8)==0;
+}
+
+/*Tries to extract the width, height, bits per pixel, and palette size of a
+ PNG.
+ The chunks are walked until an IHDR chunk (and, for palette images, the
+ following PLTE chunk) has been found.
+ On failure, simply leaves its outputs unmodified.*/
+static void op_extract_png_params(const unsigned char *_buf,size_t _buf_sz,
+ opus_uint32 *_width,opus_uint32 *_height,
+ opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){
+ if(op_is_png(_buf,_buf_sz)){
+ size_t offs;
+ /*Start right after the 8-byte signature.*/
+ offs=8;
+ /*Each chunk has 12 bytes of framing (4-byte length, 4-byte type, and 4
+ more bytes after the data) around chunk_len bytes of data.*/
+ while(_buf_sz-offs>=12){
+ ogg_uint32_t chunk_len;
+ chunk_len=op_parse_uint32be(_buf+offs);
+ /*Stop if the chunk claims more data than we have.*/
+ if(chunk_len>_buf_sz-(offs+12))break;
+ else if(chunk_len==13&&memcmp(_buf+offs+4,"IHDR",4)==0){
+ int color_type;
+ *_width=op_parse_uint32be(_buf+offs+8);
+ *_height=op_parse_uint32be(_buf+offs+12);
+ color_type=_buf[offs+17];
+ if(color_type==3){
+ /*Palette image: keep scanning for the PLTE chunk to count colors.*/
+ *_depth=24;
+ *_has_palette=1;
+ }
+ else{
+ int sample_depth;
+ sample_depth=_buf[offs+16];
+ /*The depth is the sample depth times the number of samples per pixel
+ for this color type.*/
+ if(color_type==0)*_depth=sample_depth;
+ else if(color_type==2)*_depth=sample_depth*3;
+ else if(color_type==4)*_depth=sample_depth*2;
+ else if(color_type==6)*_depth=sample_depth*4;
+ *_colors=0;
+ *_has_palette=0;
+ break;
+ }
+ }
+ else if(*_has_palette>0&&memcmp(_buf+offs+4,"PLTE",4)==0){
+ /*Each palette entry takes 3 bytes.*/
+ *_colors=chunk_len/3;
+ break;
+ }
+ offs+=12+chunk_len;
+ }
+ }
+}
+
+/*Check whether a buffer starts with a GIF signature ("GIF87a" or "GIF89a").
+  Return: 1 if so, and 0 otherwise.*/
+static int op_is_gif(const unsigned char *_buf,size_t _buf_sz){
+  if(_buf_sz<6)return 0;
+  if(memcmp(_buf,"GIF87a",6)==0)return 1;
+  return memcmp(_buf,"GIF89a",6)==0;
+}
+
+/*Tries to extract the width, height, bits per pixel, and palette size of a
+   GIF.
+  The buffer must hold a GIF signature and at least 14 bytes.
+  On failure, simply leaves its outputs unmodified.*/
+static void op_extract_gif_params(const unsigned char *_buf,size_t _buf_sz,
+ opus_uint32 *_width,opus_uint32 *_height,
+ opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){
+  unsigned palette_bits;
+  if(!op_is_gif(_buf,_buf_sz)||_buf_sz<14)return;
+  /*The dimensions are stored as little-endian 16-bit values.*/
+  *_width=_buf[7]<<8|_buf[6];
+  *_height=_buf[9]<<8|_buf[8];
+  /*libFLAC hard-codes the depth to 24.*/
+  *_depth=24;
+  /*The low three bits of byte 10 give the palette size as a power of two.*/
+  palette_bits=(_buf[10]&7)+1;
+  *_colors=1<<palette_bits;
+  *_has_palette=1;
+}
+
+/*The actual implementation of opus_picture_tag_parse().
+ Unlike the public API, this function requires _pic to already be
+ initialized, modifies its contents before success is guaranteed, and assumes
+ the caller will clear it on error.
+ _tag: The BASE64-encoded picture data (4*_base64_sz characters).
+ _buf: A buffer for the decoded data, with room for at least
+ _buf_sz+1 bytes (the extra byte holds the NUL appended to URLs).
+ On success it is resized and becomes _pic->data.
+ _buf_sz: The size of the decoded data.
+ _base64_sz: The number of 4-character BASE64 groups in _tag.
+ Return: 0 on success, or OP_ENOTFORMAT or OP_EFAULT on failure.*/
+static int opus_picture_tag_parse_impl(OpusPictureTag *_pic,const char *_tag,
+ unsigned char *_buf,size_t _buf_sz,size_t _base64_sz){
+ opus_int32 picture_type;
+ opus_uint32 mime_type_length;
+ char *mime_type;
+ opus_uint32 description_length;
+ char *description;
+ opus_uint32 width;
+ opus_uint32 height;
+ opus_uint32 depth;
+ opus_uint32 colors;
+ opus_uint32 data_length;
+ opus_uint32 file_width;
+ opus_uint32 file_height;
+ opus_uint32 file_depth;
+ opus_uint32 file_colors;
+ int format;
+ int has_palette;
+ int colors_set;
+ size_t i;
+ /*Decode the BASE64 data.*/
+ for(i=0;i<_base64_sz;i++){
+ opus_uint32 value;
+ int j;
+ value=0;
+ /*Each group of 4 characters encodes 24 bits.*/
+ for(j=0;j<4;j++){
+ unsigned c;
+ unsigned d;
+ c=(unsigned char)_tag[4*i+j];
+ if(c=='+')d=62;
+ else if(c=='/')d=63;
+ else if(c>='0'&&c<='9')d=52+c-'0';
+ else if(c>='a'&&c<='z')d=26+c-'a';
+ else if(c>='A'&&c<='Z')d=c-'A';
+ /*Padding is only accepted past the end of the decoded data.*/
+ else if(c=='='&&3*i+j>_buf_sz)d=0;
+ else return OP_ENOTFORMAT;
+ value=value<<6|d;
+ }
+ /*Store the decoded bytes, omitting any that lie beyond _buf_sz.*/
+ _buf[3*i]=(unsigned char)(value>>16);
+ if(3*i+1<_buf_sz){
+ _buf[3*i+1]=(unsigned char)(value>>8);
+ if(3*i+2<_buf_sz)_buf[3*i+2]=(unsigned char)value;
+ }
+ }
+ /*From here on, i is the read offset into the decoded data.*/
+ i=0;
+ picture_type=op_parse_uint32be(_buf+i);
+ i+=4;
+ /*Extract the MIME type.*/
+ mime_type_length=op_parse_uint32be(_buf+i);
+ i+=4;
+ /*32 is the total size of the eight fixed 32-bit fields.*/
+ if(mime_type_length>_buf_sz-32)return OP_ENOTFORMAT;
+ mime_type=(char *)_ogg_malloc(sizeof(*_pic->mime_type)*(mime_type_length+1));
+ if(mime_type==NULL)return OP_EFAULT;
+ memcpy(mime_type,_buf+i,sizeof(*mime_type)*mime_type_length);
+ mime_type[mime_type_length]='\0';
+ _pic->mime_type=mime_type;
+ i+=mime_type_length;
+ /*Extract the description string.*/
+ description_length=op_parse_uint32be(_buf+i);
+ i+=4;
+ if(description_length>_buf_sz-mime_type_length-32)return OP_ENOTFORMAT;
+ description=
+ (char *)_ogg_malloc(sizeof(*_pic->mime_type)*(description_length+1));
+ if(description==NULL)return OP_EFAULT;
+ memcpy(description,_buf+i,sizeof(*description)*description_length);
+ description[description_length]='\0';
+ _pic->description=description;
+ i+=description_length;
+ /*Extract the remaining fields.*/
+ width=op_parse_uint32be(_buf+i);
+ i+=4;
+ height=op_parse_uint32be(_buf+i);
+ i+=4;
+ depth=op_parse_uint32be(_buf+i);
+ i+=4;
+ colors=op_parse_uint32be(_buf+i);
+ i+=4;
+ /*If one of these is set, they all must be, but colors==0 is a valid value.*/
+ colors_set=width!=0||height!=0||depth!=0||colors!=0;
+ if((width==0||height==0||depth==0)&&colors_set)return OP_ENOTFORMAT;
+ data_length=op_parse_uint32be(_buf+i);
+ i+=4;
+ if(data_length>_buf_sz-i)return OP_ENOTFORMAT;
+ /*Trim extraneous data so we don't copy it below.*/
+ _buf_sz=i+data_length;
+ /*Attempt to determine the image format.*/
+ format=OP_PIC_FORMAT_UNKNOWN;
+ if(mime_type_length==3&&strcmp(mime_type,"-->")==0){
+ /*The data is a URL, not the image itself.*/
+ format=OP_PIC_FORMAT_URL;
+ /*Picture type 1 must be a 32x32 PNG.*/
+ if(picture_type==1&&(width!=0||height!=0)&&(width!=32||height!=32)){
+ return OP_ENOTFORMAT;
+ }
+ /*Append a terminating NUL for the convenience of our callers.*/
+ _buf[_buf_sz++]='\0';
+ }
+ else{
+ /*Only trust the declared MIME type if the data has a matching signature.*/
+ if(mime_type_length==10
+ &&op_strncasecmp(mime_type,"image/jpeg",mime_type_length)==0){
+ if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG;
+ }
+ else if(mime_type_length==9
+ &&op_strncasecmp(mime_type,"image/png",mime_type_length)==0){
+ if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG;
+ }
+ else if(mime_type_length==9
+ &&op_strncasecmp(mime_type,"image/gif",mime_type_length)==0){
+ if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF;
+ }
+ /*No (or only a generic) MIME type: detect the format from the data.*/
+ else if(mime_type_length==0||(mime_type_length==6
+ &&op_strncasecmp(mime_type,"image/",mime_type_length)==0)){
+ if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG;
+ else if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG;
+ else if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF;
+ }
+ file_width=file_height=file_depth=file_colors=0;
+ /*has_palette stays negative unless an extractor below succeeds.*/
+ has_palette=-1;
+ switch(format){
+ case OP_PIC_FORMAT_JPEG:{
+ op_extract_jpeg_params(_buf+i,data_length,
+ &file_width,&file_height,&file_depth,&file_colors,&has_palette);
+ }break;
+ case OP_PIC_FORMAT_PNG:{
+ op_extract_png_params(_buf+i,data_length,
+ &file_width,&file_height,&file_depth,&file_colors,&has_palette);
+ }break;
+ case OP_PIC_FORMAT_GIF:{
+ op_extract_gif_params(_buf+i,data_length,
+ &file_width,&file_height,&file_depth,&file_colors,&has_palette);
+ }break;
+ }
+ if(has_palette>=0){
+ /*If we successfully extracted these parameters from the image, override
+ any declared values.*/
+ width=file_width;
+ height=file_height;
+ depth=file_depth;
+ colors=file_colors;
+ }
+ /*Picture type 1 must be a 32x32 PNG.*/
+ if(picture_type==1&&(format!=OP_PIC_FORMAT_PNG||width!=32||height!=32)){
+ return OP_ENOTFORMAT;
+ }
+ }
+ /*Adjust _buf_sz instead of using data_length to capture the terminating NUL
+ for URLs.*/
+ _buf_sz-=i;
+ /*Move the picture data to the front of the buffer and shrink it to fit.*/
+ memmove(_buf,_buf+i,sizeof(*_buf)*_buf_sz);
+ _buf=(unsigned char *)_ogg_realloc(_buf,_buf_sz);
+ if(_buf_sz>0&&_buf==NULL)return OP_EFAULT;
+ _pic->type=picture_type;
+ _pic->width=width;
+ _pic->height=height;
+ _pic->depth=depth;
+ _pic->colors=colors;
+ _pic->data_length=data_length;
+ _pic->data=_buf;
+ _pic->format=format;
+ return 0;
+}
+
+/*Parse a METADATA_BLOCK_PICTURE tag.
+  _pic: Receives the parsed picture on success, and is left untouched on
+         failure.
+  _tag: The BASE64-encoded picture data, optionally preceded by
+         "METADATA_BLOCK_PICTURE=".
+  Return: 0 on success, or OP_ENOTFORMAT or OP_EFAULT on failure.*/
+int opus_picture_tag_parse(OpusPictureTag *_pic,const char *_tag){
+  OpusPictureTag  parsed;
+  unsigned char  *decoded;
+  size_t          tag_len;
+  size_t          nquads;
+  size_t          decoded_sz;
+  int             ret;
+  /*Skip the tag name and the '=', if present.*/
+  if(opus_tagncompare("METADATA_BLOCK_PICTURE",22,_tag)==0)_tag+=23;
+  /*Figure out how much BASE64-encoded data we have.*/
+  tag_len=strlen(_tag);
+  if((tag_len&3)!=0)return OP_ENOTFORMAT;
+  nquads=tag_len>>2;
+  decoded_sz=3*nquads;
+  if(decoded_sz<32)return OP_ENOTFORMAT;
+  /*Each trailing '=' stands for one byte less of decoded data.*/
+  if(_tag[tag_len-1]=='=')decoded_sz--;
+  if(_tag[tag_len-2]=='=')decoded_sz--;
+  if(decoded_sz<32)return OP_ENOTFORMAT;
+  /*Allocate an extra byte to allow appending a terminating NUL to URL
+     data.*/
+  decoded=(unsigned char *)_ogg_malloc(sizeof(*decoded)*(decoded_sz+1));
+  if(decoded==NULL)return OP_EFAULT;
+  opus_picture_tag_init(&parsed);
+  ret=opus_picture_tag_parse_impl(&parsed,_tag,decoded,decoded_sz,nquads);
+  if(ret>=0){
+    *_pic=*&parsed;
+    return ret;
+  }
+  /*On failure we still own the buffer and any partial results.*/
+  opus_picture_tag_clear(&parsed);
+  _ogg_free(decoded);
+  return ret;
+}
+
+/*Initialize _pic to the empty state by zeroing the whole structure.*/
+void opus_picture_tag_init(OpusPictureTag *_pic){
+  memset(_pic,0,sizeof(OpusPictureTag));
+}
+
+/*Free the MIME type, description, and image data owned by _pic.
+  The structure itself is neither freed nor reset.*/
+void opus_picture_tag_clear(OpusPictureTag *_pic){
+  _ogg_free(_pic->mime_type);
+  _ogg_free(_pic->description);
+  _ogg_free(_pic->data);
+}
diff --git a/drivers/opus/internal.c b/drivers/opus/internal.c
new file mode 100644
index 0000000000..a9c3671179
--- /dev/null
+++ b/drivers/opus/internal.c
@@ -0,0 +1,42 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+
+#if defined(OP_ENABLE_ASSERTIONS)
+/*Prints an internal-error report (file, line and message) to stderr and then
+   terminates the process with abort().
+  _str:  The message to report.
+  _file: The source file name to report.
+  _line: The source line number to report.*/
+void op_fatal_impl(const char *_str,const char *_file,int _line){
+  const char *where;
+  const char *what;
+  where=_file;
+  what=_str;
+  fprintf(stderr,"Fatal (internal) error in %s, line %i: %s\n",where,_line,what);
+  abort();
+}
+#endif
+
+/*A version of strncasecmp() that is guaranteed to only ignore the case of
+   ASCII characters.
+  At most _n characters are compared, and, like strncasecmp(), the comparison
+   stops once both strings have ended.
+  _a: The first string.
+  _b: The second string.
+  _n: The maximum number of characters to compare.
+  Return: 0 if the compared characters match (ignoring ASCII case), otherwise
+           the difference between the first pair of mismatching characters
+           after converting ASCII lower case to upper case.*/
+int op_strncasecmp(const char *_a,const char *_b,int _n){
+  int i;
+  for(i=0;i<_n;i++){
+    int a;
+    int b;
+    int d;
+    a=_a[i];
+    b=_b[i];
+    if(a>='a'&&a<='z')a-='a'-'A';
+    if(b>='a'&&b<='z')b-='a'-'A';
+    d=a-b;
+    if(d)return d;
+    /*The characters are equal here, so a NUL in one string is a NUL in both.
+      Stop instead of reading past the terminators.*/
+    if(a=='\0')break;
+  }
+  return 0;
+}
diff --git a/drivers/opus/internal.h b/drivers/opus/internal.h
new file mode 100644
index 0000000000..cb4089fd4d
--- /dev/null
+++ b/drivers/opus/internal.h
@@ -0,0 +1,249 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************/
+#if !defined(_opusfile_internal_h)
+# define _opusfile_internal_h (1)
+
+# if !defined(_REENTRANT)
+# define _REENTRANT
+# endif
+# if !defined(_GNU_SOURCE)
+# define _GNU_SOURCE
+# endif
+# if !defined(_LARGEFILE_SOURCE)
+# define _LARGEFILE_SOURCE
+# endif
+# if !defined(_LARGEFILE64_SOURCE)
+# define _LARGEFILE64_SOURCE
+# endif
+# if !defined(_FILE_OFFSET_BITS)
+# define _FILE_OFFSET_BITS 64
+# endif
+
+# include <stdlib.h>
+# include <opus/opusfile.h>
+
+typedef struct OggOpusLink OggOpusLink;
+
+# if defined(OPUS_FIXED_POINT)
+
+typedef opus_int16 op_sample;
+
+# else
+
+typedef float op_sample;
+
+/*We're using this define to test for libopus 1.1 or later until libopus
+ provides a better mechanism.*/
+# if defined(OPUS_GET_EXPERT_FRAME_DURATION_REQUEST)
+/*Enable soft clipping prevention in 16-bit decodes.*/
+# define OP_SOFT_CLIP (1)
+# endif
+
+# endif
+
+# if OP_GNUC_PREREQ(4,2)
+/*Disable excessive warnings about the order of operations.*/
+# pragma GCC diagnostic ignored "-Wparentheses"
+# elif defined(_MSC_VER)
+/*Disable excessive warnings about the order of operations.*/
+# pragma warning(disable:4554)
+/*Disable warnings about "deprecated" POSIX functions.*/
+# pragma warning(disable:4996)
+# endif
+
+# if OP_GNUC_PREREQ(3,0)
+/*Another alternative is
+ (__builtin_constant_p(_x)?!!(_x):__builtin_expect(!!(_x),1))
+ but that evaluates _x multiple times, which may be bad.*/
+# define OP_LIKELY(_x) (__builtin_expect(!!(_x),1))
+# define OP_UNLIKELY(_x) (__builtin_expect(!!(_x),0))
+# else
+# define OP_LIKELY(_x) (!!(_x))
+# define OP_UNLIKELY(_x) (!!(_x))
+# endif
+
+# if defined(OP_ENABLE_ASSERTIONS)
+# if OP_GNUC_PREREQ(2,5)||__SUNPRO_C>=0x590
+__attribute__((noreturn))
+# endif
+void op_fatal_impl(const char *_str,const char *_file,int _line);
+
+# define OP_FATAL(_str) (op_fatal_impl(_str,__FILE__,__LINE__))
+
+# define OP_ASSERT(_cond) \
+ do{ \
+ if(OP_UNLIKELY(!(_cond)))OP_FATAL("assertion failed: " #_cond); \
+ } \
+ while(0)
+# define OP_ALWAYS_TRUE(_cond) OP_ASSERT(_cond)
+
+# else
+# define OP_FATAL(_str) abort()
+# define OP_ASSERT(_cond)
+# define OP_ALWAYS_TRUE(_cond) ((void)(_cond))
+# endif
+
+# define OP_INT64_MAX (2*(((ogg_int64_t)1<<62)-1)|1)
+# define OP_INT64_MIN (-OP_INT64_MAX-1)
+# define OP_INT32_MAX (2*(((ogg_int32_t)1<<30)-1)|1)
+# define OP_INT32_MIN (-OP_INT32_MAX-1)
+
+# define OP_MIN(_a,_b) ((_a)<(_b)?(_a):(_b))
+# define OP_MAX(_a,_b) ((_a)>(_b)?(_a):(_b))
+# define OP_CLAMP(_lo,_x,_hi) (OP_MAX(_lo,OP_MIN(_x,_hi)))
+
+/*Advance a file offset by the given amount, clamping against OP_INT64_MAX.
+ This is used to advance a known offset by things like OP_CHUNK_SIZE or
+ OP_PAGE_SIZE_MAX, while making sure to avoid signed overflow.
+ It assumes that both _offset and _amount are non-negative.*/
+#define OP_ADV_OFFSET(_offset,_amount) \
+ (OP_MIN(_offset,OP_INT64_MAX-(_amount))+(_amount))
+
+/*The maximum channel count for any mapping we'll actually decode.*/
+# define OP_NCHANNELS_MAX (8)
+
+/*Initial state.*/
+# define OP_NOTOPEN (0)
+/*We've found the first Opus stream in the first link.*/
+# define OP_PARTOPEN (1)
+# define OP_OPENED (2)
+/*We've found the first Opus stream in the current link.*/
+# define OP_STREAMSET (3)
+/*We've initialized the decoder for the chosen Opus stream in the current
+ link.*/
+# define OP_INITSET (4)
+
+/*Information cached for a single link in a chained Ogg Opus file.
+ We choose the first Opus stream encountered in each link to play back (and
+ require at least one).
+ OggOpusFile keeps these in its links array (only the current link when the
+ source is not seekable).*/
+struct OggOpusLink{
+ /*The byte offset of the first header page in this link.*/
+ opus_int64 offset;
+ /*The byte offset of the first data page from the chosen Opus stream in this
+ link (after the headers).*/
+ opus_int64 data_offset;
+ /*The byte offset of the last page from the chosen Opus stream in this link.
+ This is used when seeking to ensure we find a page before the last one, so
+ that end-trimming calculations work properly.
+ This is only valid for seekable sources.*/
+ opus_int64 end_offset;
+ /*The granule position of the last sample.
+ This is only valid for seekable sources.*/
+ ogg_int64_t pcm_end;
+ /*The granule position before the first sample.*/
+ ogg_int64_t pcm_start;
+ /*The serial number.*/
+ ogg_uint32_t serialno;
+ /*The contents of the info header.*/
+ OpusHead head;
+ /*The contents of the comment header.*/
+ OpusTags tags;
+};
+
+/*The complete state for one Ogg Opus data source: how to read it, the cached
+ per-link information, the Ogg page/packet framing state, the Opus decoder and
+ its output buffer, and the gain and float->short post-processing state.*/
+struct OggOpusFile{
+ /*The callbacks used to access the data source.*/
+ OpusFileCallbacks callbacks;
+ /*A FILE *, memory buffer, etc.*/
+ void *source;
+ /*Whether or not we can seek with this data source.*/
+ int seekable;
+ /*The number of links in this chained Ogg Opus file.*/
+ int nlinks;
+ /*The cached information from each link in a chained Ogg Opus file.
+ If source isn't seekable (e.g., it's a pipe), only the current link
+ appears.*/
+ OggOpusLink *links;
+ /*The number of serial numbers from a single link.*/
+ int nserialnos;
+ /*The capacity of the list of serial numbers from a single link.*/
+ int cserialnos;
+ /*Storage for the list of serial numbers from a single link.*/
+ ogg_uint32_t *serialnos;
+ /*This is the current offset of the data processed by the ogg_sync_state.
+ After a seek, this should be set to the target offset so that we can track
+ the byte offsets of subsequent pages.
+ After a call to op_get_next_page(), this will point to the first byte after
+ that page.*/
+ opus_int64 offset;
+ /*The total size of this data source, or -1 if it's unseekable.*/
+ opus_int64 end;
+ /*Used to locate pages in the data source.*/
+ ogg_sync_state oy;
+ /*One of OP_NOTOPEN, OP_PARTOPEN, OP_OPENED, OP_STREAMSET, OP_INITSET.*/
+ int ready_state;
+ /*The current link being played back.*/
+ int cur_link;
+ /*The number of decoded samples to discard from the start of decoding.*/
+ opus_int32 cur_discard_count;
+ /*The granule position of the previous packet (current packet start time).*/
+ ogg_int64_t prev_packet_gp;
+ /*The number of bytes read since the last bitrate query, including framing.*/
+ opus_int64 bytes_tracked;
+ /*The number of samples decoded since the last bitrate query.*/
+ ogg_int64_t samples_tracked;
+ /*Takes physical pages and welds them into a logical stream of packets.*/
+ ogg_stream_state os;
+ /*Re-timestamped packets from a single page.
+ Buffering these relies on the undocumented libogg behavior that ogg_packet
+ pointers remain valid until the next page is submitted to the
+ ogg_stream_state they came from.*/
+ ogg_packet op[255];
+ /*The index of the next packet to return.*/
+ int op_pos;
+ /*The total number of packets available.*/
+ int op_count;
+ /*Central working state for the packet-to-PCM decoder.*/
+ OpusMSDecoder *od;
+ /*The application-provided packet decode callback.*/
+ op_decode_cb_func decode_cb;
+ /*The application-provided packet decode callback context.*/
+ void *decode_cb_ctx;
+ /*The stream count used to initialize the decoder.*/
+ int od_stream_count;
+ /*The coupled stream count used to initialize the decoder.*/
+ int od_coupled_count;
+ /*The channel count used to initialize the decoder.*/
+ int od_channel_count;
+ /*The channel mapping used to initialize the decoder.*/
+ unsigned char od_mapping[OP_NCHANNELS_MAX];
+ /*The buffered data for one decoded packet.*/
+ op_sample *od_buffer;
+ /*The current position in the decoded buffer.*/
+ int od_buffer_pos;
+ /*The number of valid samples in the decoded buffer.*/
+ int od_buffer_size;
+ /*The type of gain offset to apply.
+ One of OP_HEADER_GAIN, OP_TRACK_GAIN, or OP_ABSOLUTE_GAIN.*/
+ int gain_type;
+ /*The offset to apply to the gain.*/
+ opus_int32 gain_offset_q8;
+ /*Internal state for soft clipping and dithering float->short output.
+ These members only exist in floating-point builds, and clip_state only
+ when OP_SOFT_CLIP is defined.*/
+#if !defined(OPUS_FIXED_POINT)
+# if defined(OP_SOFT_CLIP)
+ float clip_state[OP_NCHANNELS_MAX];
+# endif
+ float dither_a[OP_NCHANNELS_MAX*4];
+ float dither_b[OP_NCHANNELS_MAX*4];
+ opus_uint32 dither_seed;
+ int dither_mute;
+ int dither_disabled;
+ /*The number of channels represented by the internal state.
+ This gets set to 0 whenever anything that would prevent state propagation
+ occurs (switching between the float/short APIs, or between the
+ stereo/multistream APIs).*/
+ int state_channel_count;
+#endif
+};
+
+int op_strncasecmp(const char *_a,const char *_b,int _n);
+
+#endif
diff --git a/drivers/opus/mlp.c b/drivers/opus/mlp.c
new file mode 100644
index 0000000000..7220a23d42
--- /dev/null
+++ b/drivers/opus/mlp.c
@@ -0,0 +1,140 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#include <math.h>
+#include "mlp.h"
+#include "arch.h"
+#include "tansig_table.h"
+#define MAX_NEURONS 100
+
+#if 0
+static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
+{
+ int i;
+ opus_val16 xx; /* Q11 */
+ /*double x, y;*/
+ opus_val16 dy, yy; /* Q14 */
+ /*x = 1.9073e-06*_x;*/
+ if (_x>=QCONST32(8,19))
+ return QCONST32(1.,14);
+ if (_x<=-QCONST32(8,19))
+ return -QCONST32(1.,14);
+ xx = EXTRACT16(SHR32(_x, 8));
+ /*i = lrint(25*x);*/
+ i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
+ /*x -= .04*i;*/
+ xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
+ /*x = xx*(1./2048);*/
+ /*y = tansig_table[250+i];*/
+ yy = tansig_table[250+i];
+ /*y = yy*(1./16384);*/
+ dy = 16384-MULT16_16_Q14(yy,yy);
+ yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
+ return yy;
+}
+#else
+/*extern const float tansig_table[501];*/
+/* Table-driven approximation of a sigmoid that saturates at +/-1.
+ The magnitude of x is rounded to the nearest multiple of 0.04 to pick a
+ table entry y, and the remaining offset is applied as a correction using
+ dy = 1 - y*y.
+ NOTE(review): the correction has the form of a second-order expansion of
+ tanh(), so tansig_table[i] presumably holds tanh(0.04*i); the table lives
+ in tansig_table.h, which is not visible here -- confirm.
+ Returns 1 for x >= 8 and for NaN, -1 for x <= -8. */
+static OPUS_INLINE float tansig_approx(float x)
+{
+ int i;
+ float y, dy;
+ float sign=1;
+ /* Tests are reversed to catch NaNs */
+ if (!(x<8))
+ return 1;
+ if (!(x>-8))
+ return -1;
+ /* Work on the magnitude only; the sign is restored on return. */
+ if (x<0)
+ {
+ x=-x;
+ sign=-1;
+ }
+ /* Nearest table index for a table step of 0.04 (= 1/25). */
+ i = (int)floor(.5f+25*x);
+ /* x becomes the signed distance from the tabulated point. */
+ x -= .04f*i;
+ y = tansig_table[i];
+ dy = 1-y*y;
+ y = y + x*dy*(1 - y*x);
+ return sign*y;
+}
+#endif
+
+#if 0
+void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
+{
+ int j;
+ opus_val16 hidden[MAX_NEURONS];
+ const opus_val16 *W = m->weights;
+ /* Copy to tmp_in */
+ for (j=0;j<m->topo[1];j++)
+ {
+ int k;
+ opus_val32 sum = SHL32(EXTEND32(*W++),8);
+ for (k=0;k<m->topo[0];k++)
+ sum = MAC16_16(sum, in[k],*W++);
+ hidden[j] = tansig_approx(sum);
+ }
+ for (j=0;j<m->topo[2];j++)
+ {
+ int k;
+ opus_val32 sum = SHL32(EXTEND32(*W++),14);
+ for (k=0;k<m->topo[1];k++)
+ sum = MAC16_16(sum, hidden[k], *W++);
+ out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
+ }
+}
+#else
+/* Evaluates a perceptron with one hidden layer.
+   m:   network description; topo[0], topo[1] and topo[2] are read as the
+        input, hidden and output unit counts.
+   in:  topo[0] input values.
+   out: receives topo[2] output values.
+   The weights are consumed sequentially: for each hidden unit a bias followed
+   by one weight per input, then for each output unit a bias followed by one
+   weight per hidden unit. Every unit's sum goes through tansig_approx().
+   The hidden activations live in a local array of MAX_NEURONS entries. */
+void mlp_process(const MLP *m, const float *in, float *out)
+{
+    float hidden[MAX_NEURONS];
+    const float *w = m->weights;
+    const int n_in = m->topo[0];
+    const int n_hidden = m->topo[1];
+    const int n_out = m->topo[2];
+    int unit;
+    int k;
+
+    /* Hidden layer. */
+    for (unit = 0; unit < n_hidden; unit++)
+    {
+        float acc = *w++;
+        for (k = 0; k < n_in; k++)
+            acc = acc + in[k] * *w++;
+        hidden[unit] = tansig_approx(acc);
+    }
+
+    /* Output layer. */
+    for (unit = 0; unit < n_out; unit++)
+    {
+        float acc = *w++;
+        for (k = 0; k < n_hidden; k++)
+            acc = acc + hidden[k] * *w++;
+        out[unit] = tansig_approx(acc);
+    }
+}
+#endif
diff --git a/drivers/opus/mlp.h b/drivers/opus/mlp.h
new file mode 100644
index 0000000000..86c8e0617d
--- /dev/null
+++ b/drivers/opus/mlp.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _MLP_H_
+#define _MLP_H_
+
+#include "arch.h"
+
+/* Description of a multi-layer perceptron evaluated by mlp_process(). */
+typedef struct {
+ /* NOTE(review): presumably the number of entries in topo (mlp_data.c sets
+ it to 3); mlp_process() does not read it -- confirm. */
+ int layers;
+ /* Unit counts per layer; mlp_process() reads topo[0] (inputs), topo[1]
+ (hidden units) and topo[2] (outputs). */
+ const int *topo;
+ /* Biases and weights in the order mlp_process() consumes them: for each
+ hidden unit a bias then topo[0] weights, followed by, for each output
+ unit, a bias then topo[1] weights. */
+ const float *weights;
+} MLP;
+
+void mlp_process(const MLP *m, const float *in, float *out);
+
+#endif /* _MLP_H_ */
diff --git a/drivers/opus/mlp_data.c b/drivers/opus/mlp_data.c
new file mode 100644
index 0000000000..401c4c0250
--- /dev/null
+++ b/drivers/opus/mlp_data.c
@@ -0,0 +1,105 @@
+/* The contents of this file were automatically generated by mlp_train.c
+ It contains multi-layer perceptron (MLP) weights. */
+
+#include "mlp.h"
+
+/* RMS error was 0.138320, seed was 1361535663 */
+
+static const float weights[422] = {
+
+/* hidden layer */
+-0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f,
+-0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f,
+-0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f,
+0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f,
+0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f,
+24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f,
+-0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f,
+-0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f,
+-0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f,
+1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f,
+15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f,
+0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f,
+-0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f,
+0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f,
+0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f,
+-1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f,
+-0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f,
+-0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f,
+0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f,
+-0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f,
+2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f,
+0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f,
+-0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f,
+0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f,
+0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f,
+-4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f,
+5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f,
+-0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f,
+-0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f,
+-0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f,
+1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f,
+-7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f,
+-0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f,
+0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f,
+0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f,
+-0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f,
+10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f,
+-0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f,
+-0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f,
+-0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f,
+0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f,
+-0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f,
+0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f,
+0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f,
+-0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f,
+0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f,
+-0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f,
+-0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f,
+-0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f,
+-0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f,
+-0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f,
+5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f,
+1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f,
+0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f,
+-0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f,
+0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f,
+-0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f,
+-975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f,
+0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f,
+-0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f,
+-2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f,
+0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f,
+-6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f,
+0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f,
+-0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f,
+-0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f,
+0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f,
+-0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f,
+0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f,
+-0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f,
+0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f,
+-2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f,
+4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f,
+0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f,
+-0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f,
+0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f,
+0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f,
+3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f,
+
+/* output layer */
+-0.381439f, 0.12115f, -0.906927f, 2.93878f, 1.6388f,
+0.882811f, 0.874344f, 1.21726f, -0.874545f, 0.321706f,
+0.785055f, 0.946558f, -0.575066f, -3.46553f, 0.884905f,
+0.0924047f, -9.90712f, 0.391338f, 0.160103f, -2.04954f,
+4.1455f, 0.0684029f, -0.144761f, -0.285282f, 0.379244f,
+-1.1584f, -0.0277241f, -9.85f, -4.82386f, 3.71333f,
+3.87308f, 3.52558f};
+
+/* Layer sizes: 25 inputs, 15 hidden units, 2 outputs.
+ With one bias per unit this accounts for the 422 entries of weights:
+ 15*(25+1) + 2*(15+1) = 422. */
+static const int topo[3] = {25, 15, 2};
+
+/* The network description: layer count, layer sizes and the weight table. */
+const MLP net = {
+ 3,
+ topo,
+ weights
+};
diff --git a/drivers/opus/opus.c b/drivers/opus/opus.c
new file mode 100644
index 0000000000..8978e3b06b
--- /dev/null
+++ b/drivers/opus/opus.c
@@ -0,0 +1,329 @@
+/* Copyright (c) 2011 Xiph.Org Foundation, Skype Limited
+ Written by Jean-Marc Valin and Koen Vos */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus.h"
+#include "opus_private.h"
+
+#ifndef DISABLE_FLOAT_API
+/* Soft-clips interleaved float PCM in place.
+ _x: N*C samples; channel c is read at _x[c + i*C].
+ N: number of samples per channel.
+ C: number of channels.
+ declip_mem: one float per channel; on entry the coefficient left over from
+ the previous call, on exit the coefficient to carry into the next one.
+ Every sample is first saturated to +/-2. Then, for each run of samples
+ between zero crossings that contains a value outside [-1, 1], the
+ non-linearity x + a*x*x is applied with a chosen so that the largest
+ magnitude in the run maps to exactly 1.
+ Does nothing if C<1, N<1, or either pointer is NULL. */
+OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem)
+{
+ int c;
+ int i;
+ float *x;
+
+ if (C<1 || N<1 || !_x || !declip_mem) return;
+
+ /* First thing: saturate everything to +/- 2 which is the highest level our
+ non-linearity can handle. At the point where the signal reaches +/-2,
+ the derivative will be zero anyway, so this doesn't introduce any
+ discontinuity in the derivative. */
+ for (i=0;i<N*C;i++)
+ _x[i] = MAX16(-2.f, MIN16(2.f, _x[i]));
+ for (c=0;c<C;c++)
+ {
+ float a;
+ float x0;
+ int curr;
+
+ x = _x+c;
+ a = declip_mem[c];
+ /* Continue applying the non-linearity from the previous frame to avoid
+ any discontinuity. */
+ for (i=0;i<N;i++)
+ {
+ if (x[i*C]*a>=0)
+ break;
+ x[i*C] = x[i*C]+a*x[i*C]*x[i*C];
+ }
+
+ curr=0;
+ /* Remember the first sample as it stands now; it anchors the ramp used
+ in the special case below. */
+ x0 = x[0];
+ while(1)
+ {
+ int start, end;
+ float maxval;
+ int special=0;
+ int peak_pos;
+ /* Find the next sample outside [-1, 1], starting at curr. */
+ for (i=curr;i<N;i++)
+ {
+ if (x[i*C]>1 || x[i*C]<-1)
+ break;
+ }
+ /* Nothing left to clip: no coefficient is carried to the next call. */
+ if (i==N)
+ {
+ a=0;
+ break;
+ }
+ peak_pos = i;
+ start=end=i;
+ maxval=ABS16(x[i*C]);
+ /* Look for first zero crossing before clipping */
+ while (start>0 && x[i*C]*x[(start-1)*C]>=0)
+ start--;
+ /* Look for first zero crossing after clipping */
+ while (end<N && x[i*C]*x[end*C]>=0)
+ {
+ /* Look for other peaks until the next zero-crossing. */
+ if (ABS16(x[end*C])>maxval)
+ {
+ maxval = ABS16(x[end*C]);
+ peak_pos = end;
+ }
+ end++;
+ }
+ /* Detect the special case where we clip before the first zero crossing */
+ special = (start==0 && x[i*C]*x[0]>=0);
+
+ /* Compute a such that maxval + a*maxval^2 = 1 */
+ a=(maxval-1)/(maxval*maxval);
+ if (x[i*C]>0)
+ a = -a;
+ /* Apply soft clipping */
+ for (i=start;i<end;i++)
+ x[i*C] = x[i*C]+a*x[i*C]*x[i*C];
+
+ if (special && peak_pos>=2)
+ {
+ /* Add a linear ramp from the first sample to the signal peak.
+ This avoids a discontinuity at the beginning of the frame. */
+ float delta;
+ float offset = x0-x[0];
+ delta = offset / peak_pos;
+ for (i=curr;i<peak_pos;i++)
+ {
+ offset -= delta;
+ x[i*C] += offset;
+ x[i*C] = MAX16(-1.f, MIN16(1.f, x[i*C]));
+ }
+ }
+ /* Resume the search after the region just processed. */
+ curr = end;
+ if (curr==N)
+ break;
+ }
+ declip_mem[c] = a;
+ }
+}
+#endif
+
+/* Writes a frame size in the one- or two-byte length encoding.
+   size: the size to encode.
+   data: destination; one byte is written when size < 252, otherwise two.
+   Returns the number of bytes written (1 or 2).
+   In the two-byte form the first byte is 252 plus the two low bits of size
+   and the second is (size - first byte) / 4. */
+int encode_size(int size, unsigned char *data)
+{
+   int first;
+
+   if (size < 252)
+   {
+      data[0] = size;
+      return 1;
+   }
+   /* first is in 252..255, so it fits in a byte unchanged. */
+   first = 252 + (size & 0x3);
+   data[0] = first;
+   data[1] = (size - first) >> 2;
+   return 2;
+}
+
+/* Reads a frame size stored in the one- or two-byte length encoding.
+   data: where the encoded size starts.
+   len:  number of bytes available at data.
+   size: receives the decoded size, or -1 if there are too few bytes.
+   Returns the number of bytes consumed (1 or 2), or -1 if there are too few
+   bytes.
+   A first byte below 252 is the size itself; otherwise the size is
+   4*(second byte) + (first byte). */
+static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *size)
+{
+   if (len >= 1)
+   {
+      int first = data[0];
+      if (first < 252)
+      {
+         *size = first;
+         return 1;
+      }
+      if (len >= 2)
+      {
+         *size = 4*data[1] + first;
+         return 2;
+      }
+   }
+   /* Not enough input for the encoded size. */
+   *size = -1;
+   return -1;
+}
+
+/* Parses the framing of one Opus packet: the TOC byte, the frame count, any
+   padding, and the size of every frame.
+   data:            the packet bytes.
+   len:             the number of bytes available at data.
+   self_delimited:  when non-zero, the size of the last frame (of every frame,
+                    for CBR packets) is read from the packet instead of being
+                    derived from len.
+   out_toc:         if non-NULL, receives the TOC byte.
+   frames:          if non-NULL, frames[i] receives a pointer to the start of
+                    frame i inside data.
+   size:            receives the size in bytes of each frame; must not be
+                    NULL.
+   payload_offset:  if non-NULL, receives the offset of the first frame from
+                    data.
+   packet_offset:   if non-NULL, receives the offset just past the last frame
+                    plus the number of padding bytes counted.
+   Returns the number of frames, OPUS_BAD_ARG if size is NULL or len is
+   negative, or OPUS_INVALID_PACKET if the packet is empty or malformed. */
+int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
+      int self_delimited, unsigned char *out_toc,
+      const unsigned char *frames[48], opus_int16 size[48],
+      int *payload_offset, opus_int32 *packet_offset)
+{
+   int i, bytes;
+   int count;
+   int cbr;
+   unsigned char ch, toc;
+   int framesize;
+   opus_int32 last_size;
+   opus_int32 pad = 0;
+   const unsigned char *data0 = data;
+
+   if (size==NULL || len<0)
+      return OPUS_BAD_ARG;
+   /* A packet needs at least the TOC byte. Checking this first keeps the
+      reads of data[0] below inside the buffer. */
+   if (len==0)
+      return OPUS_INVALID_PACKET;
+
+   framesize = opus_packet_get_samples_per_frame(data, 48000);
+
+   cbr = 0;
+   toc = *data++;
+   len--;
+   last_size = len;
+   switch (toc&0x3)
+   {
+   /* One frame */
+   case 0:
+      count=1;
+      break;
+   /* Two CBR frames */
+   case 1:
+      count=2;
+      cbr = 1;
+      if (!self_delimited)
+      {
+         if (len&0x1)
+            return OPUS_INVALID_PACKET;
+         last_size = len/2;
+         /* If last_size doesn't fit in size[0], we'll catch it later */
+         size[0] = (opus_int16)last_size;
+      }
+      break;
+   /* Two VBR frames */
+   case 2:
+      count = 2;
+      bytes = parse_size(data, len, size);
+      len -= bytes;
+      if (size[0]<0 || size[0] > len)
+         return OPUS_INVALID_PACKET;
+      data += bytes;
+      last_size = len-size[0];
+      break;
+   /* Multiple CBR/VBR frames (from 0 to 120 ms) */
+   default: /*case 3:*/
+      if (len<1)
+         return OPUS_INVALID_PACKET;
+      /* Number of frames encoded in bits 0 to 5 */
+      ch = *data++;
+      count = ch&0x3F;
+      if (count <= 0 || framesize*count > 5760)
+         return OPUS_INVALID_PACKET;
+      len--;
+      /* Padding flag is bit 6 */
+      if (ch&0x40)
+      {
+         int p;
+         do {
+            int tmp;
+            if (len<=0)
+               return OPUS_INVALID_PACKET;
+            p = *data++;
+            len--;
+            /* A length byte of 255 stands for 254 bytes of padding and
+               means that another length byte follows. */
+            tmp = p==255 ? 254: p;
+            len -= tmp;
+            pad += tmp;
+         } while (p==255);
+      }
+      if (len<0)
+         return OPUS_INVALID_PACKET;
+      /* VBR flag is bit 7 */
+      cbr = !(ch&0x80);
+      if (!cbr)
+      {
+         /* VBR case */
+         last_size = len;
+         for (i=0;i<count-1;i++)
+         {
+            bytes = parse_size(data, len, size+i);
+            len -= bytes;
+            if (size[i]<0 || size[i] > len)
+               return OPUS_INVALID_PACKET;
+            data += bytes;
+            last_size -= bytes+size[i];
+         }
+         if (last_size<0)
+            return OPUS_INVALID_PACKET;
+      } else if (!self_delimited)
+      {
+         /* CBR case */
+         last_size = len/count;
+         if (last_size*count!=len)
+            return OPUS_INVALID_PACKET;
+         for (i=0;i<count-1;i++)
+            size[i] = (opus_int16)last_size;
+      }
+      break;
+   }
+   /* Self-delimited framing has an extra size for the last frame. */
+   if (self_delimited)
+   {
+      bytes = parse_size(data, len, size+count-1);
+      len -= bytes;
+      if (size[count-1]<0 || size[count-1] > len)
+         return OPUS_INVALID_PACKET;
+      data += bytes;
+      /* For CBR packets, apply the size to all the frames. */
+      if (cbr)
+      {
+         if (size[count-1]*count > len)
+            return OPUS_INVALID_PACKET;
+         for (i=0;i<count-1;i++)
+            size[i] = size[count-1];
+      } else if (bytes+size[count-1] > last_size)
+         return OPUS_INVALID_PACKET;
+   } else
+   {
+      /* Because it's not encoded explicitly, it's possible the size of the
+         last packet (or all the packets, for the CBR case) is larger than
+         1275. Reject them here.*/
+      if (last_size > 1275)
+         return OPUS_INVALID_PACKET;
+      size[count-1] = (opus_int16)last_size;
+   }
+
+   if (payload_offset)
+      *payload_offset = (int)(data-data0);
+
+   /* Walk the frames to record where each one starts. */
+   for (i=0;i<count;i++)
+   {
+      if (frames)
+         frames[i] = data;
+      data += size[i];
+   }
+
+   if (packet_offset)
+      *packet_offset = pad+(opus_int32)(data-data0);
+
+   if (out_toc)
+      *out_toc = toc;
+
+   return count;
+}
+
+/* Parses a regular (not self-delimited) Opus packet.
+   Forwards to opus_packet_parse_impl() with self-delimited framing turned
+   off and no packet_offset output, and returns its result unchanged. */
+int opus_packet_parse(const unsigned char *data, opus_int32 len,
+      unsigned char *out_toc, const unsigned char *frames[48],
+      opus_int16 size[48], int *payload_offset)
+{
+   int frame_count;
+
+   frame_count = opus_packet_parse_impl(data, len, 0, out_toc,
+         frames, size, payload_offset, NULL);
+   return frame_count;
+}
+
diff --git a/drivers/opus/opus.h b/drivers/opus/opus.h
new file mode 100644
index 0000000000..93a53a2ffc
--- /dev/null
+++ b/drivers/opus/opus.h
@@ -0,0 +1,978 @@
+/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
+ Written by Jean-Marc Valin and Koen Vos */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus.h
+ * @brief Opus reference implementation API
+ */
+
+#ifndef OPUS_H
+#define OPUS_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @mainpage Opus
+ *
+ * The Opus codec is designed for interactive speech and audio transmission over the Internet.
+ * It is designed by the IETF Codec Working Group and incorporates technology from
+ * Skype's SILK codec and Xiph.Org's CELT codec.
+ *
+ * The Opus codec is designed to handle a wide range of interactive audio applications,
+ * including Voice over IP, videoconferencing, in-game chat, and even remote live music
+ * performances. It can scale from low bit-rate narrowband speech to very high quality
+ * stereo music. Its main features are:
+
+ * @li Sampling rates from 8 to 48 kHz
+ * @li Bit-rates from 6 kb/s to 510 kb/s
+ * @li Support for both constant bit-rate (CBR) and variable bit-rate (VBR)
+ * @li Audio bandwidth from narrowband to full-band
+ * @li Support for speech and music
+ * @li Support for mono and stereo
+ * @li Support for multichannel (up to 255 channels)
+ * @li Frame sizes from 2.5 ms to 60 ms
+ * @li Good loss robustness and packet loss concealment (PLC)
+ * @li Floating point and fixed-point implementation
+ *
+ * Documentation sections:
+ * @li @ref opus_encoder
+ * @li @ref opus_decoder
+ * @li @ref opus_repacketizer
+ * @li @ref opus_multistream
+ * @li @ref opus_libinfo
+ * @li @ref opus_custom
+ */
+
+/** @defgroup opus_encoder Opus Encoder
+ * @{
+ *
+ * @brief This page describes the process and functions used to encode Opus.
+ *
+ * Since Opus is a stateful codec, the encoding process starts with creating an encoder
+ * state. This can be done with:
+ *
+ * @code
+ * int error;
+ * OpusEncoder *enc;
+ * enc = opus_encoder_create(Fs, channels, application, &error);
+ * @endcode
+ *
+ * From this point, @c enc can be used for encoding an audio stream. An encoder state
+ * @b must @b not be used for more than one stream at the same time. Similarly, the encoder
+ * state @b must @b not be re-initialized for each frame.
+ *
+ * While opus_encoder_create() allocates memory for the state, it's also possible
+ * to initialize pre-allocated memory:
+ *
+ * @code
+ * int size;
+ * int error;
+ * OpusEncoder *enc;
+ * size = opus_encoder_get_size(channels);
+ * enc = malloc(size);
+ * error = opus_encoder_init(enc, Fs, channels, application);
+ * @endcode
+ *
+ * where opus_encoder_get_size() returns the required size for the encoder state. Note that
+ * future versions of this code may change the size, so no assumptions should be made about it.
+ *
+ * The encoder state is always contiguous in memory and only a shallow copy is sufficient
+ * to copy it (e.g. memcpy())
+ *
+ * It is possible to change some of the encoder's settings using the opus_encoder_ctl()
+ * interface. All these settings already default to the recommended value, so they should
+ * only be changed when necessary. The most common settings one may want to change are:
+ *
+ * @code
+ * opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate));
+ * opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
+ * opus_encoder_ctl(enc, OPUS_SET_SIGNAL(signal_type));
+ * @endcode
+ *
+ * where
+ *
+ * @arg bitrate is in bits per second (b/s)
+ * @arg complexity is a value from 1 to 10, where 1 is the lowest complexity and 10 is the highest
+ * @arg signal_type is either OPUS_AUTO (default), OPUS_SIGNAL_VOICE, or OPUS_SIGNAL_MUSIC
+ *
+ * See @ref opus_encoderctls and @ref opus_genericctls for a complete list of parameters that can be set or queried. Most parameters can be set or changed at any time during a stream.
+ *
+ * To encode a frame, opus_encode() or opus_encode_float() must be called with exactly one frame (2.5, 5, 10, 20, 40 or 60 ms) of audio data:
+ * @code
+ * len = opus_encode(enc, audio_frame, frame_size, packet, max_packet);
+ * @endcode
+ *
+ * where
+ * <ul>
+ * <li>audio_frame is the audio data in opus_int16 (or float for opus_encode_float())</li>
+ * <li>frame_size is the duration of the frame in samples (per channel)</li>
+ * <li>packet is the byte array to which the compressed data is written</li>
+ * <li>max_packet is the maximum number of bytes that can be written in the packet (4000 bytes is recommended).
+ * Do not use max_packet to control VBR target bitrate, instead use the #OPUS_SET_BITRATE CTL.</li>
+ * </ul>
+ *
+ * opus_encode() and opus_encode_float() return the number of bytes actually written to the packet.
+ * The return value <b>can be negative</b>, which indicates that an error has occurred. If the return value
+ * is 1 byte, then the packet does not need to be transmitted (DTX).
+ *
+ * Once the encoder state is no longer needed, it can be destroyed with
+ *
+ * @code
+ * opus_encoder_destroy(enc);
+ * @endcode
+ *
+ * If the encoder was created with opus_encoder_init() rather than opus_encoder_create(),
+ * then no action is required aside from potentially freeing the memory that was manually
+ * allocated for it (calling free(enc) for the example above)
+ *
+ */
+
+/** Opus encoder state.
+ * This contains the complete state of an Opus encoder.
+ * It is position independent and can be freely copied.
+ * @see opus_encoder_create,opus_encoder_init
+ */
+typedef struct OpusEncoder OpusEncoder;
+
+/** Gets the size of an <code>OpusEncoder</code> structure.
+ * @param[in] channels <tt>int</tt>: Number of channels.
+ * This must be 1 or 2.
+ * @returns The size in bytes.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_encoder_get_size(int channels);
+
+/**
+ */
+
+/** Allocates and initializes an encoder state.
+ * There are three coding modes:
+ *
+ * @ref OPUS_APPLICATION_VOIP gives best quality at a given bitrate for voice
+ * signals. It enhances the input signal by high-pass filtering and
+ * emphasizing formants and harmonics. Optionally it includes in-band
+ * forward error correction to protect against packet loss. Use this
+ * mode for typical VoIP applications. Because of the enhancement,
+ * even at high bitrates the output may sound different from the input.
+ *
+ * @ref OPUS_APPLICATION_AUDIO gives best quality at a given bitrate for most
+ * non-voice signals like music. Use this mode for music and mixed
+ * (music/voice) content, broadcast, and applications requiring less
+ * than 15 ms of coding delay.
+ *
+ * @ref OPUS_APPLICATION_RESTRICTED_LOWDELAY configures low-delay mode that
+ * disables the speech-optimized mode in exchange for slightly reduced delay.
+ * This mode can only be set on a newly initialized or freshly reset encoder
+ * because it changes the codec delay.
+ *
+ * This is useful when the caller knows that the speech-optimized modes will not be needed (use with caution).
+ * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz)
+ * This must be one of 8000, 12000, 16000,
+ * 24000, or 48000.
+ * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal
+ * @param [in] application <tt>int</tt>: Coding mode (@ref OPUS_APPLICATION_VOIP/@ref OPUS_APPLICATION_AUDIO/@ref OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ * @param [out] error <tt>int*</tt>: @ref opus_errorcodes
+ * @note Regardless of the sampling rate and number of channels selected, the Opus encoder
+ * can switch to a lower audio bandwidth or number of channels if the bitrate
+ * selected is too low. This also means that it is safe to always use 48 kHz stereo input
+ * and let the encoder optimize the encoding.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusEncoder *opus_encoder_create(
+ opus_int32 Fs,
+ int channels,
+ int application,
+ int *error
+);
+
+/** Initializes a previously allocated encoder state
+ * The memory pointed to by st must be at least the size returned by opus_encoder_get_size().
+ * This is intended for applications which use their own allocator instead of malloc.
+ * @see opus_encoder_create(),opus_encoder_get_size()
+ * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+ * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+ * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz)
+ * This must be one of 8000, 12000, 16000,
+ * 24000, or 48000.
+ * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal
+ * @param [in] application <tt>int</tt>: Coding mode (OPUS_APPLICATION_VOIP/OPUS_APPLICATION_AUDIO/OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ * @retval #OPUS_OK Success or @ref opus_errorcodes
+ */
+OPUS_EXPORT int opus_encoder_init(
+ OpusEncoder *st,
+ opus_int32 Fs,
+ int channels,
+ int application
+) OPUS_ARG_NONNULL(1);
+
+/** Encodes an Opus frame.
+ * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+ * @param [in] pcm <tt>opus_int16*</tt>: Input signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)
+ * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the
+ * input signal.
+ * This must be an Opus frame size for
+ * the encoder's sampling rate.
+ * For example, at 48 kHz the permitted
+ * values are 120, 240, 480, 960, 1920,
+ * and 2880.
+ * Passing in a duration of less than
+ * 10 ms (480 samples at 48 kHz) will
+ * prevent the encoder from using the LPC
+ * or hybrid modes.
+ * @param [out] data <tt>unsigned char*</tt>: Output payload.
+ * This must contain storage for at
+ * least \a max_data_bytes.
+ * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+ * memory for the output
+ * payload. This may be
+ * used to impose an upper limit on
+ * the instant bitrate, but should
+ * not be used as the only bitrate
+ * control. Use #OPUS_SET_BITRATE to
+ * control the bitrate.
+ * @returns The length of the encoded packet (in bytes) on success or a
+ * negative error code (see @ref opus_errorcodes) on failure.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode(
+ OpusEncoder *st,
+ const opus_int16 *pcm,
+ int frame_size,
+ unsigned char *data,
+ opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Encodes an Opus frame from floating point input.
+ * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+ * @param [in] pcm <tt>float*</tt>: Input in float format (interleaved if 2 channels), with a normal range of +/-1.0.
+ * Samples with a range beyond +/-1.0 are supported but will
+ * be clipped by decoders using the integer API and should
+ * only be used if it is known that the far end supports
+ * extended dynamic range.
+ * length is frame_size*channels*sizeof(float)
+ * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the
+ * input signal.
+ * This must be an Opus frame size for
+ * the encoder's sampling rate.
+ * For example, at 48 kHz the permitted
+ * values are 120, 240, 480, 960, 1920,
+ * and 2880.
+ * Passing in a duration of less than
+ * 10 ms (480 samples at 48 kHz) will
+ * prevent the encoder from using the LPC
+ * or hybrid modes.
+ * @param [out] data <tt>unsigned char*</tt>: Output payload.
+ * This must contain storage for at
+ * least \a max_data_bytes.
+ * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+ * memory for the output
+ * payload. This may be
+ * used to impose an upper limit on
+ * the instant bitrate, but should
+ * not be used as the only bitrate
+ * control. Use #OPUS_SET_BITRATE to
+ * control the bitrate.
+ * @returns The length of the encoded packet (in bytes) on success or a
+ * negative error code (see @ref opus_errorcodes) on failure.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode_float(
+ OpusEncoder *st,
+ const float *pcm,
+ int frame_size,
+ unsigned char *data,
+ opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Frees an <code>OpusEncoder</code> allocated by opus_encoder_create().
+ * @param[in] st <tt>OpusEncoder*</tt>: State to be freed.
+ */
+OPUS_EXPORT void opus_encoder_destroy(OpusEncoder *st);
+
+/** Perform a CTL function on an Opus encoder.
+ *
+ * Generally the request and subsequent arguments are generated
+ * by a convenience macro.
+ * @param st <tt>OpusEncoder*</tt>: Encoder state.
+ * @param request This and all remaining parameters should be replaced by one
+ * of the convenience macros in @ref opus_genericctls or
+ * @ref opus_encoderctls.
+ * @see opus_genericctls
+ * @see opus_encoderctls
+ */
+OPUS_EXPORT int opus_encoder_ctl(OpusEncoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+/**@}*/
+
+/** @defgroup opus_decoder Opus Decoder
+ * @{
+ *
+ * @brief This page describes the process and functions used to decode Opus.
+ *
+ * The decoding process also starts with creating a decoder
+ * state. This can be done with:
+ * @code
+ * int error;
+ * OpusDecoder *dec;
+ * dec = opus_decoder_create(Fs, channels, &error);
+ * @endcode
+ * where
+ * @li Fs is the sampling rate and must be 8000, 12000, 16000, 24000, or 48000
+ * @li channels is the number of channels (1 or 2)
+ * @li error will hold the error code in case of failure (or #OPUS_OK on success)
+ * @li the return value is a newly created decoder state to be used for decoding
+ *
+ * While opus_decoder_create() allocates memory for the state, it's also possible
+ * to initialize pre-allocated memory:
+ * @code
+ * int size;
+ * int error;
+ * OpusDecoder *dec;
+ * size = opus_decoder_get_size(channels);
+ * dec = malloc(size);
+ * error = opus_decoder_init(dec, Fs, channels);
+ * @endcode
+ * where opus_decoder_get_size() returns the required size for the decoder state. Note that
+ * future versions of this code may change the size, so no assumptions should be made about it.
+ *
+ * The decoder state is always contiguous in memory and only a shallow copy is sufficient
+ * to copy it (e.g. memcpy())
+ *
+ * To decode a frame, opus_decode() or opus_decode_float() must be called with a packet of compressed audio data:
+ * @code
+ * frame_size = opus_decode(dec, packet, len, decoded, max_size, 0);
+ * @endcode
+ * where
+ *
+ * @li packet is the byte array containing the compressed data
+ * @li len is the exact number of bytes contained in the packet
+ * @li decoded is the decoded audio data in opus_int16 (or float for opus_decode_float())
+ * @li max_size is the max duration of the frame in samples (per channel) that can fit into the decoded array
+ *
+ * opus_decode() and opus_decode_float() return the number of samples (per channel) decoded from the packet.
+ * If that value is negative, then an error has occurred. This can occur if the packet is corrupted or if the audio
+ * buffer is too small to hold the decoded audio.
+ *
+ * Opus is a stateful codec with overlapping blocks and as a result Opus
+ * packets are not coded independently of each other. Packets must be
+ * passed into the decoder serially and in the correct order for a correct
+ * decode. Lost packets can be replaced with loss concealment by calling
+ * the decoder with a null pointer and zero length for the missing packet.
+ *
+ * A single codec state may only be accessed from a single thread at
+ * a time and any required locking must be performed by the caller. Separate
+ * streams must be decoded with separate decoder states and can be decoded
+ * in parallel unless the library was compiled with NONTHREADSAFE_PSEUDOSTACK
+ * defined.
+ *
+ */
+
+/** Opus decoder state.
+ * This contains the complete state of an Opus decoder.
+ * It is position independent and can be freely copied.
+ * @see opus_decoder_create,opus_decoder_init
+ */
+typedef struct OpusDecoder OpusDecoder;
+
+/** Gets the size of an <code>OpusDecoder</code> structure.
+ * @param [in] channels <tt>int</tt>: Number of channels.
+ * This must be 1 or 2.
+ * @returns The size in bytes.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_size(int channels);
+
+/** Allocates and initializes a decoder state.
+ * @param [in] Fs <tt>opus_int32</tt>: Sample rate to decode at (Hz).
+ * This must be one of 8000, 12000, 16000,
+ * 24000, or 48000.
+ * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode
+ * @param [out] error <tt>int*</tt>: #OPUS_OK Success or @ref opus_errorcodes
+ *
+ * Internally Opus stores data at 48000 Hz, so that should be the default
+ * value for Fs. However, the decoder can efficiently decode to buffers
+ * at 8, 12, 16, and 24 kHz so if for some reason the caller cannot use
+ * data at the full sample rate, or knows the compressed data doesn't
+ * use the full frequency range, it can request decoding at a reduced
+ * rate. Likewise, the decoder is capable of filling in either mono or
+ * interleaved stereo pcm buffers, at the caller's request.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusDecoder *opus_decoder_create(
+ opus_int32 Fs,
+ int channels,
+ int *error
+);
+
+/** Initializes a previously allocated decoder state.
+ * The state must be at least the size returned by opus_decoder_get_size().
+ * This is intended for applications which use their own allocator instead of malloc. @see opus_decoder_create,opus_decoder_get_size
+ * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+ * @param [in] st <tt>OpusDecoder*</tt>: Decoder state.
+ * @param [in] Fs <tt>opus_int32</tt>: Sampling rate to decode to (Hz).
+ * This must be one of 8000, 12000, 16000,
+ * 24000, or 48000.
+ * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode
+ * @retval #OPUS_OK Success or @ref opus_errorcodes
+ */
+OPUS_EXPORT int opus_decoder_init(
+ OpusDecoder *st,
+ opus_int32 Fs,
+ int channels
+) OPUS_ARG_NONNULL(1);
+
+/** Decode an Opus packet.
+ * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+ * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+ * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload
+ * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length
+ * is frame_size*channels*sizeof(opus_int16)
+ * @param [in] frame_size Number of samples per channel of available space in \a pcm.
+ * If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will
+ * not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1),
+ * then frame_size needs to be exactly the duration of audio that is missing, otherwise the
+ * decoder will not be in the optimal state to decode the next incoming packet. For the PLC and
+ * FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms.
+ * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be
+ * decoded. If no such data is available, the frame is decoded as if it were lost.
+ * @returns Number of decoded samples or @ref opus_errorcodes
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode(
+ OpusDecoder *st,
+ const unsigned char *data,
+ opus_int32 len,
+ opus_int16 *pcm,
+ int frame_size,
+ int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Decode an Opus packet with floating point output.
+ * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+ * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+ * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload
+ * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length
+ * is frame_size*channels*sizeof(float)
+ * @param [in] frame_size Number of samples per channel of available space in \a pcm.
+ * If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will
+ * not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1),
+ * then frame_size needs to be exactly the duration of audio that is missing, otherwise the
+ * decoder will not be in the optimal state to decode the next incoming packet. For the PLC and
+ * FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms.
+ * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be
+ * decoded. If no such data is available the frame is decoded as if it were lost.
+ * @returns Number of decoded samples or @ref opus_errorcodes
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode_float(
+ OpusDecoder *st,
+ const unsigned char *data,
+ opus_int32 len,
+ float *pcm,
+ int frame_size,
+ int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on an Opus decoder.
+ *
+ * Generally the request and subsequent arguments are generated
+ * by a convenience macro.
+ * @param st <tt>OpusDecoder*</tt>: Decoder state.
+ * @param request This and all remaining parameters should be replaced by one
+ * of the convenience macros in @ref opus_genericctls or
+ * @ref opus_decoderctls.
+ * @see opus_genericctls
+ * @see opus_decoderctls
+ */
+OPUS_EXPORT int opus_decoder_ctl(OpusDecoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/** Frees an <code>OpusDecoder</code> allocated by opus_decoder_create().
+ * @param[in] st <tt>OpusDecoder*</tt>: State to be freed.
+ */
+OPUS_EXPORT void opus_decoder_destroy(OpusDecoder *st);
+
+/** Parse an opus packet into one or more frames.
+ * Opus_decode will perform this operation internally so most applications do
+ * not need to use this function.
+ * This function does not copy the frames, the returned pointers are pointers into
+ * the input packet.
+ * @param [in] data <tt>char*</tt>: Opus packet to be parsed
+ * @param [in] len <tt>opus_int32</tt>: size of data
+ * @param [out] out_toc <tt>char*</tt>: TOC pointer
+ * @param [out] frames <tt>char*[48]</tt>: encapsulated frames
+ * @param [out] size <tt>opus_int16[48]</tt>: sizes of the encapsulated frames
+ * @param [out] payload_offset <tt>int*</tt>: returns the position of the payload within the packet (in bytes)
+ * @returns number of frames
+ */
+OPUS_EXPORT int opus_packet_parse(
+ const unsigned char *data,
+ opus_int32 len,
+ unsigned char *out_toc,
+ const unsigned char *frames[48],
+ opus_int16 size[48],
+ int *payload_offset
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Gets the bandwidth of an Opus packet.
+ * @param [in] data <tt>char*</tt>: Opus packet
+ * @retval OPUS_BANDWIDTH_NARROWBAND Narrowband (4kHz bandpass)
+ * @retval OPUS_BANDWIDTH_MEDIUMBAND Mediumband (6kHz bandpass)
+ * @retval OPUS_BANDWIDTH_WIDEBAND Wideband (8kHz bandpass)
+ * @retval OPUS_BANDWIDTH_SUPERWIDEBAND Superwideband (12kHz bandpass)
+ * @retval OPUS_BANDWIDTH_FULLBAND Fullband (20kHz bandpass)
+ * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_bandwidth(const unsigned char *data) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of samples per frame from an Opus packet.
+ * @param [in] data <tt>char*</tt>: Opus packet.
+ * This must contain at least one byte of
+ * data.
+ * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz.
+ * This must be a multiple of 400, or
+ * inaccurate results will be returned.
+ * @returns Number of samples per frame.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_samples_per_frame(const unsigned char *data, opus_int32 Fs) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of channels from an Opus packet.
+ * @param [in] data <tt>char*</tt>: Opus packet
+ * @returns Number of channels
+ * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_channels(const unsigned char *data) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of frames in an Opus packet.
+ * @param [in] packet <tt>char*</tt>: Opus packet
+ * @param [in] len <tt>opus_int32</tt>: Length of packet
+ * @returns Number of frames
+ * @retval OPUS_BAD_ARG Insufficient data was passed to the function
+ * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of samples of an Opus packet.
+ * @param [in] packet <tt>char*</tt>: Opus packet
+ * @param [in] len <tt>opus_int32</tt>: Length of packet
+ * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz.
+ * This must be a multiple of 400, or
+ * inaccurate results will be returned.
+ * @returns Number of samples
+ * @retval OPUS_BAD_ARG Insufficient data was passed to the function
+ * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, opus_int32 Fs) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of samples of an Opus packet.
+ * @param [in] dec <tt>OpusDecoder*</tt>: Decoder state
+ * @param [in] packet <tt>char*</tt>: Opus packet
+ * @param [in] len <tt>opus_int32</tt>: Length of packet
+ * @returns Number of samples
+ * @retval OPUS_BAD_ARG Insufficient data was passed to the function
+ * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+
+/** Applies soft-clipping to bring a float signal within the [-1,1] range. If
+ * the signal is already in that range, nothing is done. If there are values
+ * outside of [-1,1], then the signal is clipped as smoothly as possible to
+ * both fit in the range and avoid creating excessive distortion in the
+ * process.
+ * @param [in,out] pcm <tt>float*</tt>: Input PCM and modified PCM
+ * @param [in] frame_size <tt>int</tt>: Number of samples per channel to process
+ * @param [in] channels <tt>int</tt>: Number of channels
+ * @param [in,out] softclip_mem <tt>float*</tt>: State memory for the soft clipping process (one float per channel, initialized to zero)
+ */
+OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, float *softclip_mem);
+
+
+/**@}*/
+
+/** @defgroup opus_repacketizer Repacketizer
+ * @{
+ *
+ * The repacketizer can be used to merge multiple Opus packets into a single
+ * packet or alternatively to split Opus packets that have previously been
+ * merged. Splitting valid Opus packets is always guaranteed to succeed,
+ * whereas merging valid packets only succeeds if all frames have the same
+ * mode, bandwidth, and frame size, and when the total duration of the merged
+ * packet is no more than 120 ms.
+ * The repacketizer currently only operates on elementary Opus
+ * streams. It will not manipulate multistream packets successfully, except in
+ * the degenerate case where they consist of data from a single stream.
+ *
+ * The repacketizing process starts with creating a repacketizer state, either
+ * by calling opus_repacketizer_create() or by allocating the memory yourself,
+ * e.g.,
+ * @code
+ * OpusRepacketizer *rp;
+ * rp = (OpusRepacketizer*)malloc(opus_repacketizer_get_size());
+ * if (rp != NULL)
+ * opus_repacketizer_init(rp);
+ * @endcode
+ *
+ * Then the application should submit packets with opus_repacketizer_cat(),
+ * extract new packets with opus_repacketizer_out() or
+ * opus_repacketizer_out_range(), and then reset the state for the next set of
+ * input packets via opus_repacketizer_init().
+ *
+ * For example, to split a sequence of packets into individual frames:
+ * @code
+ * unsigned char *data;
+ * int len;
+ * while (get_next_packet(&data, &len))
+ * {
+ * unsigned char out[1276];
+ * opus_int32 out_len;
+ * int nb_frames;
+ * int err;
+ * int i;
+ * err = opus_repacketizer_cat(rp, data, len);
+ * if (err != OPUS_OK)
+ * {
+ * release_packet(data);
+ * return err;
+ * }
+ * nb_frames = opus_repacketizer_get_nb_frames(rp);
+ * for (i = 0; i < nb_frames; i++)
+ * {
+ * out_len = opus_repacketizer_out_range(rp, i, i+1, out, sizeof(out));
+ * if (out_len < 0)
+ * {
+ * release_packet(data);
+ * return (int)out_len;
+ * }
+ * output_next_packet(out, out_len);
+ * }
+ * opus_repacketizer_init(rp);
+ * release_packet(data);
+ * }
+ * @endcode
+ *
+ * Alternatively, to combine a sequence of frames into packets that each
+ * contain up to <code>TARGET_DURATION_MS</code> milliseconds of data:
+ * @code
+ * // The maximum number of packets with duration TARGET_DURATION_MS occurs
+ * // when the frame size is 2.5 ms, for a total of (TARGET_DURATION_MS*2/5)
+ * // packets.
+ * unsigned char *data[(TARGET_DURATION_MS*2/5)+1];
+ * opus_int32 len[(TARGET_DURATION_MS*2/5)+1];
+ * int nb_packets;
+ * unsigned char out[1277*(TARGET_DURATION_MS*2/2)];
+ * opus_int32 out_len;
+ * int prev_toc;
+ * nb_packets = 0;
+ * while (get_next_packet(data+nb_packets, len+nb_packets))
+ * {
+ * int nb_frames;
+ * int err;
+ * nb_frames = opus_packet_get_nb_frames(data[nb_packets], len[nb_packets]);
+ * if (nb_frames < 1)
+ * {
+ * release_packets(data, nb_packets+1);
+ * return nb_frames;
+ * }
+ * nb_frames += opus_repacketizer_get_nb_frames(rp);
+ * // If adding the next packet would exceed our target, or it has an
+ * // incompatible TOC sequence, output the packets we already have before
+ * // submitting it.
+ * // N.B., The nb_packets > 0 check ensures we've submitted at least one
+ * // packet since the last call to opus_repacketizer_init(). Otherwise a
+ * // single packet longer than TARGET_DURATION_MS would cause us to try to
+ * // output an (invalid) empty packet. It also ensures that prev_toc has
+ * // been set to a valid value. Additionally, len[nb_packets] > 0 is
+ * // guaranteed by the call to opus_packet_get_nb_frames() above, so the
+ * // reference to data[nb_packets][0] should be valid.
+ * if (nb_packets > 0 && (
+ * ((prev_toc & 0xFC) != (data[nb_packets][0] & 0xFC)) ||
+ * opus_packet_get_samples_per_frame(data[nb_packets], 48000)*nb_frames >
+ * TARGET_DURATION_MS*48))
+ * {
+ * out_len = opus_repacketizer_out(rp, out, sizeof(out));
+ * if (out_len < 0)
+ * {
+ * release_packets(data, nb_packets+1);
+ * return (int)out_len;
+ * }
+ * output_next_packet(out, out_len);
+ * opus_repacketizer_init(rp);
+ * release_packets(data, nb_packets);
+ * data[0] = data[nb_packets];
+ * len[0] = len[nb_packets];
+ * nb_packets = 0;
+ * }
+ * err = opus_repacketizer_cat(rp, data[nb_packets], len[nb_packets]);
+ * if (err != OPUS_OK)
+ * {
+ * release_packets(data, nb_packets+1);
+ * return err;
+ * }
+ * prev_toc = data[nb_packets][0];
+ * nb_packets++;
+ * }
+ * // Output the final, partial packet.
+ * if (nb_packets > 0)
+ * {
+ * out_len = opus_repacketizer_out(rp, out, sizeof(out));
+ * release_packets(data, nb_packets);
+ * if (out_len < 0)
+ * return (int)out_len;
+ * output_next_packet(out, out_len);
+ * }
+ * @endcode
+ *
+ * An alternate way of merging packets is to simply call opus_repacketizer_cat()
+ * unconditionally until it fails. At that point, the merged packet can be
+ * obtained with opus_repacketizer_out() and the input packet for which
+ * opus_repacketizer_cat() failed needs to be re-added to a newly reinitialized
+ * repacketizer state.
+ */
+
+typedef struct OpusRepacketizer OpusRepacketizer;
+
+/** Gets the size of an <code>OpusRepacketizer</code> structure.
+ * @returns The size in bytes.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_size(void);
+
+/** (Re)initializes a previously allocated repacketizer state.
+ * The state must be at least the size returned by opus_repacketizer_get_size().
+ * This can be used for applications which use their own allocator instead of
+ * malloc().
+ * It must also be called to reset the queue of packets waiting to be
+ * repacketized, which is necessary if the maximum packet duration of 120 ms
+ * is reached or if you wish to submit packets with a different Opus
+ * configuration (coding mode, audio bandwidth, frame size, or channel count).
+ * Failure to do so will prevent a new packet from being added with
+ * opus_repacketizer_cat().
+ * @see opus_repacketizer_create
+ * @see opus_repacketizer_get_size
+ * @see opus_repacketizer_cat
+ * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to
+ * (re)initialize.
+ * @returns A pointer to the same repacketizer state that was passed in.
+ */
+OPUS_EXPORT OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1);
+
+/** Allocates memory and initializes the new repacketizer with
+ * opus_repacketizer_init().
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusRepacketizer *opus_repacketizer_create(void);
+
+/** Frees an <code>OpusRepacketizer</code> allocated by
+ * opus_repacketizer_create().
+ * @param[in] rp <tt>OpusRepacketizer*</tt>: State to be freed.
+ */
+OPUS_EXPORT void opus_repacketizer_destroy(OpusRepacketizer *rp);
+
+/** Add a packet to the current repacketizer state.
+ * This packet must match the configuration of any packets already submitted
+ * for repacketization since the last call to opus_repacketizer_init().
+ * This means that it must have the same coding mode, audio bandwidth, frame
+ * size, and channel count.
+ * This can be checked in advance by examining the top 6 bits of the first
+ * byte of the packet, and ensuring they match the top 6 bits of the first
+ * byte of any previously submitted packet.
+ * The total duration of audio in the repacketizer state also must not exceed
+ * 120 ms, the maximum duration of a single packet, after adding this packet.
+ *
+ * The contents of the current repacketizer state can be extracted into new
+ * packets using opus_repacketizer_out() or opus_repacketizer_out_range().
+ *
+ * In order to add a packet with a different configuration or to add more
+ * audio beyond 120 ms, you must clear the repacketizer state by calling
+ * opus_repacketizer_init().
+ * If a packet is too large to add to the current repacketizer state, no part
+ * of it is added, even if it contains multiple frames, some of which might
+ * fit.
+ * If you wish to be able to add parts of such packets, you should first use
+ * another repacketizer to split the packet into pieces and add them
+ * individually.
+ * @see opus_repacketizer_out_range
+ * @see opus_repacketizer_out
+ * @see opus_repacketizer_init
+ * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to which to
+ * add the packet.
+ * @param[in] data <tt>const unsigned char*</tt>: The packet data.
+ * The application must ensure
+ * this pointer remains valid
+ * until the next call to
+ * opus_repacketizer_init() or
+ * opus_repacketizer_destroy().
+ * @param len <tt>opus_int32</tt>: The number of bytes in the packet data.
+ * @returns An error code indicating whether or not the operation succeeded.
+ * @retval #OPUS_OK The packet's contents have been added to the repacketizer
+ * state.
+ * @retval #OPUS_INVALID_PACKET The packet did not have a valid TOC sequence,
+ * the packet's TOC sequence was not compatible
+ * with previously submitted packets (because
+ * the coding mode, audio bandwidth, frame size,
+ * or channel count did not match), or adding
+ * this packet would increase the total amount of
+ * audio stored in the repacketizer state to more
+ * than 120 ms.
+ */
+OPUS_EXPORT int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+
+
+/** Construct a new packet from data previously submitted to the repacketizer
+ * state via opus_repacketizer_cat().
+ * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to
+ * construct the new packet.
+ * @param begin <tt>int</tt>: The index of the first frame in the current
+ * repacketizer state to include in the output.
+ * @param end <tt>int</tt>: One past the index of the last frame in the
+ * current repacketizer state to include in the
+ * output.
+ * @param[out] data <tt>unsigned char*</tt>: The buffer in which to
+ * store the output packet.
+ * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in
+ * the output buffer. In order to guarantee
+ * success, this should be at least
+ * <code>1276</code> for a single frame,
+ * or for multiple frames,
+ * <code>1277*(end-begin)</code>.
+ * However, <code>1*(end-begin)</code> plus
+ * the size of all packet data submitted to
+ * the repacketizer since the last call to
+ * opus_repacketizer_init() or
+ * opus_repacketizer_create() is also
+ * sufficient, and possibly much smaller.
+ * @returns The total size of the output packet on success, or an error code
+ * on failure.
+ * @retval #OPUS_BAD_ARG <code>[begin,end)</code> was an invalid range of
+ * frames (begin < 0, begin >= end, or end >
+ * opus_repacketizer_get_nb_frames()).
+ * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the
+ * complete output packet.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Return the total number of frames contained in packet data submitted to
+ * the repacketizer state so far via opus_repacketizer_cat() since the last
+ * call to opus_repacketizer_init() or opus_repacketizer_create().
+ * This defines the valid range of frames that can be extracted with
+ * opus_repacketizer_out_range() or opus_repacketizer_out().
+ * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state containing the
+ * frames.
+ * @returns The total number of frames contained in the packet data submitted
+ * to the repacketizer state.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1);
+
+/** Construct a new packet from data previously submitted to the repacketizer
+ * state via opus_repacketizer_cat().
+ * This is a convenience routine that returns all the data submitted so far
+ * in a single packet.
+ * It is equivalent to calling
+ * @code
+ * opus_repacketizer_out_range(rp, 0, opus_repacketizer_get_nb_frames(rp),
+ * data, maxlen)
+ * @endcode
+ * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to
+ * construct the new packet.
+ * @param[out] data <tt>unsigned char*</tt>: The buffer in which to
+ * store the output packet.
+ * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in
+ * the output buffer. In order to guarantee
+ * success, this should be at least
+ * <code>1277*opus_repacketizer_get_nb_frames(rp)</code>.
+ * However,
+ * <code>1*opus_repacketizer_get_nb_frames(rp)</code>
+ * plus the size of all packet data
+ * submitted to the repacketizer since the
+ * last call to opus_repacketizer_init() or
+ * opus_repacketizer_create() is also
+ * sufficient, and possibly much smaller.
+ * @returns The total size of the output packet on success, or an error code
+ * on failure.
+ * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the
+ * complete output packet.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1);
+
+/** Pads a given Opus packet to a larger size (possibly changing the TOC sequence).
+ * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the
+ * packet to pad.
+ * @param len <tt>opus_int32</tt>: The size of the packet.
+ * This must be at least 1.
+ * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+ * This must be at least as large as len.
+ * @returns an error code
+ * @retval #OPUS_OK on success.
+ * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+ * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+ */
+OPUS_EXPORT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len);
+
+/** Remove all padding from a given Opus packet and rewrite the TOC sequence to
+ * minimize space usage.
+ * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the
+ * packet to strip.
+ * @param len <tt>opus_int32</tt>: The size of the packet.
+ * This must be at least 1.
+ * @returns The new size of the output packet on success, or an error code
+ * on failure.
+ * @retval #OPUS_BAD_ARG \a len was less than 1.
+ * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len);
+
+/** Pads a given Opus multi-stream packet to a larger size (possibly changing the TOC sequence).
+ * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the
+ * packet to pad.
+ * @param len <tt>opus_int32</tt>: The size of the packet.
+ * This must be at least 1.
+ * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+ * This must be at least as large as len.
+ * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet.
+ * This must be at least 1.
+ * @returns an error code
+ * @retval #OPUS_OK on success.
+ * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+ * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+ */
+OPUS_EXPORT int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams);
+
+/** Remove all padding from a given Opus multi-stream packet and rewrite the TOC sequence to
+ * minimize space usage.
+ * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the
+ * packet to strip.
+ * @param len <tt>opus_int32</tt>: The size of the packet.
+ * This must be at least 1.
+ * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet.
+ * This must be at least 1.
+ * @returns The new size of the output packet on success, or an error code
+ * on failure.
+ * @retval #OPUS_BAD_ARG \a len was less than 1.
+ * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams);
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_H */
diff --git a/drivers/opus/opus_compare.c b/drivers/opus/opus_compare.c
new file mode 100644
index 0000000000..06c67d752f
--- /dev/null
+++ b/drivers/opus/opus_compare.c
@@ -0,0 +1,379 @@
+/* Copyright (c) 2011-2012 Xiph.Org Foundation, Mozilla Corporation
+ Written by Jean-Marc Valin and Timothy B. Terriberry */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define OPUS_PI (3.14159265F)
+
+#define OPUS_COSF(_x) ((float)cos(_x))
+#define OPUS_SINF(_x) ((float)sin(_x))
+
+static void *check_alloc(void *_ptr){
+ if(_ptr==NULL){
+ fprintf(stderr,"Out of memory.\n");
+ exit(EXIT_FAILURE);
+ }
+ return _ptr;
+}
+
+static void *opus_malloc(size_t _size){
+ return check_alloc(malloc(_size));
+}
+
+static void *opus_realloc(void *_ptr,size_t _size){
+ return check_alloc(realloc(_ptr,_size));
+}
+
+static size_t read_pcm16(float **_samples,FILE *_fin,int _nchannels){
+ unsigned char buf[1024];
+ float *samples;
+ size_t nsamples;
+ size_t csamples;
+ size_t xi;
+ size_t nread;
+ samples=NULL;
+ nsamples=csamples=0;
+ for(;;){
+ nread=fread(buf,2*_nchannels,1024/(2*_nchannels),_fin);
+ if(nread<=0)break;
+ if(nsamples+nread>csamples){
+ do csamples=csamples<<1|1;
+ while(nsamples+nread>csamples);
+ samples=(float *)opus_realloc(samples,
+ _nchannels*csamples*sizeof(*samples));
+ }
+ for(xi=0;xi<nread;xi++){
+ int ci;
+ for(ci=0;ci<_nchannels;ci++){
+ int s;
+ s=buf[2*(xi*_nchannels+ci)+1]<<8|buf[2*(xi*_nchannels+ci)];
+ s=((s&0xFFFF)^0x8000)-0x8000;
+ samples[(nsamples+xi)*_nchannels+ci]=s;
+ }
+ }
+ nsamples+=nread;
+ }
+ *_samples=(float *)opus_realloc(samples,
+ _nchannels*nsamples*sizeof(*samples));
+ return nsamples;
+}
+
+static void band_energy(float *_out,float *_ps,const int *_bands,int _nbands,
+ const float *_in,int _nchannels,size_t _nframes,int _window_sz,
+ int _step,int _downsample){
+ float *window;
+ float *x;
+ float *c;
+ float *s;
+ size_t xi;
+ int xj;
+ int ps_sz;
+ window=(float *)opus_malloc((3+_nchannels)*_window_sz*sizeof(*window));
+ c=window+_window_sz;
+ s=c+_window_sz;
+ x=s+_window_sz;
+ ps_sz=_window_sz/2;
+ for(xj=0;xj<_window_sz;xj++){
+ window[xj]=0.5F-0.5F*OPUS_COSF((2*OPUS_PI/(_window_sz-1))*xj);
+ }
+ for(xj=0;xj<_window_sz;xj++){
+ c[xj]=OPUS_COSF((2*OPUS_PI/_window_sz)*xj);
+ }
+ for(xj=0;xj<_window_sz;xj++){
+ s[xj]=OPUS_SINF((2*OPUS_PI/_window_sz)*xj);
+ }
+ for(xi=0;xi<_nframes;xi++){
+ int ci;
+ int xk;
+ int bi;
+ for(ci=0;ci<_nchannels;ci++){
+ for(xk=0;xk<_window_sz;xk++){
+ x[ci*_window_sz+xk]=window[xk]*_in[(xi*_step+xk)*_nchannels+ci];
+ }
+ }
+ for(bi=xj=0;bi<_nbands;bi++){
+ float p[2]={0};
+ for(;xj<_bands[bi+1];xj++){
+ for(ci=0;ci<_nchannels;ci++){
+ float re;
+ float im;
+ int ti;
+ ti=0;
+ re=im=0;
+ for(xk=0;xk<_window_sz;xk++){
+ re+=c[ti]*x[ci*_window_sz+xk];
+ im-=s[ti]*x[ci*_window_sz+xk];
+ ti+=xj;
+ if(ti>=_window_sz)ti-=_window_sz;
+ }
+ re*=_downsample;
+ im*=_downsample;
+ _ps[(xi*ps_sz+xj)*_nchannels+ci]=re*re+im*im+100000;
+ p[ci]+=_ps[(xi*ps_sz+xj)*_nchannels+ci];
+ }
+ }
+ if(_out){
+ _out[(xi*_nbands+bi)*_nchannels]=p[0]/(_bands[bi+1]-_bands[bi]);
+ if(_nchannels==2){
+ _out[(xi*_nbands+bi)*_nchannels+1]=p[1]/(_bands[bi+1]-_bands[bi]);
+ }
+ }
+ }
+ }
+ free(window);
+}
+
+#define NBANDS (21)
+#define NFREQS (240)
+
+/*Bands on which we compute the pseudo-NMR (Bark-derived
+ CELT bands).*/
+static const int BANDS[NBANDS+1]={
+ 0,2,4,6,8,10,12,14,16,20,24,28,32,40,48,56,68,80,96,120,156,200
+};
+
+#define TEST_WIN_SIZE (480)
+#define TEST_WIN_STEP (120)
+
+int main(int _argc,const char **_argv){
+ FILE *fin1;
+ FILE *fin2;
+ float *x;
+ float *y;
+ float *xb;
+ float *X;
+ float *Y;
+ double err;
+ float Q;
+ size_t xlength;
+ size_t ylength;
+ size_t nframes;
+ size_t xi;
+ int ci;
+ int xj;
+ int bi;
+ int nchannels;
+ unsigned rate;
+ int downsample;
+ int ybands;
+ int yfreqs;
+ int max_compare;
+ if(_argc<3||_argc>6){
+ fprintf(stderr,"Usage: %s [-s] [-r rate2] <file1.sw> <file2.sw>\n",
+ _argv[0]);
+ return EXIT_FAILURE;
+ }
+ nchannels=1;
+ if(strcmp(_argv[1],"-s")==0){
+ nchannels=2;
+ _argv++;
+ }
+ rate=48000;
+ ybands=NBANDS;
+ yfreqs=NFREQS;
+ downsample=1;
+ if(strcmp(_argv[1],"-r")==0){
+ rate=atoi(_argv[2]);
+ if(rate!=8000&&rate!=12000&&rate!=16000&&rate!=24000&&rate!=48000){
+ fprintf(stderr,
+ "Sampling rate must be 8000, 12000, 16000, 24000, or 48000\n");
+ return EXIT_FAILURE;
+ }
+ downsample=48000/rate;
+ switch(rate){
+ case 8000:ybands=13;break;
+ case 12000:ybands=15;break;
+ case 16000:ybands=17;break;
+ case 24000:ybands=19;break;
+ }
+ yfreqs=NFREQS/downsample;
+ _argv+=2;
+ }
+ fin1=fopen(_argv[1],"rb");
+ if(fin1==NULL){
+ fprintf(stderr,"Error opening '%s'.\n",_argv[1]);
+ return EXIT_FAILURE;
+ }
+ fin2=fopen(_argv[2],"rb");
+ if(fin2==NULL){
+ fprintf(stderr,"Error opening '%s'.\n",_argv[2]);
+ fclose(fin1);
+ return EXIT_FAILURE;
+ }
+ /*Read in the data and allocate scratch space.*/
+ xlength=read_pcm16(&x,fin1,2);
+ if(nchannels==1){
+ for(xi=0;xi<xlength;xi++)x[xi]=.5*(x[2*xi]+x[2*xi+1]);
+ }
+ fclose(fin1);
+ ylength=read_pcm16(&y,fin2,nchannels);
+ fclose(fin2);
+ if(xlength!=ylength*downsample){
+ fprintf(stderr,"Sample counts do not match (%lu!=%lu).\n",
+ (unsigned long)xlength,(unsigned long)ylength*downsample);
+ return EXIT_FAILURE;
+ }
+ if(xlength<TEST_WIN_SIZE){
+ fprintf(stderr,"Insufficient sample data (%lu<%i).\n",
+ (unsigned long)xlength,TEST_WIN_SIZE);
+ return EXIT_FAILURE;
+ }
+ nframes=(xlength-TEST_WIN_SIZE+TEST_WIN_STEP)/TEST_WIN_STEP;
+ xb=(float *)opus_malloc(nframes*NBANDS*nchannels*sizeof(*xb));
+ X=(float *)opus_malloc(nframes*NFREQS*nchannels*sizeof(*X));
+ Y=(float *)opus_malloc(nframes*yfreqs*nchannels*sizeof(*Y));
+ /*Compute the per-band spectral energy of the original signal
+ and the error.*/
+ band_energy(xb,X,BANDS,NBANDS,x,nchannels,nframes,
+ TEST_WIN_SIZE,TEST_WIN_STEP,1);
+ free(x);
+ band_energy(NULL,Y,BANDS,ybands,y,nchannels,nframes,
+ TEST_WIN_SIZE/downsample,TEST_WIN_STEP/downsample,downsample);
+ free(y);
+ for(xi=0;xi<nframes;xi++){
+ /*Frequency masking (low to high): 10 dB/Bark slope.*/
+ for(bi=1;bi<NBANDS;bi++){
+ for(ci=0;ci<nchannels;ci++){
+ xb[(xi*NBANDS+bi)*nchannels+ci]+=
+ 0.1F*xb[(xi*NBANDS+bi-1)*nchannels+ci];
+ }
+ }
+ /*Frequency masking (high to low): 15 dB/Bark slope.*/
+ for(bi=NBANDS-1;bi-->0;){
+ for(ci=0;ci<nchannels;ci++){
+ xb[(xi*NBANDS+bi)*nchannels+ci]+=
+ 0.03F*xb[(xi*NBANDS+bi+1)*nchannels+ci];
+ }
+ }
+ if(xi>0){
+ /*Temporal masking: -3 dB/2.5ms slope.*/
+ for(bi=0;bi<NBANDS;bi++){
+ for(ci=0;ci<nchannels;ci++){
+ xb[(xi*NBANDS+bi)*nchannels+ci]+=
+ 0.5F*xb[((xi-1)*NBANDS+bi)*nchannels+ci];
+ }
+ }
+ }
+ /* Allowing some cross-talk */
+ if(nchannels==2){
+ for(bi=0;bi<NBANDS;bi++){
+ float l,r;
+ l=xb[(xi*NBANDS+bi)*nchannels+0];
+ r=xb[(xi*NBANDS+bi)*nchannels+1];
+ xb[(xi*NBANDS+bi)*nchannels+0]+=0.01F*r;
+ xb[(xi*NBANDS+bi)*nchannels+1]+=0.01F*l;
+ }
+ }
+
+ /* Apply masking */
+ for(bi=0;bi<ybands;bi++){
+ for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){
+ for(ci=0;ci<nchannels;ci++){
+ X[(xi*NFREQS+xj)*nchannels+ci]+=
+ 0.1F*xb[(xi*NBANDS+bi)*nchannels+ci];
+ Y[(xi*yfreqs+xj)*nchannels+ci]+=
+ 0.1F*xb[(xi*NBANDS+bi)*nchannels+ci];
+ }
+ }
+ }
+ }
+
+ /* Average of consecutive frames to make comparison slightly less sensitive */
+ for(bi=0;bi<ybands;bi++){
+ for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){
+ for(ci=0;ci<nchannels;ci++){
+ float xtmp;
+ float ytmp;
+ xtmp = X[xj*nchannels+ci];
+ ytmp = Y[xj*nchannels+ci];
+ for(xi=1;xi<nframes;xi++){
+ float xtmp2;
+ float ytmp2;
+ xtmp2 = X[(xi*NFREQS+xj)*nchannels+ci];
+ ytmp2 = Y[(xi*yfreqs+xj)*nchannels+ci];
+ X[(xi*NFREQS+xj)*nchannels+ci] += xtmp;
+ Y[(xi*yfreqs+xj)*nchannels+ci] += ytmp;
+ xtmp = xtmp2;
+ ytmp = ytmp2;
+ }
+ }
+ }
+ }
+
+ /*If working at a lower sampling rate, don't take into account the last
+ 300 Hz to allow for different transition bands.
+ For 12 kHz, we don't skip anything, because the last band already skips
+ 400 Hz.*/
+ if(rate==48000)max_compare=BANDS[NBANDS];
+ else if(rate==12000)max_compare=BANDS[ybands];
+ else max_compare=BANDS[ybands]-3;
+ err=0;
+ for(xi=0;xi<nframes;xi++){
+ double Ef;
+ Ef=0;
+ for(bi=0;bi<ybands;bi++){
+ double Eb;
+ Eb=0;
+ for(xj=BANDS[bi];xj<BANDS[bi+1]&&xj<max_compare;xj++){
+ for(ci=0;ci<nchannels;ci++){
+ float re;
+ float im;
+ re=Y[(xi*yfreqs+xj)*nchannels+ci]/X[(xi*NFREQS+xj)*nchannels+ci];
+ im=re-log(re)-1;
+ /*Make comparison less sensitive around the SILK/CELT cross-over to
+ allow for mode freedom in the filters.*/
+ if(xj>=79&&xj<=81)im*=0.1F;
+ if(xj==80)im*=0.1F;
+ Eb+=im;
+ }
+ }
+ Eb /= (BANDS[bi+1]-BANDS[bi])*nchannels;
+ Ef += Eb*Eb;
+ }
+ /*Using a fixed normalization value means we're willing to accept slightly
+ lower quality for lower sampling rates.*/
+ Ef/=NBANDS;
+ Ef*=Ef;
+ err+=Ef*Ef;
+ }
+ err=pow(err/nframes,1.0/16);
+ Q=100*(1-0.5*log(1+err)/log(1.13));
+ if(Q<0){
+ fprintf(stderr,"Test vector FAILS\n");
+ fprintf(stderr,"Internal weighted error is %f\n",err);
+ return EXIT_FAILURE;
+ }
+ else{
+ fprintf(stderr,"Test vector PASSES\n");
+ fprintf(stderr,
+ "Opus quality metric: %.1f %% (internal weighted error is %f)\n",Q,err);
+ return EXIT_SUCCESS;
+ }
+}
diff --git a/drivers/opus/opus_config.h b/drivers/opus/opus_config.h
new file mode 100644
index 0000000000..c6470e92c3
--- /dev/null
+++ b/drivers/opus/opus_config.h
@@ -0,0 +1,121 @@
+/* Opus configuration header */
+/* Based on the output of libopus configure script */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `lrint' function. */
+#define HAVE_LRINT 1
+
+/* Define to 1 if you have the `lrintf' function. */
+#define HAVE_LRINTF 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+ */
+#define LT_OBJDIR ".libs/"
+
+#ifdef OPUS_ARM_OPT
+/* Make use of ARM asm optimization */
+#define OPUS_ARM_ASM 1
+
+/* Use generic ARMv4 inline asm optimizations */
+#define OPUS_ARM_INLINE_ASM 1
+
+/* Use ARMv5E inline asm optimizations */
+#define OPUS_ARM_INLINE_EDSP 1
+
+/* Use ARMv6 inline asm optimizations */
+#define OPUS_ARM_INLINE_MEDIA 1
+
+/* Use ARM NEON inline asm optimizations */
+#define OPUS_ARM_INLINE_NEON 1
+
+/* Define if assembler supports EDSP instructions */
+#define OPUS_ARM_MAY_HAVE_EDSP 1
+
+/* Define if assembler supports ARMv6 media instructions */
+#define OPUS_ARM_MAY_HAVE_MEDIA 1
+
+/* Define if compiler supports NEON instructions */
+#define OPUS_ARM_MAY_HAVE_NEON 1
+#endif // OPUS_ARM_OPT
+
+#ifdef OPUS_ARM64_OPT
+/* Make use of ARM asm optimization */
+#define OPUS_ARM_ASM 1
+
+/* Use ARMv6 inline asm optimizations */
+#define OPUS_ARM_INLINE_MEDIA 1 // work
+
+/* Use ARM NEON inline asm optimizations */
+#define OPUS_ARM_INLINE_NEON 1 // work
+
+/* Define if assembler supports EDSP instructions */
+#define OPUS_ARM_MAY_HAVE_EDSP 1 // work
+
+/* Define if assembler supports ARMv6 media instructions */
+#define OPUS_ARM_MAY_HAVE_MEDIA 1 // work
+
+/* Define if compiler supports NEON instructions */
+#define OPUS_ARM_MAY_HAVE_NEON 1
+
+#endif // OPUS_ARM64_OPT
+
+/* This is a build of OPUS */
+#define OPUS_BUILD /**/
+
+#ifndef WIN32
+ /* Use C99 variable-size arrays */
+ #define VAR_ARRAYS 1
+#else
+ /* Fixes VS 2013 compile error */
+ #define USE_ALLOCA 1
+#endif
+
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+ calls it, or to nothing if 'inline' is not supported under any name. */
+#ifndef __cplusplus
+/* #undef inline */
+#endif
+
+/* Define to the equivalent of the C99 'restrict' keyword, or to
+ nothing if this is not supported. Do not define if restrict is
+ supported directly. */
+#define restrict __restrict
+/* Work around a bug in Sun C++: it does not support _Restrict or
+ __restrict__, even though the corresponding Sun C compiler ends up with
+ "#define restrict _Restrict" or "#define restrict __restrict__" in the
+ previous line. Perhaps some future version of Sun C++ will work with
+ restrict; if so, hopefully it defines __RESTRICT like Sun C does. */
+#if defined __SUNPRO_CC && !defined __RESTRICT
+# define _Restrict
+# define __restrict__
+#endif
diff --git a/drivers/opus/opus_custom.h b/drivers/opus/opus_custom.h
new file mode 100644
index 0000000000..41f36bf2fb
--- /dev/null
+++ b/drivers/opus/opus_custom.h
@@ -0,0 +1,342 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Copyright (c) 2008-2012 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ @file opus_custom.h
+ @brief Opus-Custom reference implementation API
+ */
+
+#ifndef OPUS_CUSTOM_H
+#define OPUS_CUSTOM_H
+
+#include "opus_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CUSTOM_MODES
+# define OPUS_CUSTOM_EXPORT OPUS_EXPORT
+# define OPUS_CUSTOM_EXPORT_STATIC OPUS_EXPORT
+#else
+# define OPUS_CUSTOM_EXPORT
+# ifdef OPUS_BUILD
+# define OPUS_CUSTOM_EXPORT_STATIC static OPUS_INLINE
+# else
+# define OPUS_CUSTOM_EXPORT_STATIC
+# endif
+#endif
+
+/** @defgroup opus_custom Opus Custom
+ * @{
+ * Opus Custom is an optional part of the Opus specification and
+ * reference implementation which uses a distinct API from the regular
+ * API and supports frame sizes that are not normally supported.\ Use
+ * of Opus Custom is discouraged for all but very special applications
+ * for which a frame size different from 2.5, 5, 10, or 20 ms is needed
+ * (for either complexity or latency reasons) and where interoperability
+ * is less important.
+ *
+ * In addition to the interoperability limitations the use of Opus custom
+ * disables a substantial chunk of the codec and generally lowers the
+ * quality available at a given bitrate. Normally when an application needs
+ * a different frame size from the codec it should buffer to match the
+ * sizes but this adds a small amount of delay which may be important
+ * in some very low latency applications. Some transports (especially
+ * constant rate RF transports) may also work best with frames of
+ * particular durations.
+ *
+ * Libopus only supports custom modes if they are enabled at compile time.
+ *
+ * The Opus Custom API is similar to the regular API but the
+ * @ref opus_encoder_create and @ref opus_decoder_create calls take
+ * an additional mode parameter which is a structure produced by
+ * a call to @ref opus_custom_mode_create. Both the encoder and decoder
+ * must create a mode using the same sample rate (fs) and frame size
+ * (frame size) so these parameters must either be signaled out of band
+ * or fixed in a particular implementation.
+ *
+ * Similar to regular Opus the custom modes support on the fly frame size
+ * switching, but the sizes available depend on the particular frame size in
+ * use. For some initial frame sizes only a single on the fly size is available.
+ */
+
+/** Contains the state of an encoder. One encoder state is needed
+ for each stream. It is initialized once at the beginning of the
+ stream. Do *not* re-initialize the state for every frame.
+ @brief Encoder state
+ */
+typedef struct OpusCustomEncoder OpusCustomEncoder;
+
+/** State of the decoder. One decoder state is needed for each stream.
+ It is initialized once at the beginning of the stream. Do *not*
+ re-initialize the state for every frame.
+ @brief Decoder state
+ */
+typedef struct OpusCustomDecoder OpusCustomDecoder;
+
+/** The mode contains all the information necessary to create an
+ encoder. Both the encoder and decoder need to be initialized
+ with exactly the same mode, otherwise the output will be
+ corrupted.
+ @brief Mode configuration
+ */
+typedef struct OpusCustomMode OpusCustomMode;
+
+/** Creates a new mode struct. This will be passed to an encoder or
+ * decoder. The mode MUST NOT BE DESTROYED until the encoders and
+ * decoders that use it are destroyed as well.
+ * @param [in] Fs <tt>int</tt>: Sampling rate (8000 to 96000 Hz)
+ * @param [in] frame_size <tt>int</tt>: Number of samples (per channel) to encode in each
+ * packet (64 - 1024, prime factorization must contain zero or more 2s, 3s, or 5s and no other primes)
+ * @param [out] error <tt>int*</tt>: Returned error code (if NULL, no error will be returned)
+ * @return A newly created mode
+ */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error);
+
+/** Destroys a mode struct. Only call this after all encoders and
+ * decoders using this mode are destroyed as well.
+ * @param [in] mode <tt>OpusCustomMode*</tt>: Mode to be freed.
+ */
+OPUS_CUSTOM_EXPORT void opus_custom_mode_destroy(OpusCustomMode *mode);
+
+
+#if !defined(OPUS_BUILD) || defined(CELT_ENCODER_C)
+
+/* Encoder */
+/** Gets the size of an OpusCustomEncoder structure.
+ * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration
+ * @param [in] channels <tt>int</tt>: Number of channels
+ * @returns size
+ */
+OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_size(
+ const OpusCustomMode *mode,
+ int channels
+) OPUS_ARG_NONNULL(1);
+
+# ifdef CUSTOM_MODES
+/** Initializes a previously allocated encoder state
+ * The memory pointed to by st must be the size returned by opus_custom_encoder_get_size.
+ * This is intended for applications which use their own allocator instead of malloc.
+ * @see opus_custom_encoder_create(),opus_custom_encoder_get_size()
+ * To reset a previously initialized state use the OPUS_RESET_STATE CTL.
+ * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
+ * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of
+ * the stream (must be the same characteristics as used for the
+ * decoder)
+ * @param [in] channels <tt>int</tt>: Number of channels
+ * @return OPUS_OK Success or @ref opus_errorcodes
+ */
+OPUS_CUSTOM_EXPORT int opus_custom_encoder_init(
+ OpusCustomEncoder *st,
+ const OpusCustomMode *mode,
+ int channels
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+# endif
+#endif
+
+
+/** Creates a new encoder state. Each stream needs its own encoder
+ * state (can't be shared across simultaneous streams).
+ * @param [in] mode <tt>OpusCustomMode*</tt>: Contains all the information about the characteristics of
+ * the stream (must be the same characteristics as used for the
+ * decoder)
+ * @param [in] channels <tt>int</tt>: Number of channels
+ * @param [out] error <tt>int*</tt>: Returns an error code
+ * @return Newly created encoder state.
+*/
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomEncoder *opus_custom_encoder_create(
+ const OpusCustomMode *mode,
+ int channels,
+ int *error
+) OPUS_ARG_NONNULL(1);
+
+
+/** Destroys an encoder state.
+ * @param[in] st <tt>OpusCustomEncoder*</tt>: State to be freed.
+ */
+OPUS_CUSTOM_EXPORT void opus_custom_encoder_destroy(OpusCustomEncoder *st);
+
+/** Encodes a frame of audio.
+ * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
+ * @param [in] pcm <tt>float*</tt>: PCM audio in float format, with a normal range of +/-1.0.
+ * Samples with a range beyond +/-1.0 are supported but will
+ * be clipped by decoders using the integer API and should
+ * only be used if it is known that the far end supports
+ * extended dynamic range. There must be exactly
+ * frame_size samples per channel.
+ * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal
+ * @param [out] compressed <tt>unsigned char *</tt>: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long.
+ * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame
+ * (can change from one frame to another)
+ * @return Number of bytes written to "compressed".
+ * If negative, an error has occurred (see error codes). It is IMPORTANT that
+ * the length returned be somehow transmitted to the decoder. Otherwise, no
+ * decoding is possible.
+ */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode_float(
+ OpusCustomEncoder *st,
+ const float *pcm,
+ int frame_size,
+ unsigned char *compressed,
+ int maxCompressedBytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Encodes a frame of audio.
+ * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
+ * @param [in] pcm <tt>opus_int16*</tt>: PCM audio in signed 16-bit format (native endian).
+ * There must be exactly frame_size samples per channel.
+ * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal
+ * @param [out] compressed <tt>unsigned char *</tt>: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long.
+ * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame
+ * (can change from one frame to another)
+ * @return Number of bytes written to "compressed".
+ * If negative, an error has occurred (see error codes). It is IMPORTANT that
+ * the length returned be somehow transmitted to the decoder. Otherwise, no
+ * decoding is possible.
+ */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode(
+ OpusCustomEncoder *st,
+ const opus_int16 *pcm,
+ int frame_size,
+ unsigned char *compressed,
+ int maxCompressedBytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on an Opus custom encoder.
+ *
+ * Generally the request and subsequent arguments are generated
+ * by a convenience macro.
+ * @see opus_encoderctls
+ */
+OPUS_CUSTOM_EXPORT int opus_custom_encoder_ctl(OpusCustomEncoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1);
+
+
+#if !defined(OPUS_BUILD) || defined(CELT_DECODER_C)
+/* Decoder */
+
+/** Gets the size of an OpusCustomDecoder structure.
+ * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration
+ * @param [in] channels <tt>int</tt>: Number of channels
+ * @returns size
+ */
+OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_decoder_get_size(
+ const OpusCustomMode *mode,
+ int channels
+) OPUS_ARG_NONNULL(1);
+
+/** Initializes a previously allocated decoder state
+ * The memory pointed to by st must be the size returned by opus_custom_decoder_get_size.
+ * This is intended for applications which use their own allocator instead of malloc.
+ * @see opus_custom_decoder_create(),opus_custom_decoder_get_size()
+ * To reset a previously initialized state use the OPUS_RESET_STATE CTL.
+ * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state
+ * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of
+ * the stream (must be the same characteristics as used for the
+ * encoder)
+ * @param [in] channels <tt>int</tt>: Number of channels
+ * @return OPUS_OK Success or @ref opus_errorcodes
+ */
+OPUS_CUSTOM_EXPORT_STATIC int opus_custom_decoder_init(
+ OpusCustomDecoder *st,
+ const OpusCustomMode *mode,
+ int channels
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+
+#endif
+
+
+/** Creates a new decoder state. Each stream needs its own decoder state (can't
+ * be shared across simultaneous streams).
+ * @param [in] mode <tt>OpusCustomMode*</tt>: Contains all the information about the characteristics of the
+ * stream (must be the same characteristics as used for the encoder)
+ * @param [in] channels <tt>int</tt>: Number of channels
+ * @param [out] error <tt>int*</tt>: Returns an error code
+ * @return Newly created decoder state.
+ */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomDecoder *opus_custom_decoder_create(
+ const OpusCustomMode *mode,
+ int channels,
+ int *error
+) OPUS_ARG_NONNULL(1);
+
+/** Destroys a decoder state.
+ * @param[in] st <tt>OpusCustomDecoder*</tt>: State to be freed.
+ */
+OPUS_CUSTOM_EXPORT void opus_custom_decoder_destroy(OpusCustomDecoder *st);
+
+/** Decode an opus custom frame with floating point output
+ * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state
+ * @param [in] data <tt>const unsigned char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+ * @param [in] len <tt>int</tt>: Number of bytes in payload
+ * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length
+ * is frame_size*channels*sizeof(float)
+ * @param [in] frame_size Number of samples per channel of available space in *pcm.
+ * @returns Number of decoded samples or @ref opus_errorcodes
+ */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode_float(
+ OpusCustomDecoder *st,
+ const unsigned char *data,
+ int len,
+ float *pcm,
+ int frame_size
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Decode an opus custom frame
+ * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state
+ * @param [in] data <tt>const unsigned char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+ * @param [in] len <tt>int</tt>: Number of bytes in payload
+ * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length
+ * is frame_size*channels*sizeof(opus_int16)
+ * @param [in] frame_size Number of samples per channel of available space in *pcm.
+ * @returns Number of decoded samples or @ref opus_errorcodes
+ */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode(
+ OpusCustomDecoder *st,
+ const unsigned char *data,
+ int len,
+ opus_int16 *pcm,
+ int frame_size
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on an Opus custom decoder.
+ *
+ * Generally the request and subsequent arguments are generated
+ * by a convenience macro.
+ * @see opus_genericctls
+ */
+OPUS_CUSTOM_EXPORT int opus_custom_decoder_ctl(OpusCustomDecoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_CUSTOM_H */
diff --git a/drivers/opus/opus_decoder.c b/drivers/opus/opus_decoder.c
new file mode 100644
index 0000000000..c5d4cc6aaa
--- /dev/null
+++ b/drivers/opus/opus_decoder.c
@@ -0,0 +1,970 @@
+/* Copyright (c) 2010 Xiph.Org Foundation, Skype Limited
+ Written by Jean-Marc Valin and Koen Vos */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+# include "opus_config.h"
+#endif
+
+#ifndef OPUS_BUILD
+# error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details."
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
+# pragma message "You appear to be compiling without optimization, if so opus will be very slow."
+#endif
+
+#include <stdarg.h>
+#include "celt.h"
+#include "opus.h"
+#include "entdec.h"
+#include "opus_modes.h"
+#include "API.h"
+#include "stack_alloc.h"
+#include "float_cast.h"
+#include "opus_private.h"
+#include "os_support.h"
+#include "structs.h"
+#include "define.h"
+#include "mathops.h"
+#include "cpu_support.h"
+
+/* Top-level Opus decoder state. opus_decoder_init() lays out the SILK and
+ CELT decoder states after this struct inside the same memory block and
+ stores their byte offsets from the start of the struct below. */
+struct OpusDecoder {
+ int celt_dec_offset; /* Byte offset from this struct to the CELT decoder state */
+ int silk_dec_offset; /* Byte offset from this struct to the SILK decoder state */
+ int channels; /* Channel count passed to opus_decoder_init() (1 or 2) */
+ opus_int32 Fs; /** Sampling rate (at the API level) */
+ silk_DecControlStruct DecControl; /* Control structure handed to silk_Decode() */
+ int decode_gain; /* Output gain; applied by opus_decode_frame() when non-zero */
+
+ /* Everything beyond this point gets cleared on a reset */
+#define OPUS_DECODER_RESET_START stream_channels
+ int stream_channels; /* Initialised to channels, then taken from each decoded packet */
+
+ int bandwidth; /* Bandwidth of the current packet (OPUS_BANDWIDTH_*) */
+ int mode; /* Coding mode of the current packet (MODE_*) */
+ int prev_mode; /* Mode of the previously decoded frame; 0 until a packet has been decoded */
+ int frame_size; /* Samples per channel in one frame of the current packet */
+ int prev_redundancy; /* Non-zero if the previous frame carried a SILK->CELT redundant frame */
+ int last_packet_duration; /* Samples per channel produced by the last decode call */
+#ifndef OPUS_FIXED_POINT
+ opus_val16 softclip_mem[2]; /* presumably soft-clipping memory -- not used in the code visible here; confirm */
+#endif
+
+ opus_uint32 rangeFinal; /* Range-coder state after the last frame, XORed with that of any redundant frame; 0 when the payload was <= 1 byte */
+};
+
+#ifdef OPUS_FIXED_POINT
+/* Clamps a 32-bit value to the signed 16-bit range [-32768, 32767]. */
+static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
+   if (x > 32767)
+      return 32767;
+   if (x < -32768)
+      return -32768;
+   return (opus_int16)x;
+}
+#endif
+
+
+/* Returns the number of bytes needed for a complete decoder state with the
+   given channel count: the aligned OpusDecoder struct followed by the SILK
+   and CELT decoder states. Returns 0 if channels is neither 1 nor 2, or if
+   the SILK size query fails. */
+int opus_decoder_get_size(int channels)
+{
+   int silk_bytes;
+   int celt_bytes;
+
+   if (channels != 1 && channels != 2)
+      return 0;
+   if (silk_Get_Decoder_Size( &silk_bytes ))
+      return 0;
+
+   silk_bytes = align(silk_bytes);
+   celt_bytes = celt_decoder_get_size(channels);
+   return align(sizeof(OpusDecoder))+silk_bytes+celt_bytes;
+}
+
+/* Initialises a caller-allocated decoder state of at least
+   opus_decoder_get_size(channels) bytes.
+   Fs must be 8000, 12000, 16000, 24000 or 48000 and channels 1 or 2,
+   otherwise OPUS_BAD_ARG is returned before the state is touched.
+   Returns OPUS_OK on success, or OPUS_INTERNAL_ERROR if the SILK or CELT
+   decoder cannot be set up. */
+int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
+{
+   void *silk_state;
+   CELTDecoder *celt_state;
+   int status;
+   int silk_bytes;
+   int rate_ok;
+   int channels_ok;
+
+   rate_ok = (Fs==48000 || Fs==24000 || Fs==16000 || Fs==12000 || Fs==8000);
+   channels_ok = (channels==1 || channels==2);
+   if (!rate_ok || !channels_ok)
+      return OPUS_BAD_ARG;
+
+   /* Zero the whole block: this struct plus both embedded decoder states */
+   OPUS_CLEAR((char*)st, opus_decoder_get_size(channels));
+
+   /* Query the size of the SILK decoder state */
+   status = silk_Get_Decoder_Size(&silk_bytes);
+   if (status)
+      return OPUS_INTERNAL_ERROR;
+   silk_bytes = align(silk_bytes);
+
+   /* The SILK state follows the (aligned) struct, the CELT state follows SILK */
+   st->silk_dec_offset = align(sizeof(OpusDecoder));
+   st->celt_dec_offset = st->silk_dec_offset+silk_bytes;
+   silk_state = (char*)st+st->silk_dec_offset;
+   celt_state = (CELTDecoder*)((char*)st+st->celt_dec_offset);
+
+   st->channels = channels;
+   st->stream_channels = channels;
+
+   st->Fs = Fs;
+   st->DecControl.API_sampleRate = st->Fs;
+   st->DecControl.nChannelsAPI = st->channels;
+
+   /* Reset the SILK decoder */
+   status = silk_InitDecoder( silk_state );
+   if (status)
+      return OPUS_INTERNAL_ERROR;
+
+   /* Initialize the CELT decoder */
+   status = celt_decoder_init(celt_state, Fs, channels);
+   if (status != OPUS_OK)
+      return OPUS_INTERNAL_ERROR;
+
+   celt_decoder_ctl(celt_state, CELT_SET_SIGNALLING(0));
+
+   /* No packet decoded yet; default frame size is Fs/400 samples (2.5 ms) */
+   st->prev_mode = 0;
+   st->frame_size = Fs/400;
+   return OPUS_OK;
+}
+
+/* Allocates and initialises a decoder state.
+   Fs must be 8000, 12000, 16000, 24000 or 48000 and channels 1 or 2.
+   If error is non-NULL it receives OPUS_BAD_ARG, OPUS_ALLOC_FAIL or the
+   result of opus_decoder_init().
+   Returns the new state, or NULL on any failure. */
+OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error)
+{
+   OpusDecoder *dec;
+   int status;
+   int rate_ok = (Fs==48000 || Fs==24000 || Fs==16000 || Fs==12000 || Fs==8000);
+   int channels_ok = (channels==1 || channels==2);
+
+   if (!rate_ok || !channels_ok)
+   {
+      if (error != NULL)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   dec = (OpusDecoder *)opus_alloc(opus_decoder_get_size(channels));
+   if (dec == NULL)
+   {
+      if (error != NULL)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+
+   status = opus_decoder_init(dec, Fs, channels);
+   if (error != NULL)
+      *error = status;
+   if (status == OPUS_OK)
+      return dec;
+
+   /* Initialisation failed: release the block and report no decoder */
+   opus_free(dec);
+   return NULL;
+}
+
+/* Cross-fades from in1 to in2 over `overlap` samples per channel and writes
+   the result to out (interleaved, `channels` samples per time step).
+   The weight given to in2 is the square of the window value, the weight
+   given to in1 is Q15ONE minus that. The window is read with a stride of
+   48000/Fs. Each output sample depends only on the input samples at the
+   same index, so out may be the same buffer as in1 or in2. */
+static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2,
+      opus_val16 *out, int overlap, int channels,
+      const opus_val16 *window, opus_int32 Fs)
+{
+   int ch;
+   int pos;
+   int stride = 48000/Fs;
+
+   for (ch=0; ch<channels; ch++)
+   {
+      for (pos=0; pos<overlap; pos++)
+      {
+         int k = pos*channels+ch;
+         opus_val16 w = MULT16_16_Q15(window[pos*stride], window[pos*stride]);
+         out[k] = SHR32(MAC16_16(MULT16_16(w,in2[k]),
+                                 Q15ONE-w, in1[k]), 15);
+      }
+   }
+}
+
+/* Derives the coding mode from the first byte of a packet:
+   bit 0x80 set -> MODE_CELT_ONLY; otherwise bits 0x60 both set ->
+   MODE_HYBRID; anything else -> MODE_SILK_ONLY. */
+static int opus_packet_get_mode(const unsigned char *data)
+{
+   const unsigned char first = data[0];
+
+   if (first&0x80)
+      return MODE_CELT_ONLY;
+   if ((first&0x60) == 0x60)
+      return MODE_HYBRID;
+   return MODE_SILK_ONLY;
+}
+
+/* Decodes a single frame into pcm, or conceals a missing one.
+ st: decoder state. When data is non-NULL the frame parameters are read
+ from st->mode, st->bandwidth, st->frame_size and st->stream_channels.
+ data: frame payload; NULL (or len<=1) runs the concealment path instead.
+ len: payload length in bytes.
+ pcm: interleaved output, st->channels values per sample.
+ frame_size: samples per channel available in pcm.
+ decode_fec: when non-zero, SILK is asked for FEC data and CELT gets no payload.
+ Returns the number of samples per channel written, or a negative error:
+ OPUS_BUFFER_TOO_SMALL if frame_size is below 2.5 ms, OPUS_BAD_ARG if the
+ frame does not fit in frame_size, OPUS_INTERNAL_ERROR if SILK fails on a
+ frame that was not lost, or the error returned by the CELT decoder.
+ The function calls itself with data==NULL to split long concealment
+ requests and to produce the audio used for mode transitions. */
+static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
+ opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{
+ void *silk_dec;
+ CELTDecoder *celt_dec;
+ int i, silk_ret=0, celt_ret=0;
+ ec_dec dec;
+ opus_int32 silk_frame_size;
+ int pcm_silk_size;
+ VARDECL(opus_int16, pcm_silk);
+ int pcm_transition_silk_size;
+ VARDECL(opus_val16, pcm_transition_silk);
+ int pcm_transition_celt_size;
+ VARDECL(opus_val16, pcm_transition_celt);
+ opus_val16 *pcm_transition;
+ int redundant_audio_size;
+ VARDECL(opus_val16, redundant_audio);
+
+ int audiosize;
+ int mode;
+ int transition=0;
+ int start_band;
+ int redundancy=0;
+ int redundancy_bytes = 0;
+ int celt_to_silk=0;
+ int c;
+ int F2_5, F5, F10, F20;
+ const opus_val16 *window;
+ opus_uint32 redundant_rng = 0;
+ ALLOC_STACK;
+
+ silk_dec = (char*)st+st->silk_dec_offset;
+ celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
+ /* Frame durations of 20, 10, 5 and 2.5 ms expressed in samples at st->Fs */
+ F20 = st->Fs/50;
+ F10 = F20>>1;
+ F5 = F10>>1;
+ F2_5 = F5>>1;
+ if (frame_size < F2_5)
+ {
+ RESTORE_STACK;
+ return OPUS_BUFFER_TOO_SMALL;
+ }
+ /* Limit frame_size to avoid excessive stack allocations. */
+ frame_size = IMIN(frame_size, st->Fs/25*3);
+ /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */
+ if (len<=1)
+ {
+ data = NULL;
+ /* In that case, don't conceal more than what the ToC says */
+ frame_size = IMIN(frame_size, st->frame_size);
+ }
+ if (data != NULL)
+ {
+ audiosize = st->frame_size;
+ mode = st->mode;
+ ec_dec_init(&dec,(unsigned char*)data,len);
+ } else {
+ audiosize = frame_size;
+ mode = st->prev_mode;
+
+ if (mode == 0)
+ {
+ /* If we haven't got any packet yet, all we can do is return zeros */
+ for (i=0;i<audiosize*st->channels;i++)
+ pcm[i] = 0;
+ RESTORE_STACK;
+ return audiosize;
+ }
+
+ /* Avoids trying to run the PLC on sizes other than 2.5 (CELT), 5 (CELT),
+ 10, or 20 (e.g. 12.5 or 30 ms). */
+ if (audiosize > F20)
+ {
+ do {
+ int ret = opus_decode_frame(st, NULL, 0, pcm, IMIN(audiosize, F20), 0);
+ if (ret<0)
+ {
+ RESTORE_STACK;
+ return ret;
+ }
+ pcm += ret*st->channels;
+ audiosize -= ret;
+ } while (audiosize > 0);
+ RESTORE_STACK;
+ return frame_size;
+ } else if (audiosize < F20)
+ {
+ if (audiosize > F10)
+ audiosize = F10;
+ else if (mode != MODE_SILK_ONLY && audiosize > F5 && audiosize < F10)
+ audiosize = F5;
+ }
+ }
+
+ pcm_transition_silk_size = ALLOC_NONE;
+ pcm_transition_celt_size = ALLOC_NONE;
+ /* A transition is needed when switching between CELT-only and a SILK-based
+ mode, unless the previous frame already carried redundancy for it */
+ if (data!=NULL && st->prev_mode > 0 && (
+ (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy)
+ || (mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) )
+ )
+ {
+ transition = 1;
+ /* Decide where to allocate the stack memory for pcm_transition */
+ if (mode == MODE_CELT_ONLY)
+ pcm_transition_celt_size = F5*st->channels;
+ else
+ pcm_transition_silk_size = F5*st->channels;
+ }
+ ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16);
+ if (transition && mode == MODE_CELT_ONLY)
+ {
+ pcm_transition = pcm_transition_celt;
+ /* Concealment output of the previous mode, used later for the cross-fade */
+ opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
+ }
+ if (audiosize > frame_size)
+ {
+ /*fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);*/
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ } else {
+ frame_size = audiosize;
+ }
+
+ /* Don't allocate any memory when in CELT-only mode */
+ pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
+ ALLOC(pcm_silk, pcm_silk_size, opus_int16);
+
+ /* SILK processing */
+ if (mode != MODE_CELT_ONLY)
+ {
+ int lost_flag, decoded_samples;
+ opus_int16 *pcm_ptr = pcm_silk;
+
+ if (st->prev_mode==MODE_CELT_ONLY)
+ silk_InitDecoder( silk_dec );
+
+ /* The SILK PLC cannot produce frames of less than 10 ms */
+ st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs);
+
+ if (data != NULL)
+ {
+ st->DecControl.nChannelsInternal = st->stream_channels;
+ if( mode == MODE_SILK_ONLY ) {
+ if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) {
+ st->DecControl.internalSampleRate = 8000;
+ } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) {
+ st->DecControl.internalSampleRate = 12000;
+ } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) {
+ st->DecControl.internalSampleRate = 16000;
+ } else {
+ st->DecControl.internalSampleRate = 16000;
+ silk_assert( 0 );
+ }
+ } else {
+ /* Hybrid mode */
+ st->DecControl.internalSampleRate = 16000;
+ }
+ }
+
+ /* 1 when there is no payload (concealment), 2 when decode_fec is set, 0 otherwise */
+ lost_flag = data == NULL ? 1 : 2 * decode_fec;
+ decoded_samples = 0;
+ do {
+ /* Call SILK decoder */
+ int first_frame = decoded_samples == 0;
+ silk_ret = silk_Decode( silk_dec, &st->DecControl,
+ lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size );
+ if( silk_ret ) {
+ if (lost_flag) {
+ /* PLC failure should not be fatal */
+ silk_frame_size = frame_size;
+ for (i=0;i<frame_size*st->channels;i++)
+ pcm_ptr[i] = 0;
+ } else {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ }
+ pcm_ptr += silk_frame_size * st->channels;
+ decoded_samples += silk_frame_size;
+ } while( decoded_samples < frame_size );
+ }
+
+ start_band = 0;
+ if (!decode_fec && mode != MODE_CELT_ONLY && data != NULL
+ && ec_tell(&dec)+17+20*(st->mode == MODE_HYBRID) <= 8*len)
+ {
+ /* Check if we have a redundant 0-8 kHz band */
+ if (mode == MODE_HYBRID)
+ redundancy = ec_dec_bit_logp(&dec, 12);
+ else
+ redundancy = 1;
+ if (redundancy)
+ {
+ celt_to_silk = ec_dec_bit_logp(&dec, 1);
+ /* redundancy_bytes will be at least two, in the non-hybrid
+ case due to the ec_tell() check above */
+ redundancy_bytes = mode==MODE_HYBRID ?
+ (opus_int32)ec_dec_uint(&dec, 256)+2 :
+ len-((ec_tell(&dec)+7)>>3);
+ len -= redundancy_bytes;
+ /* This is a sanity check. It should never happen for a valid
+ packet, so the exact behaviour is not normative. */
+ if (len*8 < ec_tell(&dec))
+ {
+ len = 0;
+ redundancy_bytes = 0;
+ redundancy = 0;
+ }
+ /* Shrink decoder because of raw bits */
+ dec.storage -= redundancy_bytes;
+ }
+ }
+ if (mode != MODE_CELT_ONLY)
+ start_band = 17;
+
+ {
+ int endband=21;
+
+ switch(st->bandwidth)
+ {
+ case OPUS_BANDWIDTH_NARROWBAND:
+ endband = 13;
+ break;
+ case OPUS_BANDWIDTH_MEDIUMBAND:
+ case OPUS_BANDWIDTH_WIDEBAND:
+ endband = 17;
+ break;
+ case OPUS_BANDWIDTH_SUPERWIDEBAND:
+ endband = 19;
+ break;
+ case OPUS_BANDWIDTH_FULLBAND:
+ endband = 21;
+ break;
+ }
+ celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband));
+ celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels));
+ }
+
+ /* A redundant frame replaces the transition cross-fade */
+ if (redundancy)
+ {
+ transition = 0;
+ pcm_transition_silk_size=ALLOC_NONE;
+ }
+
+ ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16);
+
+ if (transition && mode != MODE_CELT_ONLY)
+ {
+ pcm_transition = pcm_transition_silk;
+ opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
+ }
+
+ /* Only allocate memory for redundancy if/when needed */
+ redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE;
+ ALLOC(redundant_audio, redundant_audio_size, opus_val16);
+
+ /* 5 ms redundant frame for CELT->SILK*/
+ if (redundancy && celt_to_silk)
+ {
+ celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
+ celt_decode_with_ec(celt_dec, data+len, redundancy_bytes,
+ redundant_audio, F5, NULL);
+ celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
+ }
+
+ /* MUST be after PLC */
+ celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band));
+
+ if (mode != MODE_SILK_ONLY)
+ {
+ int celt_frame_size = IMIN(F20, frame_size);
+ /* Make sure to discard any previous CELT state */
+ if (mode != st->prev_mode && st->prev_mode > 0 && !st->prev_redundancy)
+ celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
+ /* Decode CELT */
+ celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data,
+ len, pcm, celt_frame_size, &dec);
+ } else {
+ unsigned char silence[2] = {0xFF, 0xFF};
+ for (i=0;i<frame_size*st->channels;i++)
+ pcm[i] = 0;
+ /* For hybrid -> SILK transitions, we let the CELT MDCT
+ do a fade-out by decoding a silence frame */
+ if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) )
+ {
+ celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
+ celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL);
+ }
+ }
+
+ /* Add the SILK output to whatever CELT produced (or to the zeroed buffer) */
+ if (mode != MODE_CELT_ONLY)
+ {
+#ifdef OPUS_FIXED_POINT
+ for (i=0;i<frame_size*st->channels;i++)
+ pcm[i] = SAT16(pcm[i] + pcm_silk[i]);
+#else
+ for (i=0;i<frame_size*st->channels;i++)
+ pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]);
+#endif
+ }
+
+ {
+ const CELTMode *celt_mode;
+ celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode));
+ window = celt_mode->window;
+ }
+
+ /* 5 ms redundant frame for SILK->CELT */
+ if (redundancy && !celt_to_silk)
+ {
+ celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
+ celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
+
+ celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL);
+ celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
+ smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,
+ pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);
+ }
+ if (redundancy && celt_to_silk)
+ {
+ for (c=0;c<st->channels;c++)
+ {
+ for (i=0;i<F2_5;i++)
+ pcm[st->channels*i+c] = redundant_audio[st->channels*i+c];
+ }
+ smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5,
+ pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs);
+ }
+ if (transition)
+ {
+ if (audiosize >= F5)
+ {
+ for (i=0;i<st->channels*F2_5;i++)
+ pcm[i] = pcm_transition[i];
+ smooth_fade(pcm_transition+st->channels*F2_5, pcm+st->channels*F2_5,
+ pcm+st->channels*F2_5, F2_5,
+ st->channels, window, st->Fs);
+ } else {
+ /* Not enough time to do a clean transition, but we do it anyway
+ This will not preserve amplitude perfectly and may introduce
+ a bit of temporal aliasing, but it shouldn't be too bad and
+ that's pretty much the best we can do. In any case, generating this
+ transition is pretty silly in the first place */
+ smooth_fade(pcm_transition, pcm,
+ pcm, F2_5,
+ st->channels, window, st->Fs);
+ }
+ }
+
+ if(st->decode_gain)
+ {
+ opus_val32 gain;
+ gain = celt_exp2(MULT16_16_P15(QCONST16(6.48814081e-4f, 25), st->decode_gain));
+ for (i=0;i<frame_size*st->channels;i++)
+ {
+ opus_val32 x;
+ x = MULT16_32_P16(pcm[i],gain);
+ pcm[i] = SATURATE(x, 32767);
+ }
+ }
+
+ if (len <= 1)
+ st->rangeFinal = 0;
+ else
+ st->rangeFinal = dec.rng ^ redundant_rng;
+
+ st->prev_mode = mode;
+ st->prev_redundancy = redundancy && !celt_to_silk;
+
+ if (celt_ret>=0)
+ {
+ if (OPUS_CHECK_ARRAY(pcm, audiosize*st->channels))
+ OPUS_PRINT_INT(audiosize);
+ }
+
+ RESTORE_STACK;
+ return celt_ret < 0 ? celt_ret : audiosize;
+
+}
+
+int opus_decode_native(OpusDecoder *st, const unsigned char *data,
+ opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec,
+ int self_delimited, opus_int32 *packet_offset, int soft_clip)
+{
+ int i, nb_samples;
+ int count, offset;
+ unsigned char toc;
+ int packet_frame_size, packet_bandwidth, packet_mode, packet_stream_channels;
+ /* 48 x 2.5 ms = 120 ms */
+ opus_int16 size[48];
+ if (decode_fec<0 || decode_fec>1)
+ return OPUS_BAD_ARG;
+ /* For FEC/PLC, frame_size has to be a multiple of 2.5 ms */
+ if ((decode_fec || len==0 || data==NULL) && frame_size%(st->Fs/400)!=0)
+ return OPUS_BAD_ARG;
+ if (len==0 || data==NULL)
+ {
+ int pcm_count=0;
+ do {
+ int ret;
+ ret = opus_decode_frame(st, NULL, 0, pcm+pcm_count*st->channels, frame_size-pcm_count, 0);
+ if (ret<0)
+ return ret;
+ pcm_count += ret;
+ } while (pcm_count < frame_size);
+ celt_assert(pcm_count == frame_size);
+ if (OPUS_CHECK_ARRAY(pcm, pcm_count*st->channels))
+ OPUS_PRINT_INT(pcm_count);
+ st->last_packet_duration = pcm_count;
+ return pcm_count;
+ } else if (len<0)
+ return OPUS_BAD_ARG;
+
+ packet_mode = opus_packet_get_mode(data);
+ packet_bandwidth = opus_packet_get_bandwidth(data);
+ packet_frame_size = opus_packet_get_samples_per_frame(data, st->Fs);
+ packet_stream_channels = opus_packet_get_nb_channels(data);
+
+ count = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL,
+ size, &offset, packet_offset);
+ if (count<0)
+ return count;
+
+ data += offset;
+
+ if (decode_fec)
+ {
+ int duration_copy;
+ int ret;
+ /* If no FEC can be present, run the PLC (recursive call) */
+ if (frame_size < packet_frame_size || packet_mode == MODE_CELT_ONLY || st->mode == MODE_CELT_ONLY)
+ return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, soft_clip);
+ /* Otherwise, run the PLC on everything except the size for which we might have FEC */
+ duration_copy = st->last_packet_duration;
+ if (frame_size-packet_frame_size!=0)
+ {
+ ret = opus_decode_native(st, NULL, 0, pcm, frame_size-packet_frame_size, 0, 0, NULL, soft_clip);
+ if (ret<0)
+ {
+ st->last_packet_duration = duration_copy;
+ return ret;
+ }
+ celt_assert(ret==frame_size-packet_frame_size);
+ }
+ /* Complete with FEC */
+ st->mode = packet_mode;
+ st->bandwidth = packet_bandwidth;
+ st->frame_size = packet_frame_size;
+ st->stream_channels = packet_stream_channels;
+ ret = opus_decode_frame(st, data, size[0], pcm+st->channels*(frame_size-packet_frame_size),
+ packet_frame_size, 1);
+ if (ret<0)
+ return ret;
+ else {
+ if (OPUS_CHECK_ARRAY(pcm, frame_size*st->channels))
+ OPUS_PRINT_INT(frame_size);
+ st->last_packet_duration = frame_size;
+ return frame_size;
+ }
+ }
+
+ if (count*packet_frame_size > frame_size)
+ return OPUS_BUFFER_TOO_SMALL;
+
+ /* Update the state as the last step to avoid updating it on an invalid packet */
+ st->mode = packet_mode;
+ st->bandwidth = packet_bandwidth;
+ st->frame_size = packet_frame_size;
+ st->stream_channels = packet_stream_channels;
+
+ nb_samples=0;
+ for (i=0;i<count;i++)
+ {
+ int ret;
+ ret = opus_decode_frame(st, data, size[i], pcm+nb_samples*st->channels, frame_size-nb_samples, 0);
+ if (ret<0)
+ return ret;
+ celt_assert(ret==packet_frame_size);
+ data += size[i];
+ nb_samples += ret;
+ }
+ st->last_packet_duration = nb_samples;
+ if (OPUS_CHECK_ARRAY(pcm, nb_samples*st->channels))
+ OPUS_PRINT_INT(nb_samples);
+#ifndef OPUS_FIXED_POINT
+ if (soft_clip)
+ opus_pcm_soft_clip(pcm, nb_samples, st->channels, st->softclip_mem);
+ else
+ st->softclip_mem[0]=st->softclip_mem[1]=0;
+#endif
+ return nb_samples;
+}
+
+#ifdef OPUS_FIXED_POINT
+
+int opus_decode(OpusDecoder *st, const unsigned char *data,
+ opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{ /* OPUS_FIXED_POINT build: validates frame_size, then decodes straight into the caller's pcm buffer. */
+ if(frame_size<=0)
+ return OPUS_BAD_ARG;
+ return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); /* self_delimited=0, packet_offset=NULL, soft_clip=0 */
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_decode_float(OpusDecoder *st, const unsigned char *data,
+ opus_int32 len, float *pcm, int frame_size, int decode_fec)
+{ /* Float API of the OPUS_FIXED_POINT build: decodes into a temporary opus_int16 buffer, then scales each sample by 1/32768 into pcm. */
+ VARDECL(opus_int16, out);
+ int ret, i;
+ ALLOC_STACK;
+
+ if(frame_size<=0)
+ {
+ RESTORE_STACK; /* pairs with ALLOC_STACK above before the early return */
+ return OPUS_BAD_ARG;
+ }
+ ALLOC(out, frame_size*st->channels, opus_int16); /* frame_size samples for each of st->channels channels */
+
+ ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0); /* self_delimited=0, packet_offset=NULL, soft_clip=0 */
+ if (ret > 0) /* ret is the per-channel sample count; errors and 0 skip the conversion */
+ {
+ for (i=0;i<ret*st->channels;i++)
+ pcm[i] = (1.f/32768.f)*(out[i]);
+ }
+ RESTORE_STACK;
+ return ret;
+}
+#endif
+
+
+#else
+int opus_decode(OpusDecoder *st, const unsigned char *data,
+ opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
+{ /* Non-fixed-point build: decodes into a temporary float buffer with soft clipping requested, then converts each sample with FLOAT2INT16. */
+ VARDECL(float, out);
+ int ret, i;
+ ALLOC_STACK;
+
+ if(frame_size<=0)
+ {
+ RESTORE_STACK; /* pairs with ALLOC_STACK above before the early return */
+ return OPUS_BAD_ARG;
+ }
+
+ ALLOC(out, frame_size*st->channels, float); /* frame_size samples for each of st->channels channels */
+
+ ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); /* final 1 is soft_clip: enabled on this 16-bit output path */
+ if (ret > 0) /* ret is the per-channel sample count; errors and 0 skip the conversion */
+ {
+ for (i=0;i<ret*st->channels;i++)
+ pcm[i] = FLOAT2INT16(out[i]);
+ }
+ RESTORE_STACK;
+ return ret;
+}
+
+int opus_decode_float(OpusDecoder *st, const unsigned char *data,
+ opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{ /* Non-fixed-point build: validates frame_size, then decodes straight into the caller's pcm buffer without soft clipping. */
+ if(frame_size<=0)
+ return OPUS_BAD_ARG;
+ return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); /* self_delimited=0, packet_offset=NULL, soft_clip=0 */
+}
+
+#endif
+
+int opus_decoder_ctl(OpusDecoder *st, int request, ...)
+{ /* Handles one decoder CTL request; returns OPUS_OK, OPUS_BAD_ARG for a rejected argument, or OPUS_UNIMPLEMENTED for an unknown request. */
+ int ret = OPUS_OK;
+ va_list ap;
+ void *silk_dec;
+ CELTDecoder *celt_dec;
+
+ silk_dec = (char*)st+st->silk_dec_offset; /* both sub-decoders are located by byte offset from st */
+ celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
+
+
+ va_start(ap, request);
+
+ switch (request)
+ {
+ case OPUS_GET_BANDWIDTH_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->bandwidth;
+ }
+ break;
+ case OPUS_GET_FINAL_RANGE_REQUEST:
+ {
+ opus_uint32 *value = va_arg(ap, opus_uint32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->rangeFinal;
+ }
+ break;
+ case OPUS_RESET_STATE:
+ {
+ OPUS_CLEAR((char*)&st->OPUS_DECODER_RESET_START, /* clears from the OPUS_DECODER_RESET_START field to the end of the struct */
+ sizeof(OpusDecoder)-
+ ((char*)&st->OPUS_DECODER_RESET_START - (char*)st));
+
+ celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
+ silk_InitDecoder( silk_dec );
+ st->stream_channels = st->channels; /* re-seed fields that the clear above zeroed */
+ st->frame_size = st->Fs/400; /* Fs/400 samples = 2.5 ms */
+ }
+ break;
+ case OPUS_GET_SAMPLE_RATE_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->Fs;
+ }
+ break;
+ case OPUS_GET_PITCH_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ if (st->prev_mode == MODE_CELT_ONLY) /* the pitch comes from whichever layer decoded the previous frame */
+ celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value));
+ else
+ *value = st->DecControl.prevPitchLag;
+ }
+ break;
+ case OPUS_GET_GAIN_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->decode_gain;
+ }
+ break;
+ case OPUS_SET_GAIN_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value<-32768 || value>32767) /* only values in the signed 16-bit range are accepted */
+ {
+ goto bad_arg;
+ }
+ st->decode_gain = value;
+ }
+ break;
+ case OPUS_GET_LAST_PACKET_DURATION_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*); /* opus_int32*, matching the pointer type OPUS_GET_LAST_PACKET_DURATION() passes */
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->last_packet_duration;
+ }
+ break;
+ default:
+ /*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/
+ ret = OPUS_UNIMPLEMENTED;
+ break;
+ }
+
+ va_end(ap);
+ return ret;
+bad_arg: /* shared exit for rejected arguments; still closes the va_list */
+ va_end(ap);
+ return OPUS_BAD_ARG;
+}
+
+void opus_decoder_destroy(OpusDecoder *st)
+{ /* Releases the decoder state by passing st to opus_free(). */
+ opus_free(st);
+}
+
+
+int opus_packet_get_bandwidth(const unsigned char *data)
+{ /* Maps the first packet byte (data[0]) to one of the OPUS_BANDWIDTH_* constants; data must point to at least one byte. */
+ int bandwidth;
+ if (data[0]&0x80) /* top bit set: bits 5-6 index the bandwidth, counted from MEDIUMBAND */
+ {
+ bandwidth = OPUS_BANDWIDTH_MEDIUMBAND + ((data[0]>>5)&0x3);
+ if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) /* index 0 is reported as NARROWBAND in this branch */
+ bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+ } else if ((data[0]&0x60) == 0x60) /* top bit clear, bits 5-6 both set: bit 4 picks FULLBAND or SUPERWIDEBAND */
+ {
+ bandwidth = (data[0]&0x10) ? OPUS_BANDWIDTH_FULLBAND :
+ OPUS_BANDWIDTH_SUPERWIDEBAND;
+ } else { /* remaining values: bits 5-6 index the bandwidth, counted from NARROWBAND */
+ bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3);
+ }
+ return bandwidth;
+}
+
+int opus_packet_get_samples_per_frame(const unsigned char *data,
+ opus_int32 Fs)
+{ /* Returns the per-frame sample count at sampling rate Fs, derived from the first packet byte (data[0]). */
+ int audiosize;
+ if (data[0]&0x80) /* top bit set: bits 3-4 give n, frame is (Fs/400)*2^n, i.e. 2.5, 5, 10 or 20 ms */
+ {
+ audiosize = ((data[0]>>3)&0x3);
+ audiosize = (Fs<<audiosize)/400;
+ } else if ((data[0]&0x60) == 0x60) /* top bit clear, bits 5-6 both set: bit 3 selects 20 ms (Fs/50) or 10 ms (Fs/100) */
+ {
+ audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
+ } else { /* remaining values: bits 3-4 give n, meaning 10, 20, 40 or 60 ms */
+ audiosize = ((data[0]>>3)&0x3);
+ if (audiosize == 3) /* 60 ms is not 10 ms times a power of two, so it is computed separately */
+ audiosize = Fs*60/1000;
+ else
+ audiosize = (Fs<<audiosize)/100;
+ }
+ return audiosize;
+}
+
+int opus_packet_get_nb_channels(const unsigned char *data)
+{ /* Returns 2 when bit 2 (mask 0x4) of the first packet byte is set, otherwise 1. */
+ return (data[0]&0x4) ? 2 : 1;
+}
+
+int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len)
+{ /* Returns the number of frames in the packet, OPUS_BAD_ARG when len<1, or OPUS_INVALID_PACKET when the count byte is missing. */
+ int count;
+ if (len<1)
+ return OPUS_BAD_ARG;
+ count = packet[0]&0x3; /* low two bits of the first byte select the frame-count code */
+ if (count==0) /* code 0: a single frame */
+ return 1;
+ else if (count!=3) /* codes 1 and 2: two frames */
+ return 2;
+ else if (len<2) /* code 3 needs a second byte carrying the count */
+ return OPUS_INVALID_PACKET;
+ else
+ return packet[1]&0x3F; /* code 3: count is the low six bits of the second byte */
+}
+
+int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len,
+ opus_int32 Fs)
+{ /* Returns the packet's total sample count at rate Fs (frame count times samples per frame), or a negative error code. */
+ int samples;
+ int count = opus_packet_get_nb_frames(packet, len);
+
+ if (count<0) /* propagate the error from the frame-count parse */
+ return count;
+
+ samples = count*opus_packet_get_samples_per_frame(packet, Fs);
+ /* Can't have more than 120 ms */
+ if (samples*25 > Fs*3) /* same as samples > Fs*3/25, i.e. more than 0.120 s of audio */
+ return OPUS_INVALID_PACKET;
+ else
+ return samples;
+}
+
+int opus_decoder_get_nb_samples(const OpusDecoder *dec,
+ const unsigned char packet[], opus_int32 len)
+{ /* Convenience wrapper: counts the packet's samples at the decoder's own sampling rate (dec->Fs). */
+ return opus_packet_get_nb_samples(packet, len, dec->Fs);
+}
diff --git a/drivers/opus/opus_defines.h b/drivers/opus/opus_defines.h
new file mode 100644
index 0000000000..265089f65e
--- /dev/null
+++ b/drivers/opus/opus_defines.h
@@ -0,0 +1,726 @@
+/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
+ Written by Jean-Marc Valin and Koen Vos */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus_defines.h
+ * @brief Opus reference implementation constants
+ */
+
+#ifndef OPUS_DEFINES_H
+#define OPUS_DEFINES_H
+
+#include "opus_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @defgroup opus_errorcodes Error codes
+ * @{
+ */
+/** No error @hideinitializer*/
+#define OPUS_OK 0
+/** One or more invalid/out of range arguments @hideinitializer*/
+#define OPUS_BAD_ARG -1
+/** Not enough bytes allocated in the buffer @hideinitializer*/
+#define OPUS_BUFFER_TOO_SMALL -2
+/** An internal error was detected @hideinitializer*/
+#define OPUS_INTERNAL_ERROR -3
+/** The compressed data passed is corrupted @hideinitializer*/
+#define OPUS_INVALID_PACKET -4
+/** Invalid/unsupported request number @hideinitializer*/
+#define OPUS_UNIMPLEMENTED -5
+/** An encoder or decoder structure is invalid or already freed @hideinitializer*/
+#define OPUS_INVALID_STATE -6
+/** Memory allocation has failed @hideinitializer*/
+#define OPUS_ALLOC_FAIL -7
+/**@}*/
+
+/** @cond OPUS_INTERNAL_DOC */
+/**Export control for opus functions */
+
+#ifndef OPUS_EXPORT
+# if defined(WIN32)
+# ifdef OPUS_BUILD
+# define OPUS_EXPORT __declspec(dllexport)
+# else
+# define OPUS_EXPORT
+# endif
+# elif defined(__GNUC__) && defined(OPUS_BUILD)
+# define OPUS_EXPORT __attribute__ ((visibility ("default")))
+# else
+# define OPUS_EXPORT
+# endif
+#endif
+
+# if !defined(OPUS_GNUC_PREREQ)
+# if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+# define OPUS_GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+# else
+# define OPUS_GNUC_PREREQ(_maj,_min) 0
+# endif
+# endif
+
+#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+# if OPUS_GNUC_PREREQ(3,0)
+# define OPUS_RESTRICT __restrict__
+# elif (defined(_MSC_VER) && _MSC_VER >= 1400)
+# define OPUS_RESTRICT __restrict
+# else
+# define OPUS_RESTRICT
+# endif
+#else
+# define OPUS_RESTRICT restrict
+#endif
+
+#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+# if OPUS_GNUC_PREREQ(2,7)
+# define OPUS_INLINE __inline__
+# elif (defined(_MSC_VER))
+# define OPUS_INLINE __inline
+# else
+# define OPUS_INLINE
+# endif
+#else
+# define OPUS_INLINE inline
+#endif
+
+/**Warning attributes for opus functions
+ * NONNULL is not used in OPUS_BUILD to avoid the compiler optimizing out
+ * some paranoid null checks. */
+#if defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4)
+# define OPUS_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
+#else
+# define OPUS_WARN_UNUSED_RESULT
+#endif
+#if !defined(OPUS_BUILD) && defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4)
+# define OPUS_ARG_NONNULL(_x) __attribute__ ((__nonnull__(_x)))
+#else
+# define OPUS_ARG_NONNULL(_x)
+#endif
+
+/** These are the actual Encoder CTL ID numbers.
+ * They should not be used directly by applications.
+ * In general, SETs should be even and GETs should be odd.*/
+#define OPUS_SET_APPLICATION_REQUEST 4000
+#define OPUS_GET_APPLICATION_REQUEST 4001
+#define OPUS_SET_BITRATE_REQUEST 4002
+#define OPUS_GET_BITRATE_REQUEST 4003
+#define OPUS_SET_MAX_BANDWIDTH_REQUEST 4004
+#define OPUS_GET_MAX_BANDWIDTH_REQUEST 4005
+#define OPUS_SET_VBR_REQUEST 4006
+#define OPUS_GET_VBR_REQUEST 4007
+#define OPUS_SET_BANDWIDTH_REQUEST 4008
+#define OPUS_GET_BANDWIDTH_REQUEST 4009
+#define OPUS_SET_COMPLEXITY_REQUEST 4010
+#define OPUS_GET_COMPLEXITY_REQUEST 4011
+#define OPUS_SET_INBAND_FEC_REQUEST 4012
+#define OPUS_GET_INBAND_FEC_REQUEST 4013
+#define OPUS_SET_PACKET_LOSS_PERC_REQUEST 4014
+#define OPUS_GET_PACKET_LOSS_PERC_REQUEST 4015
+#define OPUS_SET_DTX_REQUEST 4016
+#define OPUS_GET_DTX_REQUEST 4017
+#define OPUS_SET_VBR_CONSTRAINT_REQUEST 4020
+#define OPUS_GET_VBR_CONSTRAINT_REQUEST 4021
+#define OPUS_SET_FORCE_CHANNELS_REQUEST 4022
+#define OPUS_GET_FORCE_CHANNELS_REQUEST 4023
+#define OPUS_SET_SIGNAL_REQUEST 4024
+#define OPUS_GET_SIGNAL_REQUEST 4025
+#define OPUS_GET_LOOKAHEAD_REQUEST 4027
+/* #define OPUS_RESET_STATE 4028 */
+#define OPUS_GET_SAMPLE_RATE_REQUEST 4029
+#define OPUS_GET_FINAL_RANGE_REQUEST 4031
+#define OPUS_GET_PITCH_REQUEST 4033
+#define OPUS_SET_GAIN_REQUEST 4034
+#define OPUS_GET_GAIN_REQUEST 4045 /* Should have been 4035 */
+#define OPUS_SET_LSB_DEPTH_REQUEST 4036
+#define OPUS_GET_LSB_DEPTH_REQUEST 4037
+#define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039
+#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040
+#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
+#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042
+#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043
+
+/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
+
+/* Macros to trigger compilation errors when the wrong types are provided to a CTL */
+#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
+#define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr)))
+#define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr)))
+#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr)))
+/** @endcond */
+
+/** @defgroup opus_ctlvalues Pre-defined values for CTL interface
+ * @see opus_genericctls, opus_encoderctls
+ * @{
+ */
+/* Values for the various encoder CTLs */
+#define OPUS_AUTO -1000 /**<Auto/default setting @hideinitializer*/
+#define OPUS_BITRATE_MAX -1 /**<Maximum bitrate @hideinitializer*/
+
+/** Best for most VoIP/videoconference applications where listening quality and intelligibility matter most
+ * @hideinitializer */
+#define OPUS_APPLICATION_VOIP 2048
+/** Best for broadcast/high-fidelity application where the decoded audio should be as close as possible to the input
+ * @hideinitializer */
+#define OPUS_APPLICATION_AUDIO 2049
+/** Only use when lowest-achievable latency is what matters most. Voice-optimized modes cannot be used.
+ * @hideinitializer */
+#define OPUS_APPLICATION_RESTRICTED_LOWDELAY 2051
+
+#define OPUS_SIGNAL_VOICE 3001 /**< Signal being encoded is voice */
+#define OPUS_SIGNAL_MUSIC 3002 /**< Signal being encoded is music */
+#define OPUS_BANDWIDTH_NARROWBAND 1101 /**< 4 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_MEDIUMBAND 1102 /**< 6 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_WIDEBAND 1103 /**< 8 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_SUPERWIDEBAND 1104 /**<12 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_FULLBAND 1105 /**<20 kHz bandpass @hideinitializer*/
+
+#define OPUS_FRAMESIZE_ARG 5000 /**< Select frame size from the argument (default) */
+#define OPUS_FRAMESIZE_2_5_MS 5001 /**< Use 2.5 ms frames */
+#define OPUS_FRAMESIZE_5_MS 5002 /**< Use 5 ms frames */
+#define OPUS_FRAMESIZE_10_MS 5003 /**< Use 10 ms frames */
+#define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */
+#define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */
+#define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */
+
+/**@}*/
+
+
+/** @defgroup opus_encoderctls Encoder related CTLs
+ *
+ * These are convenience macros for use with the \c opus_encoder_ctl
+ * interface. They are used to generate the appropriate series of
+ * arguments for that call, passing the correct type, size and so
+ * on as expected for each particular request.
+ *
+ * Some usage examples:
+ *
+ * @code
+ * int ret;
+ * ret = opus_encoder_ctl(enc_ctx, OPUS_SET_BANDWIDTH(OPUS_AUTO));
+ * if (ret != OPUS_OK) return ret;
+ *
+ * opus_int32 rate;
+ * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&rate));
+ *
+ * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE);
+ * @endcode
+ *
+ * @see opus_genericctls, opus_encoder
+ * @{
+ */
+
+/** Configures the encoder's computational complexity.
+ * The supported range is 0-10 inclusive with 10 representing the highest complexity.
+ * @see OPUS_GET_COMPLEXITY
+ * @param[in] x <tt>opus_int32</tt>: Allowed values: 0-10, inclusive.
+ *
+ * @hideinitializer */
+#define OPUS_SET_COMPLEXITY(x) OPUS_SET_COMPLEXITY_REQUEST, __opus_check_int(x)
+/** Gets the encoder's complexity configuration.
+ * @see OPUS_SET_COMPLEXITY
+ * @param[out] x <tt>opus_int32 *</tt>: Returns a value in the range 0-10,
+ * inclusive.
+ * @hideinitializer */
+#define OPUS_GET_COMPLEXITY(x) OPUS_GET_COMPLEXITY_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the bitrate in the encoder.
+ * Rates from 500 to 512000 bits per second are meaningful, as well as the
+ * special values #OPUS_AUTO and #OPUS_BITRATE_MAX.
+ * The value #OPUS_BITRATE_MAX can be used to cause the codec to use as much
+ * rate as it can, which is useful for controlling the rate by adjusting the
+ * output buffer size.
+ * @see OPUS_GET_BITRATE
+ * @param[in] x <tt>opus_int32</tt>: Bitrate in bits per second. The default
+ * is determined based on the number of
+ * channels and the input sampling rate.
+ * @hideinitializer */
+#define OPUS_SET_BITRATE(x) OPUS_SET_BITRATE_REQUEST, __opus_check_int(x)
+/** Gets the encoder's bitrate configuration.
+ * @see OPUS_SET_BITRATE
+ * @param[out] x <tt>opus_int32 *</tt>: Returns the bitrate in bits per second.
+ * The default is determined based on the
+ * number of channels and the input
+ * sampling rate.
+ * @hideinitializer */
+#define OPUS_GET_BITRATE(x) OPUS_GET_BITRATE_REQUEST, __opus_check_int_ptr(x)
+
+/** Enables or disables variable bitrate (VBR) in the encoder.
+ * The configured bitrate may not be met exactly because frames must
+ * be an integer number of bytes in length.
+ * @warning Only the MDCT mode of Opus can provide hard CBR behavior.
+ * @see OPUS_GET_VBR
+ * @see OPUS_SET_VBR_CONSTRAINT
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>0</dt><dd>Hard CBR. For LPC/hybrid modes at very low bit-rate, this can
+ * cause noticeable quality degradation.</dd>
+ * <dt>1</dt><dd>VBR (default). The exact type of VBR is controlled by
+ * #OPUS_SET_VBR_CONSTRAINT.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_VBR(x) OPUS_SET_VBR_REQUEST, __opus_check_int(x)
+/** Determine if variable bitrate (VBR) is enabled in the encoder.
+ * @see OPUS_SET_VBR
+ * @see OPUS_GET_VBR_CONSTRAINT
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>0</dt><dd>Hard CBR.</dd>
+ * <dt>1</dt><dd>VBR (default). The exact type of VBR may be retrieved via
+ * #OPUS_GET_VBR_CONSTRAINT.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_VBR(x) OPUS_GET_VBR_REQUEST, __opus_check_int_ptr(x)
+
+/** Enables or disables constrained VBR in the encoder.
+ * This setting is ignored when the encoder is in CBR mode.
+ * @warning Only the MDCT mode of Opus currently heeds the constraint.
+ * Speech mode ignores it completely, hybrid mode may fail to obey it
+ * if the LPC layer uses more bitrate than the constraint would have
+ * permitted.
+ * @see OPUS_GET_VBR_CONSTRAINT
+ * @see OPUS_SET_VBR
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>0</dt><dd>Unconstrained VBR.</dd>
+ * <dt>1</dt><dd>Constrained VBR (default). This creates a maximum of one
+ * frame of buffering delay assuming a transport with a
+ * serialization speed of the nominal bitrate.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_VBR_CONSTRAINT(x) OPUS_SET_VBR_CONSTRAINT_REQUEST, __opus_check_int(x)
+/** Determine if constrained VBR is enabled in the encoder.
+ * @see OPUS_SET_VBR_CONSTRAINT
+ * @see OPUS_GET_VBR
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>0</dt><dd>Unconstrained VBR.</dd>
+ * <dt>1</dt><dd>Constrained VBR (default).</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_VBR_CONSTRAINT(x) OPUS_GET_VBR_CONSTRAINT_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures mono/stereo forcing in the encoder.
+ * This can force the encoder to produce packets encoded as either mono or
+ * stereo, regardless of the format of the input audio. This is useful when
+ * the caller knows that the input signal is currently a mono source embedded
+ * in a stereo stream.
+ * @see OPUS_GET_FORCE_CHANNELS
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd>
+ * <dt>1</dt> <dd>Forced mono</dd>
+ * <dt>2</dt> <dd>Forced stereo</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_FORCE_CHANNELS(x) OPUS_SET_FORCE_CHANNELS_REQUEST, __opus_check_int(x)
+/** Gets the encoder's forced channel configuration.
+ * @see OPUS_SET_FORCE_CHANNELS
+ * @param[out] x <tt>opus_int32 *</tt>:
+ * <dl>
+ * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd>
+ * <dt>1</dt> <dd>Forced mono</dd>
+ * <dt>2</dt> <dd>Forced stereo</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_FORCE_CHANNELS(x) OPUS_GET_FORCE_CHANNELS_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the maximum bandpass that the encoder will select automatically.
+ * Applications should normally use this instead of #OPUS_SET_BANDWIDTH
+ * (leaving that set to the default, #OPUS_AUTO). This allows the
+ * application to set an upper bound based on the type of input it is
+ * providing, but still gives the encoder the freedom to reduce the bandpass
+ * when the bitrate becomes too low, for better overall quality.
+ * @see OPUS_GET_MAX_BANDWIDTH
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd>
+ * <dt>OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd>
+ * <dt>OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd>
+ * <dt>OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+ * <dt>OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband (default)</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_MAX_BANDWIDTH(x) OPUS_SET_MAX_BANDWIDTH_REQUEST, __opus_check_int(x)
+
+/** Gets the encoder's configured maximum allowed bandpass.
+ * @see OPUS_SET_MAX_BANDWIDTH
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband (default)</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_MAX_BANDWIDTH(x) OPUS_GET_MAX_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
+
+/** Sets the encoder's bandpass to a specific value.
+ * This prevents the encoder from automatically selecting the bandpass based
+ * on the available bitrate. If an application knows the bandpass of the input
+ * audio it is providing, it should normally use #OPUS_SET_MAX_BANDWIDTH
+ * instead, which still gives the encoder the freedom to reduce the bandpass
+ * when the bitrate becomes too low, for better overall quality.
+ * @see OPUS_GET_BANDWIDTH
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>#OPUS_AUTO</dt> <dd>(default)</dd>
+ * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_BANDWIDTH(x) OPUS_SET_BANDWIDTH_REQUEST, __opus_check_int(x)
+
+/** Configures the type of signal being encoded.
+ * This is a hint which helps the encoder's mode selection.
+ * @see OPUS_GET_SIGNAL
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>#OPUS_AUTO</dt> <dd>(default)</dd>
+ * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd>
+ * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_SIGNAL(x) OPUS_SET_SIGNAL_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured signal type.
+ * @see OPUS_SET_SIGNAL
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>#OPUS_AUTO</dt> <dd>(default)</dd>
+ * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd>
+ * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_SIGNAL(x) OPUS_GET_SIGNAL_REQUEST, __opus_check_int_ptr(x)
+
+
+/** Configures the encoder's intended application.
+ * The initial value is a mandatory argument to the encoder_create function.
+ * @see OPUS_GET_APPLICATION
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>#OPUS_APPLICATION_VOIP</dt>
+ * <dd>Process signal for improved speech intelligibility.</dd>
+ * <dt>#OPUS_APPLICATION_AUDIO</dt>
+ * <dd>Favor faithfulness to the original input.</dd>
+ * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+ * <dd>Configure the minimum possible coding delay by disabling certain modes
+ * of operation.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_APPLICATION(x) OPUS_SET_APPLICATION_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured application.
+ * @see OPUS_SET_APPLICATION
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>#OPUS_APPLICATION_VOIP</dt>
+ * <dd>Process signal for improved speech intelligibility.</dd>
+ * <dt>#OPUS_APPLICATION_AUDIO</dt>
+ * <dd>Favor faithfulness to the original input.</dd>
+ * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+ * <dd>Configure the minimum possible coding delay by disabling certain modes
+ * of operation.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the sampling rate the encoder or decoder was initialized with.
+ * This simply returns the <code>Fs</code> value passed to opus_encoder_init()
+ * or opus_decoder_init().
+ * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
+ * @hideinitializer
+ */
+#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the total samples of delay added by the entire codec.
+ * This can be queried by the encoder and then the provided number of samples can be
+ * skipped from the start of the decoder's output to provide time aligned input
+ * and output. From the perspective of a decoding application the real data begins this many
+ * samples late.
+ *
+ * The decoder contribution to this delay is identical for all decoders, but the
+ * encoder portion of the delay may vary from implementation to implementation,
+ * version to version, or even depend on the encoder's initial configuration.
+ * Applications needing delay compensation should call this CTL rather than
+ * hard-coding a value.
+ * @param[out] x <tt>opus_int32 *</tt>: Number of lookahead samples
+ * @hideinitializer */
+#define OPUS_GET_LOOKAHEAD(x) OPUS_GET_LOOKAHEAD_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of inband forward error correction (FEC).
+ * @note This is only applicable to the LPC layer
+ * @see OPUS_GET_INBAND_FEC
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>0</dt><dd>Disable inband FEC (default).</dd>
+ * <dt>1</dt><dd>Enable inband FEC.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_INBAND_FEC(x) OPUS_SET_INBAND_FEC_REQUEST, __opus_check_int(x)
+/** Gets encoder's configured use of inband forward error correction.
+ * @see OPUS_SET_INBAND_FEC
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>0</dt><dd>Inband FEC disabled (default).</dd>
+ * <dt>1</dt><dd>Inband FEC enabled.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's expected packet loss percentage.
+ * Higher values will trigger progressively more loss resistant behavior in the encoder
+ * at the expense of quality at a given bitrate in the lossless case, but greater quality
+ * under loss.
+ * @see OPUS_GET_PACKET_LOSS_PERC
+ * @param[in] x <tt>opus_int32</tt>: Loss percentage in the range 0-100, inclusive (default: 0).
+ * @hideinitializer */
+#define OPUS_SET_PACKET_LOSS_PERC(x) OPUS_SET_PACKET_LOSS_PERC_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured packet loss percentage.
+ * @see OPUS_SET_PACKET_LOSS_PERC
+ * @param[out] x <tt>opus_int32 *</tt>: Returns the configured loss percentage
+ * in the range 0-100, inclusive (default: 0).
+ * @hideinitializer */
+#define OPUS_GET_PACKET_LOSS_PERC(x) OPUS_GET_PACKET_LOSS_PERC_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of discontinuous transmission (DTX).
+ * @note This is only applicable to the LPC layer
+ * @see OPUS_GET_DTX
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>0</dt><dd>Disable DTX (default).</dd>
+ * <dt>1</dt><dd>Enable DTX.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_DTX(x) OPUS_SET_DTX_REQUEST, __opus_check_int(x)
+/** Gets encoder's configured use of discontinuous transmission.
+ * @see OPUS_SET_DTX
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>0</dt><dd>DTX disabled (default).</dd>
+ * <dt>1</dt><dd>DTX enabled.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x)
+/** Configures the depth of signal being encoded.
+ * This is a hint which helps the encoder identify silence and near-silence.
+ * @see OPUS_GET_LSB_DEPTH
+ * @param[in] x <tt>opus_int32</tt>: Input precision in bits, between 8 and 24
+ * (default: 24).
+ * @hideinitializer */
+#define OPUS_SET_LSB_DEPTH(x) OPUS_SET_LSB_DEPTH_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured signal depth.
+ * @see OPUS_SET_LSB_DEPTH
+ * @param[out] x <tt>opus_int32 *</tt>: Input precision in bits, between 8 and
+ * 24 (default: 24).
+ * @hideinitializer */
+#define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
+ * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
+ * @hideinitializer */
+#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of variable duration frames.
+ * When variable duration is enabled, the encoder is free to use a shorter frame
+ * size than the one requested in the opus_encode*() call.
+ * It is then the user's responsibility
+ * to verify how much audio was encoded by checking the ToC byte of the encoded
+ * packet. The part of the audio that was not encoded needs to be resent to the
+ * encoder for the next call. Do not use this option unless you <b>really</b>
+ * know what you are doing.
+ * @see OPUS_GET_EXPERT_FRAME_DURATION
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
+ * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
+ * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured use of variable duration frames.
+ * @see OPUS_SET_EXPERT_FRAME_DURATION
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
+ * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** If set to 1, disables almost all use of prediction, making frames almost
+ completely independent. This reduces quality. (default : 0)
+ * @hideinitializer */
+#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured prediction status.
+ * @hideinitializer */
+#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x)
+
+/**@}*/
+
+/** @defgroup opus_genericctls Generic CTLs
+ *
+ * These macros are used with the \c opus_decoder_ctl and
+ * \c opus_encoder_ctl calls to generate a particular
+ * request.
+ *
+ * When called on an \c OpusDecoder they apply to that
+ * particular decoder instance. When called on an
+ * \c OpusEncoder they apply to the corresponding setting
+ * on that encoder instance, if present.
+ *
+ * Some usage examples:
+ *
+ * @code
+ * int ret;
+ * opus_int32 pitch;
+ * ret = opus_decoder_ctl(dec_ctx, OPUS_GET_PITCH(&pitch));
+ * if (ret != OPUS_OK) return ret;
+ *
+ * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE);
+ * opus_decoder_ctl(dec_ctx, OPUS_RESET_STATE);
+ *
+ * opus_int32 enc_bw, dec_bw;
+ * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&enc_bw));
+ * opus_decoder_ctl(dec_ctx, OPUS_GET_BANDWIDTH(&dec_bw));
+ * if (enc_bw != dec_bw) {
+ * printf("packet bandwidth mismatch!\n");
+ * }
+ * @endcode
+ *
+ * @see opus_encoder, opus_decoder_ctl, opus_encoder_ctl, opus_decoderctls, opus_encoderctls
+ * @{
+ */
+
+/** Resets the codec state to be equivalent to a freshly initialized state.
+ * This should be called when switching streams in order to prevent
+ * the back to back decoding from giving different results from
+ * one at a time decoding.
+ * @hideinitializer */
+#define OPUS_RESET_STATE 4028
+
+/** Gets the final state of the codec's entropy coder.
+ * This is used for testing purposes.
+ * The encoder and decoder state should be identical after coding a payload
+ * (assuming no data corruption or software bugs)
+ *
+ * @param[out] x <tt>opus_uint32 *</tt>: Entropy coder state
+ *
+ * @hideinitializer */
+#define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x)
+
+/** Gets the pitch of the last decoded frame, if available.
+ * This can be used for any post-processing algorithm requiring the use of pitch,
+ * e.g. time stretching/shortening. If the last frame was not voiced, or if the
+ * pitch was not coded in the frame, then zero is returned.
+ *
+ * This CTL is only implemented for decoder instances.
+ *
+ * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
+ *
+ * @hideinitializer */
+#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the encoder's configured bandpass or the decoder's last bandpass.
+ * @see OPUS_SET_BANDWIDTH
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
+ * <dt>#OPUS_AUTO</dt> <dd>(default)</dd>
+ * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+ * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband</dd>
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
+
+/**@}*/
+
+/** @defgroup opus_decoderctls Decoder related CTLs
+ * @see opus_genericctls, opus_encoderctls, opus_decoder
+ * @{
+ */
+
+/** Configures decoder gain adjustment.
+ * Scales the decoded output by a factor specified in Q8 dB units.
+ * This has a maximum range of -32768 to 32767 inclusive, and returns
+ * OPUS_BAD_ARG otherwise. The default is zero indicating no adjustment.
+ * This setting survives decoder reset.
+ *
+ * gain = pow(10, x/(20.0*256))
+ *
+ * @param[in] x <tt>opus_int32</tt>: Amount to scale PCM signal by in Q8 dB units.
+ * @hideinitializer */
+#define OPUS_SET_GAIN(x) OPUS_SET_GAIN_REQUEST, __opus_check_int(x)
+/** Gets the decoder's configured gain adjustment. @see OPUS_SET_GAIN
+ *
+ * @param[out] x <tt>opus_int32 *</tt>: Amount to scale PCM signal by in Q8 dB units.
+ * @hideinitializer */
+#define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x)
+
+/**@}*/
+
+/** @defgroup opus_libinfo Opus library information functions
+ * @{
+ */
+
+/** Converts an opus error code into a human readable string.
+ *
+ * @param[in] error <tt>int</tt>: Error number
+ * @returns Error string
+ */
+OPUS_EXPORT const char *opus_strerror(int error);
+
+/** Gets the libopus version string.
+ *
+ * @returns Version string
+ */
+OPUS_EXPORT const char *opus_get_version_string(void);
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_DEFINES_H */
diff --git a/drivers/opus/opus_demo.c b/drivers/opus/opus_demo.c
new file mode 100644
index 0000000000..7fcf65fd8b
--- /dev/null
+++ b/drivers/opus/opus_demo.c
@@ -0,0 +1,885 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include "opus.h"
+#include "debug.h"
+#include "opus_types.h"
+#include "opus_private.h"
+#include "opus_multistream.h"
+
+#define MAX_PACKET 1500
+
+/* Prints the opus_demo command-line synopsis and option summary to stderr; argv[0] is used as the program name. */
+void print_usage( char* argv[] )
+{
+ fprintf(stderr, "Usage: %s [-e] <application> <sampling rate (Hz)> <channels (1/2)> "
+ "<bits per second> [options] <input> <output>\n", argv[0]);
+ fprintf(stderr, " %s -d <sampling rate (Hz)> <channels (1/2)> [options] <input> <output>\n\n", argv[0]);
+ fprintf(stderr, "application: voip | audio | restricted-lowdelay\n" );
+ fprintf(stderr, "options:\n" );
+ fprintf(stderr, "-e : only runs the encoder (output the bit-stream)\n" );
+ fprintf(stderr, "-d : only runs the decoder (reads the bit-stream as input)\n" );
+ fprintf(stderr, "-cbr : enable constant bitrate; default: variable bitrate\n" );
+ fprintf(stderr, "-cvbr : enable constrained variable bitrate; default: unconstrained\n" );
+ fprintf(stderr, "-variable-duration : enable frames of variable duration (experts only); default: disabled\n" );
+ fprintf(stderr, "-bandwidth <NB|MB|WB|SWB|FB> : audio bandwidth (from narrowband to fullband); default: sampling rate\n" );
+ fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" );
+ fprintf(stderr, "-max_payload <bytes> : maximum payload size in bytes, default: 1500\n" );
+ fprintf(stderr, "-complexity <comp> : complexity, 0 (lowest) ... 10 (highest); default: 10\n" );
+ fprintf(stderr, "-inbandfec : enable SILK inband FEC\n" );
+ fprintf(stderr, "-forcemono : force mono encoding, even for stereo input\n" );
+ fprintf(stderr, "-dtx : enable SILK DTX\n" );
+ fprintf(stderr, "-loss <perc> : simulate packet loss, in percent (0-100); default: 0\n" );
+}
+
+static void int_to_char(opus_uint32 i, unsigned char ch[4])
+{
+ /* Store i into ch[0..3], most significant byte first. */
+ int shift, pos = 0;
+ for (shift = 24; shift >= 0; shift -= 8)
+  ch[pos++] = (unsigned char)((i >> shift) & 0xFF);
+}
+
+static opus_uint32 char_to_int(unsigned char ch[4])
+{
+ /* Rebuild a 32-bit value from four bytes; ch[0] is the most significant byte. */
+ return ((((((opus_uint32)ch[0]<<8) | ch[1])<<8) | ch[2])<<8) | ch[3];
+}
+
+static void check_encoder_option(int decode_only, const char *opt)
+{
+ /* Nothing to do unless decode_only is set; otherwise report opt and exit with failure. */
+ if (!decode_only)
+  return;
+ fprintf(stderr, "option %s is only for encoding\n", opt);
+ exit(EXIT_FAILURE);
+}
+
+static const int silk8_test[][4] = { /* -silk8k_test rows: {forced mode, bandwidth, frame size, forced channels} */
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*3, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*2, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*3, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*2, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480, 2}
+};
+
+static const int silk12_test[][4] = { /* -silk12k_test rows: {forced mode, bandwidth, frame size, forced channels} */
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*3, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*2, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 480, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*3, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*2, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 480, 2}
+};
+
+static const int silk16_test[][4] = { /* -silk16k_test rows: {forced mode, bandwidth, frame size, forced channels} */
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*3, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*2, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*3, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*2, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480, 2}
+};
+
+static const int hybrid24_test[][4] = { /* -hybrid24k_test rows: {forced mode, bandwidth, frame size, forced channels}. NOTE(review): every row forces MODE_SILK_ONLY despite the "hybrid" name - confirm intent. */
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 2}
+};
+
+static const int hybrid48_test[][4] = { /* -hybrid48k_test rows: {forced mode, bandwidth, frame size, forced channels}. NOTE(review): every row forces MODE_SILK_ONLY despite the "hybrid" name - confirm intent. */
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 1},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 2},
+ {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 2}
+};
+
+static const int celt_test[][4] = { /* -celt_test rows: {forced mode, bandwidth, frame size, forced channels} */
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960, 1},
+
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480, 1},
+
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 240, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 240, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 240, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 240, 1},
+
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 120, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 120, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 120, 1},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 120, 1},
+
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960, 2},
+
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480, 2},
+
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 240, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 240, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 240, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 240, 2},
+
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 120, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 120, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 120, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 120, 2},
+
+};
+
+static const int celt_hq_test[][4] = { /* -celt_hq_test rows: {forced mode, bandwidth, frame size, forced channels} */
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 240, 2},
+ {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 120, 2},
+};
+
+#if 0 /* This is a hack that replaces the normal encoder/decoder with the multistream version */
+#define OpusEncoder OpusMSEncoder
+#define OpusDecoder OpusMSDecoder
+#define opus_encode opus_multistream_encode
+#define opus_decode opus_multistream_decode
+#define opus_encoder_ctl opus_multistream_encoder_ctl
+#define opus_decoder_ctl opus_multistream_decoder_ctl
+#define opus_encoder_create ms_opus_encoder_create
+#define opus_decoder_create ms_opus_decoder_create
+#define opus_encoder_destroy opus_multistream_encoder_destroy
+#define opus_decoder_destroy opus_multistream_decoder_destroy
+
+static OpusEncoder *ms_opus_encoder_create(opus_int32 Fs, int channels, int application, int *error)
+{ /* Forwards to the surround encoder constructor; streams, coupled_streams and mapping are passed by address and then discarded. */
+ int streams, coupled_streams;
+ unsigned char mapping[256];
+ return (OpusEncoder *)opus_multistream_surround_encoder_create(Fs, channels, 1, &streams, &coupled_streams, mapping, application, error);
+}
+static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *error)
+{ /* Creates a one-stream multistream decoder; the stream is coupled only when channels==2. */
+ int streams;
+ int coupled_streams;
+ unsigned char mapping[256]={0,1};
+ streams = 1;
+ coupled_streams = channels==2;
+ return (OpusDecoder *)opus_multistream_decoder_create(Fs, channels, streams, coupled_streams, mapping, error);
+}
+#endif
+
+int main(int argc, char *argv[])
+{
+ int err;
+ char *inFile, *outFile;
+ FILE *fin, *fout;
+ OpusEncoder *enc=NULL;
+ OpusDecoder *dec=NULL;
+ int args;
+ int len[2];
+ int frame_size, channels;
+ opus_int32 bitrate_bps=0;
+ unsigned char *data[2];
+ unsigned char *fbytes;
+ opus_int32 sampling_rate;
+ int use_vbr;
+ int max_payload_bytes;
+ int complexity;
+ int use_inbandfec;
+ int use_dtx;
+ int forcechannels;
+ int cvbr = 0;
+ int packet_loss_perc;
+ opus_int32 count=0, count_act=0;
+ int k;
+ opus_int32 skip=0;
+ int stop=0;
+ short *in, *out;
+ int application=OPUS_APPLICATION_AUDIO;
+ double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg;
+ double tot_samples=0;
+ opus_uint64 tot_in, tot_out;
+ int bandwidth=-1;
+ const char *bandwidth_string;
+ int lost = 0, lost_prev = 1;
+ int toggle = 0;
+ opus_uint32 enc_final_range[2];
+ opus_uint32 dec_final_range;
+ int encode_only=0, decode_only=0;
+ int max_frame_size = 960*6;
+ int curr_read=0;
+ int sweep_bps = 0;
+ int random_framesize=0, newsize=0, delayed_celt=0;
+ int sweep_max=0, sweep_min=0;
+ int random_fec=0;
+ const int (*mode_list)[4]=NULL;
+ int nb_modes_in_list=0;
+ int curr_mode=0;
+ int curr_mode_count=0;
+ int mode_switch_time = 48000;
+ int nb_encoded=0;
+ int remaining=0;
+ int variable_duration=OPUS_FRAMESIZE_ARG;
+ int delayed_decision=0;
+
+ if (argc < 5 )
+ {
+ print_usage( argv );
+ return EXIT_FAILURE;
+ }
+
+ tot_in=tot_out=0;
+ fprintf(stderr, "%s\n", opus_get_version_string());
+
+ args = 1;
+ if (strcmp(argv[args], "-e")==0)
+ {
+ encode_only = 1;
+ args++;
+ } else if (strcmp(argv[args], "-d")==0)
+ {
+ decode_only = 1;
+ args++;
+ }
+ if (!decode_only && argc < 7 )
+ {
+ print_usage( argv );
+ return EXIT_FAILURE;
+ }
+
+ if (!decode_only)
+ {
+ if (strcmp(argv[args], "voip")==0)
+ application = OPUS_APPLICATION_VOIP;
+ else if (strcmp(argv[args], "restricted-lowdelay")==0)
+ application = OPUS_APPLICATION_RESTRICTED_LOWDELAY;
+ else if (strcmp(argv[args], "audio")!=0) {
+ fprintf(stderr, "unknown application: %s\n", argv[args]);
+ print_usage(argv);
+ return EXIT_FAILURE;
+ }
+ args++;
+ }
+ sampling_rate = (opus_int32)atol(argv[args]);
+ args++;
+
+ if (sampling_rate != 8000 && sampling_rate != 12000
+ && sampling_rate != 16000 && sampling_rate != 24000
+ && sampling_rate != 48000)
+ {
+ fprintf(stderr, "Supported sampling rates are 8000, 12000, "
+ "16000, 24000 and 48000.\n");
+ return EXIT_FAILURE;
+ }
+ frame_size = sampling_rate/50;
+
+ channels = atoi(argv[args]);
+ args++;
+
+ if (channels < 1 || channels > 2)
+ {
+ fprintf(stderr, "Opus_demo supports only 1 or 2 channels.\n");
+ return EXIT_FAILURE;
+ }
+
+ if (!decode_only)
+ {
+ bitrate_bps = (opus_int32)atol(argv[args]);
+ args++;
+ }
+
+ /* defaults: */
+ use_vbr = 1;
+ bandwidth = OPUS_AUTO;
+ max_payload_bytes = MAX_PACKET;
+ complexity = 10;
+ use_inbandfec = 0;
+ forcechannels = OPUS_AUTO;
+ use_dtx = 0;
+ packet_loss_perc = 0;
+ max_frame_size = 2*48000;
+ curr_read=0;
+
+ while( args < argc - 2 ) {
+ /* process command line options */
+ if( strcmp( argv[ args ], "-cbr" ) == 0 ) {
+ check_encoder_option(decode_only, "-cbr");
+ use_vbr = 0;
+ args++;
+ } else if( strcmp( argv[ args ], "-bandwidth" ) == 0 ) {
+ check_encoder_option(decode_only, "-bandwidth");
+ if (strcmp(argv[ args + 1 ], "NB")==0)
+ bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+ else if (strcmp(argv[ args + 1 ], "MB")==0)
+ bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+ else if (strcmp(argv[ args + 1 ], "WB")==0)
+ bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ else if (strcmp(argv[ args + 1 ], "SWB")==0)
+ bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+ else if (strcmp(argv[ args + 1 ], "FB")==0)
+ bandwidth = OPUS_BANDWIDTH_FULLBAND;
+ else {
+ fprintf(stderr, "Unknown bandwidth %s. "
+ "Supported are NB, MB, WB, SWB, FB.\n",
+ argv[ args + 1 ]);
+ return EXIT_FAILURE;
+ }
+ args += 2;
+ } else if( strcmp( argv[ args ], "-framesize" ) == 0 ) {
+ check_encoder_option(decode_only, "-framesize");
+ if (strcmp(argv[ args + 1 ], "2.5")==0)
+ frame_size = sampling_rate/400;
+ else if (strcmp(argv[ args + 1 ], "5")==0)
+ frame_size = sampling_rate/200;
+ else if (strcmp(argv[ args + 1 ], "10")==0)
+ frame_size = sampling_rate/100;
+ else if (strcmp(argv[ args + 1 ], "20")==0)
+ frame_size = sampling_rate/50;
+ else if (strcmp(argv[ args + 1 ], "40")==0)
+ frame_size = sampling_rate/25;
+ else if (strcmp(argv[ args + 1 ], "60")==0)
+ frame_size = 3*sampling_rate/50;
+ else {
+ fprintf(stderr, "Unsupported frame size: %s ms. "
+ "Supported are 2.5, 5, 10, 20, 40, 60.\n",
+ argv[ args + 1 ]);
+ return EXIT_FAILURE;
+ }
+ args += 2;
+ } else if( strcmp( argv[ args ], "-max_payload" ) == 0 ) {
+ check_encoder_option(decode_only, "-max_payload");
+ max_payload_bytes = atoi( argv[ args + 1 ] );
+ args += 2;
+ } else if( strcmp( argv[ args ], "-complexity" ) == 0 ) {
+ check_encoder_option(decode_only, "-complexity");
+ complexity = atoi( argv[ args + 1 ] );
+ args += 2;
+ } else if( strcmp( argv[ args ], "-inbandfec" ) == 0 ) {
+ use_inbandfec = 1;
+ args++;
+ } else if( strcmp( argv[ args ], "-forcemono" ) == 0 ) {
+ check_encoder_option(decode_only, "-forcemono");
+ forcechannels = 1;
+ args++;
+ } else if( strcmp( argv[ args ], "-cvbr" ) == 0 ) {
+ check_encoder_option(decode_only, "-cvbr");
+ cvbr = 1;
+ args++;
+ } else if( strcmp( argv[ args ], "-variable-duration" ) == 0 ) {
+ check_encoder_option(decode_only, "-variable-duration");
+ variable_duration = OPUS_FRAMESIZE_VARIABLE;
+ args++;
+ } else if( strcmp( argv[ args ], "-delayed-decision" ) == 0 ) {
+ check_encoder_option(decode_only, "-delayed-decision");
+ delayed_decision = 1;
+ args++;
+ } else if( strcmp( argv[ args ], "-dtx") == 0 ) {
+ check_encoder_option(decode_only, "-dtx");
+ use_dtx = 1;
+ args++;
+ } else if( strcmp( argv[ args ], "-loss" ) == 0 ) {
+ packet_loss_perc = atoi( argv[ args + 1 ] );
+ args += 2;
+ } else if( strcmp( argv[ args ], "-sweep" ) == 0 ) {
+ check_encoder_option(decode_only, "-sweep");
+ sweep_bps = atoi( argv[ args + 1 ] );
+ args += 2;
+ } else if( strcmp( argv[ args ], "-random_framesize" ) == 0 ) {
+ check_encoder_option(decode_only, "-random_framesize");
+ random_framesize = 1;
+ args++;
+ } else if( strcmp( argv[ args ], "-sweep_max" ) == 0 ) {
+ check_encoder_option(decode_only, "-sweep_max");
+ sweep_max = atoi( argv[ args + 1 ] );
+ args += 2;
+ } else if( strcmp( argv[ args ], "-random_fec" ) == 0 ) {
+ check_encoder_option(decode_only, "-random_fec");
+ random_fec = 1;
+ args++;
+ } else if( strcmp( argv[ args ], "-silk8k_test" ) == 0 ) {
+ check_encoder_option(decode_only, "-silk8k_test");
+ mode_list = silk8_test;
+ nb_modes_in_list = 8;
+ args++;
+ } else if( strcmp( argv[ args ], "-silk12k_test" ) == 0 ) {
+ check_encoder_option(decode_only, "-silk12k_test");
+ mode_list = silk12_test;
+ nb_modes_in_list = 8;
+ args++;
+ } else if( strcmp( argv[ args ], "-silk16k_test" ) == 0 ) {
+ check_encoder_option(decode_only, "-silk16k_test");
+ mode_list = silk16_test;
+ nb_modes_in_list = 8;
+ args++;
+ } else if( strcmp( argv[ args ], "-hybrid24k_test" ) == 0 ) {
+ check_encoder_option(decode_only, "-hybrid24k_test");
+ mode_list = hybrid24_test;
+ nb_modes_in_list = 4;
+ args++;
+ } else if( strcmp( argv[ args ], "-hybrid48k_test" ) == 0 ) {
+ check_encoder_option(decode_only, "-hybrid48k_test");
+ mode_list = hybrid48_test;
+ nb_modes_in_list = 4;
+ args++;
+ } else if( strcmp( argv[ args ], "-celt_test" ) == 0 ) {
+ check_encoder_option(decode_only, "-celt_test");
+ mode_list = celt_test;
+ nb_modes_in_list = 32;
+ args++;
+ } else if( strcmp( argv[ args ], "-celt_hq_test" ) == 0 ) {
+ check_encoder_option(decode_only, "-celt_hq_test");
+ mode_list = celt_hq_test;
+ nb_modes_in_list = 4;
+ args++;
+ } else {
+ printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
+ print_usage( argv );
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (sweep_max)
+ sweep_min = bitrate_bps;
+
+ if (max_payload_bytes < 0 || max_payload_bytes > MAX_PACKET)
+ {
+ fprintf (stderr, "max_payload_bytes must be between 0 and %d\n",
+ MAX_PACKET);
+ return EXIT_FAILURE;
+ }
+
+ inFile = argv[argc-2];
+ fin = fopen(inFile, "rb");
+ if (!fin)
+ {
+ fprintf (stderr, "Could not open input file %s\n", argv[argc-2]);
+ return EXIT_FAILURE;
+ }
+ if (mode_list)
+ {
+ int size;
+ fseek(fin, 0, SEEK_END);
+ size = ftell(fin);
+ fprintf(stderr, "File size is %d bytes\n", size);
+ fseek(fin, 0, SEEK_SET);
+ mode_switch_time = size/sizeof(short)/channels/nb_modes_in_list;
+ fprintf(stderr, "Switching mode every %d samples\n", mode_switch_time);
+ }
+
+ outFile = argv[argc-1];
+ fout = fopen(outFile, "wb+");
+ if (!fout)
+ {
+ fprintf (stderr, "Could not open output file %s\n", argv[argc-1]);
+ fclose(fin);
+ return EXIT_FAILURE;
+ }
+
+ if (!decode_only)
+ {
+ enc = opus_encoder_create(sampling_rate, channels, application, &err);
+ if (err != OPUS_OK)
+ {
+ fprintf(stderr, "Cannot create encoder: %s\n", opus_strerror(err));
+ fclose(fin);
+ fclose(fout);
+ return EXIT_FAILURE;
+ }
+ opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
+ opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(bandwidth));
+ opus_encoder_ctl(enc, OPUS_SET_VBR(use_vbr));
+ opus_encoder_ctl(enc, OPUS_SET_VBR_CONSTRAINT(cvbr));
+ opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
+ opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(use_inbandfec));
+ opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(forcechannels));
+ opus_encoder_ctl(enc, OPUS_SET_DTX(use_dtx));
+ opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(packet_loss_perc));
+
+ opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&skip));
+ opus_encoder_ctl(enc, OPUS_SET_LSB_DEPTH(16));
+ opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
+ }
+ if (!encode_only)
+ {
+ dec = opus_decoder_create(sampling_rate, channels, &err);
+ if (err != OPUS_OK)
+ {
+ fprintf(stderr, "Cannot create decoder: %s\n", opus_strerror(err));
+ fclose(fin);
+ fclose(fout);
+ return EXIT_FAILURE;
+ }
+ }
+
+
+ switch(bandwidth)
+ {
+ case OPUS_BANDWIDTH_NARROWBAND:
+ bandwidth_string = "narrowband";
+ break;
+ case OPUS_BANDWIDTH_MEDIUMBAND:
+ bandwidth_string = "mediumband";
+ break;
+ case OPUS_BANDWIDTH_WIDEBAND:
+ bandwidth_string = "wideband";
+ break;
+ case OPUS_BANDWIDTH_SUPERWIDEBAND:
+ bandwidth_string = "superwideband";
+ break;
+ case OPUS_BANDWIDTH_FULLBAND:
+ bandwidth_string = "fullband";
+ break;
+ case OPUS_AUTO:
+ bandwidth_string = "auto";
+ break;
+ default:
+ bandwidth_string = "unknown";
+ break;
+ }
+
+ if (decode_only)
+ fprintf(stderr, "Decoding with %ld Hz output (%d channels)\n",
+ (long)sampling_rate, channels);
+ else
+ fprintf(stderr, "Encoding %ld Hz input at %.3f kb/s "
+ "in %s mode with %d-sample frames.\n",
+ (long)sampling_rate, bitrate_bps*0.001,
+ bandwidth_string, frame_size);
+
+ in = (short*)malloc(max_frame_size*channels*sizeof(short));
+ out = (short*)malloc(max_frame_size*channels*sizeof(short));
+ fbytes = (unsigned char*)malloc(max_frame_size*channels*sizeof(short));
+ data[0] = (unsigned char*)calloc(max_payload_bytes,sizeof(char));
+ if ( use_inbandfec ) {
+ data[1] = (unsigned char*)calloc(max_payload_bytes,sizeof(char));
+ }
+ if(delayed_decision)
+ {
+ if (variable_duration!=OPUS_FRAMESIZE_VARIABLE)
+ {
+ if (frame_size==sampling_rate/400)
+ variable_duration = OPUS_FRAMESIZE_2_5_MS;
+ else if (frame_size==sampling_rate/200)
+ variable_duration = OPUS_FRAMESIZE_5_MS;
+ else if (frame_size==sampling_rate/100)
+ variable_duration = OPUS_FRAMESIZE_10_MS;
+ else if (frame_size==sampling_rate/50)
+ variable_duration = OPUS_FRAMESIZE_20_MS;
+ else if (frame_size==sampling_rate/25)
+ variable_duration = OPUS_FRAMESIZE_40_MS;
+ else
+ variable_duration = OPUS_FRAMESIZE_60_MS;
+ opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
+ }
+ frame_size = 2*48000;
+ }
+ while (!stop)
+ {
+ if (delayed_celt)
+ {
+ frame_size = newsize;
+ delayed_celt = 0;
+ } else if (random_framesize && rand()%20==0)
+ {
+ newsize = rand()%6;
+ switch(newsize)
+ {
+ case 0: newsize=sampling_rate/400; break;
+ case 1: newsize=sampling_rate/200; break;
+ case 2: newsize=sampling_rate/100; break;
+ case 3: newsize=sampling_rate/50; break;
+ case 4: newsize=sampling_rate/25; break;
+ case 5: newsize=3*sampling_rate/50; break;
+ }
+ while (newsize < sampling_rate/25 && bitrate_bps-fabs(sweep_bps) <= 3*12*sampling_rate/newsize)
+ newsize*=2;
+ if (newsize < sampling_rate/100 && frame_size >= sampling_rate/100)
+ {
+ opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
+ delayed_celt=1;
+ } else {
+ frame_size = newsize;
+ }
+ }
+ if (random_fec && rand()%30==0)
+ {
+ opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(rand()%4==0));
+ }
+ if (decode_only)
+ {
+ unsigned char ch[4];
+ err = fread(ch, 1, 4, fin);
+ if (feof(fin))
+ break;
+ len[toggle] = char_to_int(ch);
+ if (len[toggle]>max_payload_bytes || len[toggle]<0)
+ {
+ fprintf(stderr, "Invalid payload length: %d\n",len[toggle]);
+ break;
+ }
+ err = fread(ch, 1, 4, fin);
+ enc_final_range[toggle] = char_to_int(ch);
+ err = fread(data[toggle], 1, len[toggle], fin);
+ if (err<len[toggle])
+ {
+ fprintf(stderr, "Ran out of input, "
+ "expecting %d bytes got %d\n",
+ len[toggle],err);
+ break;
+ }
+ } else {
+ int i;
+ if (mode_list!=NULL)
+ {
+ opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(mode_list[curr_mode][1]));
+ opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(mode_list[curr_mode][0]));
+ opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
+ frame_size = mode_list[curr_mode][2];
+ }
+ err = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
+ curr_read = err;
+ tot_in += curr_read;
+ for(i=0;i<curr_read*channels;i++)
+ {
+ opus_int32 s;
+ s=fbytes[2*i+1]<<8|fbytes[2*i];
+ s=((s&0xFFFF)^0x8000)-0x8000;
+ in[i+remaining*channels]=s;
+ }
+ if (curr_read+remaining < frame_size)
+ {
+ for (i=(curr_read+remaining)*channels;i<frame_size*channels;i++)
+ in[i] = 0;
+ if (encode_only || decode_only)
+ stop = 1;
+ }
+ len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);
+ nb_encoded = opus_packet_get_samples_per_frame(data[toggle], sampling_rate)*opus_packet_get_nb_frames(data[toggle], len[toggle]);
+ remaining = frame_size-nb_encoded;
+ for(i=0;i<remaining*channels;i++)
+ in[i] = in[nb_encoded*channels+i];
+ if (sweep_bps!=0)
+ {
+ bitrate_bps += sweep_bps;
+ if (sweep_max)
+ {
+ if (bitrate_bps > sweep_max)
+ sweep_bps = -sweep_bps;
+ else if (bitrate_bps < sweep_min)
+ sweep_bps = -sweep_bps;
+ }
+ /* safety */
+ if (bitrate_bps<1000)
+ bitrate_bps = 1000;
+ opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
+ }
+ opus_encoder_ctl(enc, OPUS_GET_FINAL_RANGE(&enc_final_range[toggle]));
+ if (len[toggle] < 0)
+ {
+ fprintf (stderr, "opus_encode() returned %d\n", len[toggle]);
+ fclose(fin);
+ fclose(fout);
+ return EXIT_FAILURE;
+ }
+ curr_mode_count += frame_size;
+ if (curr_mode_count > mode_switch_time && curr_mode < nb_modes_in_list-1)
+ {
+ curr_mode++;
+ curr_mode_count = 0;
+ }
+ }
+
+#if 0 /* This is for testing the padding code, do not enable by default */
+ if (len[toggle]<1275)
+ {
+ int new_len = len[toggle]+rand()%(max_payload_bytes-len[toggle]);
+ if ((err = opus_packet_pad(data[toggle], len[toggle], new_len)) != OPUS_OK)
+ {
+ fprintf(stderr, "padding failed: %s\n", opus_strerror(err));
+ return EXIT_FAILURE;
+ }
+ len[toggle] = new_len;
+ }
+#endif
+ if (encode_only)
+ {
+ unsigned char int_field[4];
+ int_to_char(len[toggle], int_field);
+ if (fwrite(int_field, 1, 4, fout) != 4) {
+ fprintf(stderr, "Error writing.\n");
+ return EXIT_FAILURE;
+ }
+ int_to_char(enc_final_range[toggle], int_field);
+ if (fwrite(int_field, 1, 4, fout) != 4) {
+ fprintf(stderr, "Error writing.\n");
+ return EXIT_FAILURE;
+ }
+ if (fwrite(data[toggle], 1, len[toggle], fout) != (unsigned)len[toggle]) {
+ fprintf(stderr, "Error writing.\n");
+ return EXIT_FAILURE;
+ }
+ tot_samples += nb_encoded;
+ } else {
+ int output_samples;
+ lost = len[toggle]==0 || (packet_loss_perc>0 && rand()%100 < packet_loss_perc);
+ if (lost)
+ opus_decoder_ctl(dec, OPUS_GET_LAST_PACKET_DURATION(&output_samples));
+ else
+ output_samples = max_frame_size;
+ if( count >= use_inbandfec ) {
+ /* delay by one packet when using in-band FEC */
+ if( use_inbandfec ) {
+ if( lost_prev ) {
+ /* attempt to decode with in-band FEC from next packet */
+ opus_decoder_ctl(dec, OPUS_GET_LAST_PACKET_DURATION(&output_samples));
+ output_samples = opus_decode(dec, lost ? NULL : data[toggle], len[toggle], out, output_samples, 1);
+ } else {
+ /* regular decode */
+ output_samples = max_frame_size;
+ output_samples = opus_decode(dec, data[1-toggle], len[1-toggle], out, output_samples, 0);
+ }
+ } else {
+ output_samples = opus_decode(dec, lost ? NULL : data[toggle], len[toggle], out, output_samples, 0);
+ }
+ if (output_samples>0)
+ {
+ if (!decode_only && tot_out + output_samples > tot_in)
+ {
+ stop=1;
+ output_samples = tot_in-tot_out;
+ }
+ if (output_samples>skip) {
+ int i;
+ for(i=0;i<(output_samples-skip)*channels;i++)
+ {
+ short s;
+ s=out[i+(skip*channels)];
+ fbytes[2*i]=s&0xFF;
+ fbytes[2*i+1]=(s>>8)&0xFF;
+ }
+ if (fwrite(fbytes, sizeof(short)*channels, output_samples-skip, fout) != (unsigned)(output_samples-skip)){
+ fprintf(stderr, "Error writing.\n");
+ return EXIT_FAILURE;
+ }
+ tot_out += output_samples-skip;
+ }
+ if (output_samples<skip) skip -= output_samples;
+ else skip = 0;
+ } else {
+ fprintf(stderr, "error decoding frame: %s\n",
+ opus_strerror(output_samples));
+ }
+ tot_samples += output_samples;
+ }
+ }
+
+ if (!encode_only)
+ opus_decoder_ctl(dec, OPUS_GET_FINAL_RANGE(&dec_final_range));
+ /* compare final range encoder rng values of encoder and decoder */
+ if( enc_final_range[toggle^use_inbandfec]!=0 && !encode_only
+ && !lost && !lost_prev
+ && dec_final_range != enc_final_range[toggle^use_inbandfec] ) {
+ fprintf (stderr, "Error: Range coder state mismatch "
+ "between encoder and decoder "
+ "in frame %ld: 0x%8lx vs 0x%8lx\n",
+ (long)count,
+ (unsigned long)enc_final_range[toggle^use_inbandfec],
+ (unsigned long)dec_final_range);
+ fclose(fin);
+ fclose(fout);
+ return EXIT_FAILURE;
+ }
+
+ lost_prev = lost;
+
+ /* count bits */
+ bits += len[toggle]*8;
+ bits_max = ( len[toggle]*8 > bits_max ) ? len[toggle]*8 : bits_max;
+ if( count >= use_inbandfec ) {
+ nrg = 0.0;
+ if (!decode_only)
+ {
+ for ( k = 0; k < frame_size * channels; k++ ) {
+ nrg += in[ k ] * (double)in[ k ];
+ }
+ }
+ if ( ( nrg / ( frame_size * channels ) ) > 1e5 ) {
+ bits_act += len[toggle]*8;
+ count_act++;
+ }
+ /* Variance */
+ bits2 += len[toggle]*len[toggle]*64;
+ }
+ count++;
+ toggle = (toggle + use_inbandfec) & 1;
+ }
+ fprintf (stderr, "average bitrate: %7.3f kb/s\n",
+ 1e-3*bits*sampling_rate/tot_samples);
+ fprintf (stderr, "maximum bitrate: %7.3f kb/s\n",
+ 1e-3*bits_max*sampling_rate/frame_size);
+ if (!decode_only)
+ fprintf (stderr, "active bitrate: %7.3f kb/s\n",
+ 1e-3*bits_act*sampling_rate/(frame_size*(double)count_act));
+ fprintf (stderr, "bitrate standard deviation: %7.3f kb/s\n",
+ 1e-3*sqrt(bits2/count - bits*bits/(count*(double)count))*sampling_rate/frame_size);
+ /* Close any files to which intermediate results were stored */
+ SILK_DEBUG_STORE_CLOSE_FILES
+ silk_TimerSave("opus_timing.txt");
+ opus_encoder_destroy(enc);
+ opus_decoder_destroy(dec);
+ free(data[0]);
+ if (use_inbandfec)
+ free(data[1]);
+ fclose(fin);
+ fclose(fout);
+ free(in);
+ free(out);
+ free(fbytes);
+ return EXIT_SUCCESS;
+}
diff --git a/drivers/opus/opus_encoder.c b/drivers/opus/opus_encoder.c
new file mode 100644
index 0000000000..f739daa258
--- /dev/null
+++ b/drivers/opus/opus_encoder.c
@@ -0,0 +1,2488 @@
+/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
+ Written by Jean-Marc Valin and Koen Vos */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdarg.h>
+#include "celt.h"
+#include "entenc.h"
+#include "opus_modes.h"
+#include "API.h"
+#include "stack_alloc.h"
+#include "float_cast.h"
+#include "opus.h"
+#include "arch.h"
+#include "opus_private.h"
+#include "os_support.h"
+#include "cpu_support.h"
+#include "analysis.h"
+#include "mathops.h"
+#include "tuning_parameters.h"
+#ifdef OPUS_FIXED_POINT
+#include "fixed/structs_FIX.h"
+#else
+#include "float/structs_FLP.h"
+#endif
+
+#define MAX_ENCODER_BUFFER 480 /* OpusEncoder::delay_buffer holds 2*MAX_ENCODER_BUFFER samples */
+
+typedef struct { /* State carried between successive calls to compute_stereo_width() */
+ opus_val32 XX, XY, YY; /* Recursively smoothed x*x, x*y and y*y sums of the two channels */
+ opus_val16 smoothed_width; /* Width estimate smoothed across frames */
+ opus_val16 max_follower; /* Decaying peak follower of smoothed_width */
+} StereoWidthState;
+
+struct OpusEncoder {
+ int celt_enc_offset; /* Byte offset from the start of this struct to the CELT encoder state */
+ int silk_enc_offset; /* Byte offset from the start of this struct to the SILK encoder state */
+ silk_EncControlStruct silk_mode; /* SILK control parameters; defaults are filled in by opus_encoder_init() */
+ int application; /* OPUS_APPLICATION_* value given to opus_encoder_init() */
+ int channels; /* Number of input channels (1 or 2) */
+ int delay_compensation; /* opus_encoder_init() sets this to Fs/250, i.e. 4 ms worth of samples */
+ int force_channels; /* OPUS_AUTO by default */
+ int signal_type; /* OPUS_AUTO by default */
+ int user_bandwidth; /* OPUS_AUTO by default */
+ int max_bandwidth; /* OPUS_BANDWIDTH_FULLBAND by default */
+ int user_forced_mode; /* OPUS_AUTO by default */
+ int voice_ratio; /* -1 when unknown; otherwise round(100*(1-music_prob)) taken from the signal analysis */
+ opus_int32 Fs; /* Sampling rate (at the API level) */
+ int use_vbr; /* 1 by default */
+ int vbr_constraint; /* 1 by default (constrained VBR) */
+ int variable_duration; /* OPUS_FRAMESIZE_* selection; OPUS_FRAMESIZE_ARG by default */
+ opus_int32 bitrate_bps; /* Bitrate in effect; recomputed by opus_encode_native() from user_bitrate_bps */
+ opus_int32 user_bitrate_bps; /* Requested bitrate, or OPUS_AUTO / OPUS_BITRATE_MAX */
+ int lsb_depth; /* 24 by default; upper bound for the lsb_depth argument of opus_encode_native() */
+ int encoder_buffer; /* Fs/100 by default */
+ int lfe;
+
+#define OPUS_ENCODER_RESET_START stream_channels
+ int stream_channels; /* NOTE(review): named by OPUS_ENCODER_RESET_START, presumably the first field cleared on a reset - confirm */
+ opus_int16 hybrid_stereo_width_Q14; /* 1<<14 after opus_encoder_init() */
+ opus_int32 variable_HP_smth2_Q15;
+ opus_val16 prev_HB_gain; /* Q15ONE after opus_encoder_init() */
+ opus_val32 hp_mem[4]; /* presumably the filter memory handed to hp_cutoff()/dc_reject(), which use two values per channel - confirm */
+ int mode; /* MODE_HYBRID after opus_encoder_init() */
+ int prev_mode;
+ int prev_channels;
+ int prev_framesize;
+ int bandwidth; /* OPUS_BANDWIDTH_FULLBAND after opus_encoder_init() */
+ int silk_bw_switch;
+ /* Set to 1 by opus_encoder_init() */
+ int first;
+ opus_val16 * energy_masking;
+ StereoWidthState width_mem; /* State passed to compute_stereo_width() */
+ opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
+#ifndef DISABLE_FLOAT_API
+ TonalityAnalysisState analysis; /* State passed to run_analysis() */
+ int detected_bandwidth; /* OPUS_BANDWIDTH_* derived from the analysis, or 0 when no valid analysis is available */
+ int analysis_offset;
+#endif
+ opus_uint32 rangeFinal; /* Reset to 0 at the start of every opus_encode_native() call */
+ int arch; /* Value returned by opus_select_arch() */
+};
+
+/* Bandwidth transition tables for voice and music, in mono and stereo.
+ Each row holds two values: the first is the middle (memoryless)
+ threshold, the second is the hysteresis (difference with the middle).
+ The trailing comment on each row names the two bandwidths it separates. */
+static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
+ 11000, 1000, /* NB<->MB */
+ 14000, 1000, /* MB<->WB */
+ 17000, 1000, /* WB<->SWB */
+ 21000, 2000, /* SWB<->FB */
+};
+static const opus_int32 mono_music_bandwidth_thresholds[8] = {
+ 12000, 1000, /* NB<->MB */
+ 15000, 1000, /* MB<->WB */
+ 18000, 2000, /* WB<->SWB */
+ 22000, 2000, /* SWB<->FB */
+};
+static const opus_int32 stereo_voice_bandwidth_thresholds[8] = {
+ 11000, 1000, /* NB<->MB */
+ 14000, 1000, /* MB<->WB */
+ 21000, 2000, /* WB<->SWB */
+ 28000, 2000, /* SWB<->FB */
+};
+static const opus_int32 stereo_music_bandwidth_thresholds[8] = {
+ 12000, 1000, /* NB<->MB */
+ 18000, 2000, /* MB<->WB */
+ 21000, 2000, /* WB<->SWB */
+ 30000, 2000, /* SWB<->FB */
+};
+/* Threshold bit-rates for switching between mono and stereo */
+static const opus_int32 stereo_voice_threshold = 30000;
+static const opus_int32 stereo_music_threshold = 30000;
+
+/* Threshold bit-rate for switching between SILK/hybrid and CELT-only.
+ Rows are indexed by channel layout (mono, stereo), columns by signal
+ class (voice, music). */
+static const opus_int32 mode_thresholds[2][2] = {
+ /* voice */ /* music */
+ { 64000, 16000}, /* mono */
+ { 36000, 16000}, /* stereo */
+};
+
+/* Returns the number of bytes needed to hold an OpusEncoder state for the
+   given channel count (1 or 2). Returns 0 when the channel count is not
+   supported or when the SILK size query reports an error. */
+int opus_encoder_get_size(int channels)
+{
+   int silk_bytes;
+   int total;
+
+   if (channels != 1 && channels != 2)
+      return 0;
+   if (silk_Get_Encoder_Size( &silk_bytes ) != 0)
+      return 0;
+
+   /* Layout: aligned OpusEncoder header, aligned SILK state, CELT state */
+   total = align(sizeof(OpusEncoder));
+   total += align(silk_bytes);
+   total += celt_encoder_get_size(channels);
+   return total;
+}
+
+int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application)
+{ /* Initialises an encoder state in memory supplied by the caller.
+     Fs must be 8000, 12000, 16000, 24000 or 48000, channels must be 1 or 2 and
+     application must be one of the three OPUS_APPLICATION_* values tested
+     below; otherwise OPUS_BAD_ARG is returned before st is written to.
+     Returns OPUS_OK on success, OPUS_BAD_ARG when the SILK size query fails and
+     OPUS_INTERNAL_ERROR when the SILK or CELT initialisation fails. */
+ void *silk_enc;
+ CELTEncoder *celt_enc;
+ int err;
+ int ret, silkEncSizeBytes;
+
+ if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)||
+ (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO
+ && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY))
+ return OPUS_BAD_ARG;
+
+ OPUS_CLEAR((char*)st, opus_encoder_get_size(channels)); /* Zero the header together with the SILK and CELT sub-states */
+ /* Create SILK encoder */
+ ret = silk_Get_Encoder_Size( &silkEncSizeBytes );
+ if (ret)
+ return OPUS_BAD_ARG;
+ silkEncSizeBytes = align(silkEncSizeBytes);
+ st->silk_enc_offset = align(sizeof(OpusEncoder)); /* SILK state follows the aligned header */
+ st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes; /* CELT state follows the aligned SILK state */
+ silk_enc = (char*)st+st->silk_enc_offset;
+ celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+
+ st->stream_channels = st->channels = channels;
+
+ st->Fs = Fs;
+
+ st->arch = opus_select_arch();
+
+ ret = silk_InitEncoder( silk_enc, st->arch, &st->silk_mode );
+ if(ret)return OPUS_INTERNAL_ERROR;
+
+ /* default SILK parameters */
+ st->silk_mode.nChannelsAPI = channels;
+ st->silk_mode.nChannelsInternal = channels;
+ st->silk_mode.API_sampleRate = st->Fs;
+ st->silk_mode.maxInternalSampleRate = 16000;
+ st->silk_mode.minInternalSampleRate = 8000;
+ st->silk_mode.desiredInternalSampleRate = 16000;
+ st->silk_mode.payloadSize_ms = 20;
+ st->silk_mode.bitRate = 25000;
+ st->silk_mode.packetLossPercentage = 0;
+ st->silk_mode.complexity = 9;
+ st->silk_mode.useInBandFEC = 0;
+ st->silk_mode.useDTX = 0;
+ st->silk_mode.useCBR = 0;
+ st->silk_mode.reducedDependency = 0;
+
+ /* Create CELT encoder */
+ /* Initialize CELT encoder */
+ err = celt_encoder_init(celt_enc, Fs, channels, st->arch);
+ if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR;
+
+ celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0));
+ celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); /* CELT uses the same complexity as the SILK default above */
+
+ st->use_vbr = 1;
+ /* Makes constrained VBR the default (safer for real-time use) */
+ st->vbr_constraint = 1;
+ st->user_bitrate_bps = OPUS_AUTO;
+ st->bitrate_bps = 3000+Fs*channels;
+ st->application = application;
+ st->signal_type = OPUS_AUTO;
+ st->user_bandwidth = OPUS_AUTO;
+ st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+ st->force_channels = OPUS_AUTO;
+ st->user_forced_mode = OPUS_AUTO;
+ st->voice_ratio = -1;
+ st->encoder_buffer = st->Fs/100;
+ st->lsb_depth = 24;
+ st->variable_duration = OPUS_FRAMESIZE_ARG;
+
+ /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead
+ + 1.5 ms for SILK resamplers and stereo prediction) */
+ st->delay_compensation = st->Fs/250;
+
+ st->hybrid_stereo_width_Q14 = 1 << 14;
+ st->prev_HB_gain = Q15ONE;
+ st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
+ st->first = 1;
+ st->mode = MODE_HYBRID;
+ st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
+
+ return OPUS_OK;
+}
+
+/* Builds the first byte (TOC) of a packet from the coding mode, the frame
+   rate in frames per second, the audio bandwidth and the channel count. */
+static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels)
+{
+   unsigned char toc;
+   int period;
+
+   /* period counts how many doublings bring framerate up to at least 400 */
+   for (period = 0; framerate < 400; period++)
+      framerate <<= 1;
+
+   if (mode == MODE_CELT_ONLY)
+   {
+      int bw_index = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND;
+      toc = 0x80;
+      toc |= (bw_index < 0 ? 0 : bw_index) << 5;
+      toc |= period<<3;
+   } else {
+      if (mode == MODE_SILK_ONLY)
+      {
+         toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5;
+      } else /* Hybrid */
+      {
+         toc = 0x60;
+         toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4;
+      }
+      /* SILK-only and hybrid share the same frame-duration field */
+      toc |= (period-2)<<3;
+   }
+   /* Bit 2 flags stereo */
+   if (channels==2)
+      toc |= 1<<2;
+   return toc;
+}
+
+#ifndef OPUS_FIXED_POINT
+static void silk_biquad_float(
+ const opus_val16 *in, /* I: Input signal */
+ const opus_int32 *B_Q28, /* I: MA coefficients [3] */
+ const opus_int32 *A_Q28, /* I: AR coefficients [2] */
+ opus_val32 *S, /* I/O: State vector [2] */
+ opus_val16 *out, /* O: Output signal */
+ const opus_int32 len, /* I: Number of samples to filter (the loop below handles any count) */
+ int stride /* I: Index step between consecutive samples in in[] and out[] */
+)
+{
+ /* Second-order filter section in DIRECT FORM II TRANSPOSED (uses 2 element state vector).
+    The Q28 integer coefficients are first converted to floating point. */
+ opus_int k;
+ opus_val32 vout;
+ opus_val32 inval;
+ opus_val32 A[2], B[3];
+
+ A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28)));
+ A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28)));
+ B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28)));
+ B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28)));
+ B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28)));
+
+ /* A[] is used with its original sign; the recursion below subtracts the AR terms */
+
+ for( k = 0; k < len; k++ ) {
+ /* S[ 0 ], S[ 1 ] are plain floating-point values here */
+ inval = in[ k*stride ];
+ vout = S[ 0 ] + B[0]*inval;
+
+ S[ 0 ] = S[1] - vout*A[0] + B[1]*inval;
+
+ S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL;
+
+ /* The output is stored directly, without rescaling or saturation */
+ out[ k*stride ] = vout;
+ }
+}
+#endif
+
+static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{ /* Derives Q28 biquad coefficients from cutoff_Hz and Fs, then filters len
+     samples of each channel from in[] to out[]. The data is interleaved:
+     `channels` is passed to the biquad as the sample stride. hp_mem provides
+     two state values per channel (the second channel uses hp_mem+2).
+     Only one or two channels are handled. */
+ opus_int32 B_Q28[ 3 ], A_Q28[ 2 ];
+ opus_int32 Fc_Q19, r_Q28, r_Q22;
+
+ silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) );
+ Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 ); /* 1.5*pi*cutoff_Hz/Fs in Q19 */
+ silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 );
+
+ r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 ); /* r = 1 - 0.92*Fc */
+
+ /* b = r * [ 1; -2; 1 ]; */
+ /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */
+ B_Q28[ 0 ] = r_Q28;
+ B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 );
+ B_Q28[ 2 ] = r_Q28;
+
+ /* -r * ( 2 - Fc * Fc ); */
+ r_Q22 = silk_RSHIFT( r_Q28, 6 );
+ A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0, 22 ) );
+ A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 );
+
+#ifdef OPUS_FIXED_POINT
+ silk_biquad_alt( in, B_Q28, A_Q28, hp_mem, out, len, channels );
+ if( channels == 2 ) {
+ silk_biquad_alt( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels );
+ }
+#else
+ silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels );
+ if( channels == 2 ) {
+ silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels );
+ }
+#endif
+}
+
+#ifdef OPUS_FIXED_POINT
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{ /* Fixed-point DC rejection: two cascaded first-order stages per channel.
+     Each stage tracks a slowly moving average in hp_mem and subtracts it from
+     its input. in[] and out[] are interleaved (index channels*i+c); hp_mem
+     holds two values per channel, one for each stage. */
+ int c, i;
+ int shift;
+
+ /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */
+ shift=celt_ilog2(Fs/(cutoff_Hz*3));
+ for (c=0;c<channels;c++)
+ {
+ for (i=0;i<len;i++)
+ {
+ opus_val32 x, tmp, y;
+ x = SHL32(EXTEND32(in[channels*i+c]), 15); /* Work with 15 extra fractional bits */
+ /* First stage */
+ tmp = x-hp_mem[2*c];
+ hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift);
+ /* Second stage */
+ y = tmp - hp_mem[2*c+1];
+ hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift);
+ out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767)); /* Drop the extra bits and saturate to 16 bits */
+ }
+ }
+}
+
+#else
+/* Floating-point DC rejection: two cascaded first-order stages per channel.
+   Each stage tracks a slowly moving average in hp_mem and subtracts it from
+   its input. in[] and out[] are interleaved; hp_mem holds two values per
+   channel, one for each stage. */
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   int ch;
+   float coef;
+
+   coef = 4.0f*cutoff_Hz/Fs;
+   for (ch=0;ch<channels;ch++)
+   {
+      int n;
+      opus_val32 *stage1 = &hp_mem[2*ch];
+      opus_val32 *stage2 = &hp_mem[2*ch+1];
+
+      for (n=0;n<len;n++)
+      {
+         opus_val32 sample, hp1, hp2;
+
+         sample = in[channels*n+ch];
+         /* First stage */
+         hp1 = sample - *stage1;
+         *stage1 = *stage1 + coef*(sample - *stage1) + VERY_SMALL;
+         /* Second stage */
+         hp2 = hp1 - *stage2;
+         *stage2 = *stage2 + coef*(hp1 - *stage2) + VERY_SMALL;
+         out[channels*n+ch] = hp2;
+      }
+   }
+}
+#endif
+
+static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
+ int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
+{ /* Moves the two channels of an interleaved frame towards each other.
+     For every sample, half of the difference between in[] channel 0 and
+     channel 1 is scaled by a gain, then subtracted from out[] channel 0 and
+     added to out[] channel 1. The gain is Q15ONE-g1 at the start and is
+     cross-faded to Q15ONE-g2 over the first `overlap` samples using the
+     squared window; the remaining samples use Q15ONE-g2.
+     overlap48 is the overlap length at 48 kHz; it and the window index are
+     rescaled by 48000/Fs.
+     NOTE(review): out[] is read as well as written, so it must already hold
+     the signal - presumably callers pass the same buffer for in and out; confirm. */
+ int i;
+ int overlap;
+ int inc;
+ inc = 48000/Fs;
+ overlap=overlap48/inc;
+ g1 = Q15ONE-g1;
+ g2 = Q15ONE-g2;
+ for (i=0;i<overlap;i++)
+ {
+ opus_val32 diff;
+ opus_val16 g, w;
+ w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+ g = SHR32(MAC16_16(MULT16_16(w,g2),
+ Q15ONE-w, g1), 15); /* g = w*g2 + (1-w)*g1 */
+ diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
+ diff = MULT16_16_Q15(g, diff);
+ out[i*channels] = out[i*channels] - diff;
+ out[i*channels+1] = out[i*channels+1] + diff;
+ }
+ for (;i<frame_size;i++)
+ {
+ opus_val32 diff;
+ diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
+ diff = MULT16_16_Q15(g2, diff);
+ out[i*channels] = out[i*channels] - diff;
+ out[i*channels+1] = out[i*channels+1] + diff;
+ }
+}
+
+/* Applies a gain to an interleaved frame, cross-fading from g1 to g2.
+   Over the first overlap48/(48000/Fs) samples the gain is
+   w*g2 + (1-w)*g1, where w is the squared window value; every later sample
+   is scaled by g2. The fade part treats any channel count other than 1 as
+   two interleaved channels. */
+static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
+      int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
+{
+   int n;
+   int ch;
+   int step;
+   int fade_len;
+
+   step = 48000/Fs;
+   fade_len = overlap48/step;
+
+   /* Cross-fade section */
+   for (n=0;n<fade_len;n++)
+   {
+      opus_val16 gain, win2;
+      win2 = MULT16_16_Q15(window[n*step], window[n*step]);
+      gain = SHR32(MAC16_16(MULT16_16(win2,g2),
+            Q15ONE-win2, g1), 15);
+      if (channels==1)
+      {
+         out[n] = MULT16_16_Q15(gain, in[n]);
+      } else {
+         out[n*2] = MULT16_16_Q15(gain, in[n*2]);
+         out[n*2+1] = MULT16_16_Q15(gain, in[n*2+1]);
+      }
+   }
+
+   /* Constant-gain section, one channel at a time */
+   ch=0;
+   do {
+      for (n=fade_len;n<frame_size;n++)
+         out[n*channels+ch] = MULT16_16_Q15(g2, in[n*channels+ch]);
+   } while (++ch<channels);
+}
+
+/* Allocates and initialises an encoder state.
+   Returns the new state, or NULL on failure. When error is not NULL it
+   receives OPUS_BAD_ARG for unsupported arguments, OPUS_ALLOC_FAIL when the
+   allocation fails, and otherwise the result of opus_encoder_init(). */
+OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error)
+{
+   OpusEncoder *enc = NULL;
+   int status;
+   int fs_ok, channels_ok, application_ok;
+
+   fs_ok = (Fs==48000 || Fs==24000 || Fs==16000 || Fs==12000 || Fs==8000);
+   channels_ok = (channels==1 || channels==2);
+   application_ok = (application == OPUS_APPLICATION_VOIP
+         || application == OPUS_APPLICATION_AUDIO
+         || application == OPUS_APPLICATION_RESTRICTED_LOWDELAY);
+
+   if (!fs_ok || !channels_ok || !application_ok)
+   {
+      status = OPUS_BAD_ARG;
+   } else {
+      enc = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels));
+      if (enc == NULL)
+      {
+         status = OPUS_ALLOC_FAIL;
+      } else {
+         status = opus_encoder_init(enc, Fs, channels, application);
+         if (status != OPUS_OK)
+         {
+            /* Initialisation failed: release the memory and report NULL */
+            opus_free(enc);
+            enc = NULL;
+         }
+      }
+   }
+   if (error)
+      *error = status;
+   return enc;
+}
+
+/* Turns the user bitrate setting into a concrete bitrate in bits per second.
+   A frame_size of 0 is replaced by Fs/400. OPUS_AUTO yields a default that
+   depends on the frame rate and channel count, OPUS_BITRATE_MAX yields the
+   rate that fills max_data_bytes per frame, and any other value is returned
+   unchanged. */
+static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes)
+{
+   const opus_int32 requested = st->user_bitrate_bps;
+
+   if (frame_size == 0)
+      frame_size = st->Fs/400;
+
+   if (requested == OPUS_AUTO)
+      return 60*st->Fs/frame_size + st->Fs*st->channels;
+   if (requested == OPUS_BITRATE_MAX)
+      return max_data_bytes*8*st->Fs/frame_size;
+   return requested;
+}
+
+#ifndef DISABLE_FLOAT_API
+/* Don't use more than 60 ms for the frame size analysis */
+#define MAX_DYNAMIC_FRAMESIZE 24
+/* Estimates how much the bitrate will be boosted based on the sub-frame energy.
+   E holds sub-frame energies and E_1 the matching inverse energies. The first
+   min(maxM, 2^LM + 1) entries of each are summed and the result is a value
+   between 0 and 1. */
+static float transient_boost(const float *E, const float *E_1, int LM, int maxM)
+{
+   float sumE = 0;
+   float sumE_1 = 0;
+   float metric;
+   int count;
+   int k;
+
+   count = IMIN(maxM, (1<<LM)+1);
+   k = 0;
+   while (k < count)
+   {
+      sumE += E[k];
+      sumE_1 += E_1[k];
+      k++;
+   }
+   /* Product of the mean energy and the mean inverse energy */
+   metric = sumE*sumE_1/(count*count);
+   return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2))));
+}
+
+/* Viterbi decoding trying to find the best frame size combination using look-ahead
+
+ E and E_1 are the sub-frame energies and their inverses; entries 0..N may be
+ read. N is the number of sub-frames to analyse (at most MAX_DYNAMIC_FRAMESIZE).
+ A frame made of 2^LM sub-frames costs (frame_cost + rate*2^LM), increased by
+ the transient boost weighted with a rate-dependent factor.
+ Returns the LM (0..3) stored for the first frame of the cheapest path.
+
+ State numbering:
+ 0: unused
+ 1: 2.5 ms
+ 2: 5 ms (#1)
+ 3: 5 ms (#2)
+ 4: 10 ms (#1)
+ 5: 10 ms (#2)
+ 6: 10 ms (#3)
+ 7: 10 ms (#4)
+ 8: 20 ms (#1)
+ 9: 20 ms (#2)
+ 10: 20 ms (#3)
+ 11: 20 ms (#4)
+ 12: 20 ms (#5)
+ 13: 20 ms (#6)
+ 14: 20 ms (#7)
+ 15: 20 ms (#8)
+*/
+static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate)
+{
+ int i;
+ float cost[MAX_DYNAMIC_FRAMESIZE][16];
+ int states[MAX_DYNAMIC_FRAMESIZE][16];
+ float best_cost;
+ int best_state;
+ float factor;
+ /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */
+ if (rate<80)
+ factor=0;
+ else if (rate>160)
+ factor=1;
+ else
+ factor = (rate-80.f)/80.f;
+ /* The factor above makes variable framesize less aggressive at lower
+ bitrates, but I can't find any valid theoretical justification for this
+ (other than it seems to help) */
+ for (i=0;i<16;i++)
+ {
+ /* Impossible state */
+ states[0][i] = -1;
+ cost[0][i] = 1e10;
+ }
+ for (i=0;i<4;i++)
+ {
+ cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1));
+ states[0][1<<i] = i; /* At time 0 the back-pointer slot stores the LM itself */
+ }
+ for (i=1;i<N;i++)
+ {
+ int j;
+
+ /* Follow continuations */
+ for (j=2;j<16;j++)
+ {
+ cost[i][j] = cost[i-1][j-1];
+ states[i][j] = j-1;
+ }
+
+ /* New frames */
+ for(j=0;j<4;j++)
+ {
+ int k;
+ float min_cost;
+ float curr_cost;
+ states[i][1<<j] = 1;
+ min_cost = cost[i-1][1];
+ for(k=1;k<4;k++) /* Candidate predecessors are the last sub-frame of each frame size: states 1, 3, 7 and 15 */
+ {
+ float tmp = cost[i-1][(1<<(k+1))-1];
+ if (tmp < min_cost)
+ {
+ states[i][1<<j] = (1<<(k+1))-1;
+ min_cost = tmp;
+ }
+ }
+ curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1));
+ cost[i][1<<j] = min_cost;
+ /* If part of the frame is outside the analysis window, only count part of the cost */
+ if (N-i < (1<<j))
+ cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j);
+ else
+ cost[i][1<<j] += curr_cost;
+ }
+ }
+
+ best_state=1;
+ best_cost = cost[N-1][1];
+ /* Find best end state (doesn't force a frame to end at N-1) */
+ for (i=2;i<16;i++)
+ {
+ if (cost[N-1][i]<best_cost)
+ {
+ best_cost = cost[N-1][i];
+ best_state = i;
+ }
+ }
+
+ /* Follow transitions back */
+ for (i=N-1;i>=0;i--)
+ {
+ /*printf("%d ", best_state);*/
+ best_state = states[i][best_state];
+ }
+ /*printf("%d\n", best_state);*/
+ return best_state;
+}
+
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+ int bitrate, opus_val16 tonality, float *mem, int buffering,
+ downmix_func downmix)
+{ /* Chooses a frame size for the start of the input by measuring the energy of
+     each 2.5 ms sub-frame (Fs/400 samples) and running transient_viterbi().
+     x is handed to the downmix callback together with the channel count C;
+     len is the number of samples per channel. mem carries energies from the
+     previous call: mem[0] always, mem[1] and mem[2] only when buffering is
+     non-zero. tonality scales the per-frame cost.
+     Returns bestLM, the result of transient_viterbi(). */
+ int N;
+ int i;
+ float e[MAX_DYNAMIC_FRAMESIZE+4];
+ float e_1[MAX_DYNAMIC_FRAMESIZE+3];
+ opus_val32 memx;
+ int bestLM=0;
+ int subframe;
+ int pos;
+ VARDECL(opus_val32, sub);
+
+ subframe = Fs/400;
+ ALLOC(sub, subframe, opus_val32);
+ e[0]=mem[0];
+ e_1[0]=1.f/(EPSILON+mem[0]);
+ if (buffering)
+ {
+ /* Consider the CELT delay when not in restricted-lowdelay */
+ /* We assume the buffering is between 2.5 and 5 ms */
+ int offset = 2*subframe - buffering;
+ celt_assert(offset>=0 && offset <= subframe);
+ x += C*offset;
+ len -= offset;
+ e[1]=mem[1];
+ e_1[1]=1.f/(EPSILON+mem[1]);
+ e[2]=mem[2];
+ e_1[2]=1.f/(EPSILON+mem[2]);
+ pos = 3; /* Three remembered energies precede the newly measured ones */
+ } else {
+ pos=1; /* Only mem[0] precedes the newly measured energies */
+ }
+ N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
+ /* Just silencing a warning, it's really initialized later */
+ memx = 0;
+ for (i=0;i<N;i++)
+ {
+ float tmp;
+ opus_val32 tmpx;
+ int j;
+ tmp=EPSILON;
+
+ downmix(x, sub, subframe, i*subframe, 0, -2, C);
+ if (i==0)
+ memx = sub[0];
+ for (j=0;j<subframe;j++) /* Energy of the sample-to-sample difference of the downmixed signal */
+ {
+ tmpx = sub[j];
+ tmp += (tmpx-memx)*(float)(tmpx-memx);
+ memx = tmpx;
+ }
+ e[i+pos] = tmp;
+ e_1[i+pos] = 1.f/tmp;
+ }
+ /* Hack to get 20 ms working with APPLICATION_AUDIO
+ The real problem is that the corresponding memory needs to use 1.5 ms
+ from this frame and 1 ms from the next frame */
+ e[i+pos] = e[i+pos-1];
+ if (buffering)
+ N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2);
+ bestLM = transient_viterbi(e, e_1, N, (int)((1.f+.5f*tonality)*(60*C+40)), bitrate/400);
+ mem[0] = e[1<<bestLM]; /* Remember the energies that follow the chosen frame for the next call */
+ if (buffering)
+ {
+ mem[1] = e[(1<<bestLM)+1];
+ mem[2] = e[(1<<bestLM)+2];
+ }
+ return bestLM;
+}
+
+#endif
+
+#ifndef DISABLE_FLOAT_API
+#ifdef OPUS_FIXED_POINT
+#define PCM2VAL(x) FLOAT2INT16(x)
+#else
+#define PCM2VAL(x) SCALEIN(x)
+#endif
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{ /* Downmixes `subframe` samples of interleaved float PCM (C channels,
+     starting at sample `offset`) into sub[].
+     Channel c1 is always included. If c2 > -1 channel c2 is added; if c2 == -2
+     channels 1..C-1 are added; any other c2 leaves c1 alone.
+     The result is then multiplied by a scale factor. */
+ const float *x;
+ opus_val32 scale;
+ int j;
+ x = (const float *)_x;
+ for (j=0;j<subframe;j++)
+ sub[j] = PCM2VAL(x[(j+offset)*C+c1]);
+ if (c2>-1)
+ {
+ for (j=0;j<subframe;j++)
+ sub[j] += PCM2VAL(x[(j+offset)*C+c2]);
+ } else if (c2==-2)
+ {
+ int c;
+ for (c=1;c<C;c++)
+ {
+ for (j=0;j<subframe;j++)
+ sub[j] += PCM2VAL(x[(j+offset)*C+c]);
+ }
+ }
+#ifdef OPUS_FIXED_POINT
+ scale = (1<<SIG_SHIFT);
+#else
+ scale = 1.f;
+#endif
+ if (C==-2) /* NOTE(review): C is the channel count, so this test is only true for C == -2 and the else branch is what runs in practice; the c2==-2 case above suggests c2 may have been intended - confirm before changing, since this alters the scaling seen by callers */
+ scale /= C;
+ else
+ scale /= 2;
+ for (j=0;j<subframe;j++)
+ sub[j] *= scale;
+}
+#endif
+
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{ /* Downmixes `subframe` samples of interleaved 16-bit PCM (C channels,
+     starting at sample `offset`) into sub[].
+     Channel c1 is always included. If c2 > -1 channel c2 is added; if c2 == -2
+     channels 1..C-1 are added; any other c2 leaves c1 alone.
+     The result is then multiplied by a scale factor. */
+ const opus_int16 *x;
+ opus_val32 scale;
+ int j;
+ x = (const opus_int16 *)_x;
+ for (j=0;j<subframe;j++)
+ sub[j] = x[(j+offset)*C+c1];
+ if (c2>-1)
+ {
+ for (j=0;j<subframe;j++)
+ sub[j] += x[(j+offset)*C+c2];
+ } else if (c2==-2)
+ {
+ int c;
+ for (c=1;c<C;c++)
+ {
+ for (j=0;j<subframe;j++)
+ sub[j] += x[(j+offset)*C+c];
+ }
+ }
+#ifdef OPUS_FIXED_POINT
+ scale = (1<<SIG_SHIFT);
+#else
+ scale = 1.f/32768;
+#endif
+ if (C==-2) /* NOTE(review): C is the channel count, so this test is only true for C == -2 and the else branch is what runs in practice; the c2==-2 case above suggests c2 may have been intended - confirm before changing, since this alters the scaling seen by callers */
+ scale /= C;
+ else
+ scale /= 2;
+ for (j=0;j<subframe;j++)
+ sub[j] *= scale;
+}
+
+/* Picks the frame size, in samples per channel, to encode from a buffer of
+   frame_size samples. variable_duration selects the policy: use the whole
+   buffer (OPUS_FRAMESIZE_ARG), use 20 ms (OPUS_FRAMESIZE_VARIABLE) or use one
+   of the fixed durations from 2.5 ms to 60 ms. Returns -1 when the buffer is
+   shorter than 2.5 ms, shorter than the selected size, when the policy is
+   unknown, or when the result is not 2.5, 5, 10, 20, 40 or 60 ms at Fs. */
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
+{
+   int chosen;
+   int is_legal;
+
+   if (frame_size<Fs/400)
+      return -1;
+
+   if (variable_duration == OPUS_FRAMESIZE_ARG)
+   {
+      chosen = frame_size;
+   } else if (variable_duration == OPUS_FRAMESIZE_VARIABLE)
+   {
+      chosen = Fs/50;
+   } else if (variable_duration < OPUS_FRAMESIZE_2_5_MS || variable_duration > OPUS_FRAMESIZE_60_MS)
+   {
+      return -1;
+   } else {
+      /* Each step doubles the 2.5 ms base size, capped at 60 ms */
+      chosen = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS));
+   }
+
+   if (chosen>frame_size)
+      return -1;
+
+   is_legal = 400*chosen==Fs || 200*chosen==Fs || 100*chosen==Fs ||
+         50*chosen==Fs || 25*chosen==Fs || 50*chosen==3*Fs;
+   return is_legal ? chosen : -1;
+}
+
+/* Decides how many samples per channel the next frame will contain.
+   With the float API enabled, OPUS_FRAMESIZE_VARIABLE and at least 5 ms of
+   input, the size comes from optimize_framesize() and is reduced until it
+   fits in frame_size. In every other case frame_size_select() decides.
+   Returns the frame size, or -1 when no valid size exists. */
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+      int delay_compensation, downmix_func downmix
+#ifndef DISABLE_FLOAT_API
+      , float *subframe_mem
+#endif
+      )
+{
+#ifndef DISABLE_FLOAT_API
+   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   {
+      int lm;
+
+      lm = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps,
+            0, subframe_mem, delay_compensation, downmix);
+      /* Shrink until the chosen frame fits in the available input */
+      for (; (Fs/400<<lm)>frame_size; lm--)
+         ;
+      frame_size = (Fs/400<<lm);
+   } else
+#endif
+   {
+      frame_size = frame_size_select(frame_size, variable_duration, Fs);
+   }
+   return frame_size<0 ? -1 : frame_size;
+}
+
+/* Estimates how wide the stereo image of an interleaved stereo frame is.
+   pcm holds frame_size sample pairs (left, right) and Fs is the sampling
+   rate. mem carries the smoothed energies, the smoothed width and its peak
+   follower from one call to the next and is updated in place.
+   Returns min(Q15ONE, 20*mem->max_follower). */
+opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
+{
+   opus_val16 corr;
+   opus_val16 ldiff;
+   opus_val16 width;
+   opus_val32 xx, xy, yy;
+   opus_val16 sqrt_xx, sqrt_yy;
+   opus_val16 qrrt_xx, qrrt_yy;
+   int frame_rate;
+   int i;
+   opus_val16 short_alpha;
+
+   frame_rate = Fs/frame_size;
+   short_alpha = Q15ONE - 25*Q15ONE/IMAX(50,frame_rate);
+   xx=xy=yy=0;
+   /* Four sample pairs are consumed per iteration. The bound is frame_size-3
+      so that a frame whose length is not a multiple of four (for example the
+      30-sample 2.5 ms frame at 12 kHz) never reads past the end of pcm; the
+      few leftover samples are left out of the estimate. */
+   for (i=0;i<frame_size-3;i+=4)
+   {
+      opus_val32 pxx=0;
+      opus_val32 pxy=0;
+      opus_val32 pyy=0;
+      opus_val16 x, y;
+      x = pcm[2*i];
+      y = pcm[2*i+1];
+      pxx = SHR32(MULT16_16(x,x),2);
+      pxy = SHR32(MULT16_16(x,y),2);
+      pyy = SHR32(MULT16_16(y,y),2);
+      x = pcm[2*i+2];
+      y = pcm[2*i+3];
+      pxx += SHR32(MULT16_16(x,x),2);
+      pxy += SHR32(MULT16_16(x,y),2);
+      pyy += SHR32(MULT16_16(y,y),2);
+      x = pcm[2*i+4];
+      y = pcm[2*i+5];
+      pxx += SHR32(MULT16_16(x,x),2);
+      pxy += SHR32(MULT16_16(x,y),2);
+      pyy += SHR32(MULT16_16(y,y),2);
+      x = pcm[2*i+6];
+      y = pcm[2*i+7];
+      pxx += SHR32(MULT16_16(x,x),2);
+      pxy += SHR32(MULT16_16(x,y),2);
+      pyy += SHR32(MULT16_16(y,y),2);
+
+      xx += SHR32(pxx, 10);
+      xy += SHR32(pxy, 10);
+      yy += SHR32(pyy, 10);
+   }
+   /* Recursive smoothing of the three sums, clamped to be non-negative */
+   mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX);
+   mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY);
+   mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY);
+   mem->XX = MAX32(0, mem->XX);
+   mem->XY = MAX32(0, mem->XY);
+   mem->YY = MAX32(0, mem->YY);
+   /* Only update the width when at least one channel carries enough energy */
+   if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18))
+   {
+      sqrt_xx = celt_sqrt(mem->XX);
+      sqrt_yy = celt_sqrt(mem->YY);
+      qrrt_xx = celt_sqrt(sqrt_xx);
+      qrrt_yy = celt_sqrt(sqrt_yy);
+      /* Inter-channel correlation */
+      mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy);
+      corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16);
+      /* Approximate loudness difference */
+      ldiff = Q15ONE*ABS16(qrrt_xx-qrrt_yy)/(EPSILON+qrrt_xx+qrrt_yy);
+      width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff);
+      /* Smoothing over one second */
+      mem->smoothed_width += (width-mem->smoothed_width)/frame_rate;
+      /* Peak follower */
+      mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width);
+   } else {
+      /* Values are unused on this path; assigned to keep the debug print below meaningful */
+      width = 0;
+      corr=Q15ONE;
+      ldiff=0;
+   }
+   /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/
+   return EXTRACT16(MIN32(Q15ONE,20*mem->max_follower));
+}
+
+opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+ unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+ const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix)
+{
+ void *silk_enc;
+ CELTEncoder *celt_enc;
+ int i;
+ int ret=0;
+ opus_int32 nBytes;
+ ec_enc enc;
+ int bytes_target;
+ int prefill=0;
+ int start_band = 0;
+ int redundancy = 0;
+ int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
+ int celt_to_silk = 0;
+ VARDECL(opus_val16, pcm_buf);
+ int nb_compr_bytes;
+ int to_celt = 0;
+ opus_uint32 redundant_rng = 0;
+ int cutoff_Hz, hp_freq_smth1;
+ int voice_est; /* Probability of voice in Q7 */
+ opus_int32 equiv_rate;
+ int delay_compensation;
+ int frame_rate;
+ opus_int32 max_rate; /* Max bitrate we're allowed to use */
+ int curr_bandwidth;
+ opus_val16 HB_gain;
+ opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
+ int total_buffer;
+ opus_val16 stereo_width;
+ const CELTMode *celt_mode;
+ AnalysisInfo analysis_info;
+ int analysis_read_pos_bak=-1;
+ int analysis_read_subframe_bak=-1;
+ VARDECL(opus_val16, tmp_prefill);
+
+ ALLOC_STACK;
+
+ max_data_bytes = IMIN(1276, out_data_bytes);
+
+ st->rangeFinal = 0;
+ if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
+ 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs)
+ || (400*frame_size < st->Fs)
+ || max_data_bytes<=0
+ )
+ {
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+ silk_enc = (char*)st+st->silk_enc_offset;
+ celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+ if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ delay_compensation = 0;
+ else
+ delay_compensation = st->delay_compensation;
+
+ lsb_depth = IMIN(lsb_depth, st->lsb_depth);
+
+ analysis_info.valid = 0;
+ celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
+#ifndef DISABLE_FLOAT_API
+#ifdef OPUS_FIXED_POINT
+ if (st->silk_mode.complexity >= 10 && st->Fs==48000)
+#else
+ if (st->silk_mode.complexity >= 7 && st->Fs==48000)
+#endif
+ {
+ analysis_read_pos_bak = st->analysis.read_pos;
+ analysis_read_subframe_bak = st->analysis.read_subframe;
+ run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
+ c1, c2, analysis_channels, st->Fs,
+ lsb_depth, downmix, &analysis_info);
+ }
+#endif
+
+ st->voice_ratio = -1;
+
+#ifndef DISABLE_FLOAT_API
+ st->detected_bandwidth = 0;
+ if (analysis_info.valid)
+ {
+ int analysis_bandwidth;
+ if (st->signal_type == OPUS_AUTO)
+ st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
+
+ analysis_bandwidth = analysis_info.bandwidth;
+ if (analysis_bandwidth<=12)
+ st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+ else if (analysis_bandwidth<=14)
+ st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+ else if (analysis_bandwidth<=16)
+ st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ else if (analysis_bandwidth<=18)
+ st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+ else
+ st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+ }
+#endif
+
+ if (st->channels==2 && st->force_channels!=1)
+ stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem);
+ else
+ stereo_width = 0;
+ total_buffer = delay_compensation;
+ st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
+
+ frame_rate = st->Fs/frame_size;
+ if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
+ || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
+ {
+ /*If the space is too low to do something useful, emit 'PLC' frames.*/
+ int tocmode = st->mode;
+ int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth;
+ if (tocmode==0)
+ tocmode = MODE_SILK_ONLY;
+ if (frame_rate>100)
+ tocmode = MODE_CELT_ONLY;
+ if (frame_rate < 50)
+ tocmode = MODE_SILK_ONLY;
+ if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND)
+ bw=OPUS_BANDWIDTH_WIDEBAND;
+ else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND)
+ bw=OPUS_BANDWIDTH_NARROWBAND;
+ else if (bw<=OPUS_BANDWIDTH_SUPERWIDEBAND)
+ bw=OPUS_BANDWIDTH_SUPERWIDEBAND;
+ data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels);
+ RESTORE_STACK;
+ return 1;
+ }
+ if (!st->use_vbr)
+ {
+ int cbrBytes;
+ cbrBytes = IMIN( (st->bitrate_bps + 4*frame_rate)/(8*frame_rate) , max_data_bytes);
+ st->bitrate_bps = cbrBytes * (8*frame_rate);
+ max_data_bytes = cbrBytes;
+ }
+ max_rate = frame_rate*max_data_bytes*8;
+
+ /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
+ equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50);
+
+ if (st->signal_type == OPUS_SIGNAL_VOICE)
+ voice_est = 127;
+ else if (st->signal_type == OPUS_SIGNAL_MUSIC)
+ voice_est = 0;
+ else if (st->voice_ratio >= 0)
+ {
+ voice_est = st->voice_ratio*327>>8;
+ /* For AUDIO, never be more than 90% confident of having speech */
+ if (st->application == OPUS_APPLICATION_AUDIO)
+ voice_est = IMIN(voice_est, 115);
+ } else if (st->application == OPUS_APPLICATION_VOIP)
+ voice_est = 115;
+ else
+ voice_est = 48;
+
+ if (st->force_channels!=OPUS_AUTO && st->channels == 2)
+ {
+ st->stream_channels = st->force_channels;
+ } else {
+#ifdef FUZZING
+ /* Random mono/stereo decision */
+ if (st->channels == 2 && (rand()&0x1F)==0)
+ st->stream_channels = 3-st->stream_channels;
+#else
+ /* Rate-dependent mono-stereo decision */
+ if (st->channels == 2)
+ {
+ opus_int32 stereo_threshold;
+ stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14);
+ if (st->stream_channels == 2)
+ stereo_threshold -= 1000;
+ else
+ stereo_threshold += 1000;
+ st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1;
+ } else {
+ st->stream_channels = st->channels;
+ }
+#endif
+ }
+ equiv_rate = st->bitrate_bps - (40*st->stream_channels+20)*(st->Fs/frame_size - 50);
+
+ /* Mode selection depending on application and signal type */
+ if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ {
+ st->mode = MODE_CELT_ONLY;
+ } else if (st->user_forced_mode == OPUS_AUTO)
+ {
+#ifdef FUZZING
+ /* Random mode switching */
+ if ((rand()&0xF)==0)
+ {
+ if ((rand()&0x1)==0)
+ st->mode = MODE_CELT_ONLY;
+ else
+ st->mode = MODE_SILK_ONLY;
+ } else {
+ if (st->prev_mode==MODE_CELT_ONLY)
+ st->mode = MODE_CELT_ONLY;
+ else
+ st->mode = MODE_SILK_ONLY;
+ }
+#else
+ opus_int32 mode_voice, mode_music;
+ opus_int32 threshold;
+
+ /* Interpolate based on stereo width */
+ mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0])
+ + MULT16_32_Q15(stereo_width,mode_thresholds[1][0]));
+ mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1])
+ + MULT16_32_Q15(stereo_width,mode_thresholds[1][1]));
+ /* Interpolate based on speech/music probability */
+ threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
+ /* Bias towards SILK for VoIP because of some useful features */
+ if (st->application == OPUS_APPLICATION_VOIP)
+ threshold += 8000;
+
+ /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/
+ /* Hysteresis */
+ if (st->prev_mode == MODE_CELT_ONLY)
+ threshold -= 4000;
+ else if (st->prev_mode>0)
+ threshold += 4000;
+
+ st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;
+
+ /* When FEC is enabled and there's enough packet loss, use SILK */
+ if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4)
+ st->mode = MODE_SILK_ONLY;
+ /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */
+ if (st->silk_mode.useDTX && voice_est > 100)
+ st->mode = MODE_SILK_ONLY;
+#endif
+ } else {
+ st->mode = st->user_forced_mode;
+ }
+
+ /* Override the chosen mode to make sure we meet the requested frame size */
+ if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100)
+ st->mode = MODE_CELT_ONLY;
+ if (st->lfe)
+ st->mode = MODE_CELT_ONLY;
+ /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */
+ if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8))
+ st->mode = MODE_CELT_ONLY;
+
+ if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0
+ && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)
+ {
+ /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */
+ st->silk_mode.toMono = 1;
+ st->stream_channels = 2;
+ } else {
+ st->silk_mode.toMono = 0;
+ }
+
+ if (st->prev_mode > 0 &&
+ ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ||
+ (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)))
+ {
+ redundancy = 1;
+ celt_to_silk = (st->mode != MODE_CELT_ONLY);
+ if (!celt_to_silk)
+ {
+ /* Switch to SILK/hybrid if frame size is 10 ms or more*/
+ if (frame_size >= st->Fs/100)
+ {
+ st->mode = st->prev_mode;
+ to_celt = 1;
+ } else {
+ redundancy=0;
+ }
+ }
+ }
+ /* For the first frame at a new SILK bandwidth */
+ if (st->silk_bw_switch)
+ {
+ redundancy = 1;
+ celt_to_silk = 1;
+ st->silk_bw_switch = 0;
+ prefill=1;
+ }
+
+ if (redundancy)
+ {
+ /* Fair share of the max size allowed */
+ redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200));
+ /* For VBR, target the actual bitrate (subject to the limit above) */
+ if (st->use_vbr)
+ redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600);
+ }
+
+ if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY)
+ {
+ silk_EncControlStruct dummy;
+ silk_InitEncoder( silk_enc, st->arch, &dummy);
+ prefill=1;
+ }
+
+ /* Automatic (rate-dependent) bandwidth selection */
+ if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
+ {
+ const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds;
+ opus_int32 bandwidth_thresholds[8];
+ int bandwidth = OPUS_BANDWIDTH_FULLBAND;
+ opus_int32 equiv_rate2;
+
+ equiv_rate2 = equiv_rate;
+ if (st->mode != MODE_CELT_ONLY)
+ {
+ /* Adjust the threshold +/- 10% depending on complexity */
+ equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50;
+ /* CBR is less efficient by ~1 kb/s */
+ if (!st->use_vbr)
+ equiv_rate2 -= 1000;
+ }
+ if (st->channels==2 && st->force_channels!=1)
+ {
+ voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds;
+ music_bandwidth_thresholds = stereo_music_bandwidth_thresholds;
+ } else {
+ voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds;
+ music_bandwidth_thresholds = mono_music_bandwidth_thresholds;
+ }
+ /* Interpolate bandwidth thresholds depending on voice estimation */
+ for (i=0;i<8;i++)
+ {
+ bandwidth_thresholds[i] = music_bandwidth_thresholds[i]
+ + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14);
+ }
+ do {
+ int threshold, hysteresis;
+ threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)];
+ hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1];
+ if (!st->first)
+ {
+ if (st->bandwidth >= bandwidth)
+ threshold -= hysteresis;
+ else
+ threshold += hysteresis;
+ }
+ if (equiv_rate2 >= threshold)
+ break;
+ } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND);
+ st->bandwidth = bandwidth;
+ /* Prevents any transition to SWB/FB until the SILK layer has fully
+ switched to WB mode and turned the variable LP filter off */
+ if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+ st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ }
+
+ if (st->bandwidth>st->max_bandwidth)
+ st->bandwidth = st->max_bandwidth;
+
+ if (st->user_bandwidth != OPUS_AUTO)
+ st->bandwidth = st->user_bandwidth;
+
+ /* This prevents us from using hybrid at unsafe CBR/max rates */
+ if (st->mode != MODE_CELT_ONLY && max_rate < 15000)
+ {
+ st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND);
+ }
+
+ /* Prevents Opus from wasting bits on frequencies that are above
+ the Nyquist rate of the input signal */
+ if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND)
+ st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+ if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+ st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND)
+ st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+ if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND)
+ st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+#ifndef DISABLE_FLOAT_API
+ /* Use detected bandwidth to reduce the encoded bandwidth. */
+ if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO)
+ {
+ int min_detected_bandwidth;
+ /* Makes bandwidth detection more conservative just in case the detector
+ gets it wrong when we could have coded a high bandwidth transparently.
+ When operating in SILK/hybrid mode, we don't go below wideband to avoid
+ more complicated switches that require redundancy. */
+ if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY)
+ min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+ else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY)
+ min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+ else if (equiv_rate <= 30000*st->stream_channels)
+ min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ else if (equiv_rate <= 44000*st->stream_channels)
+ min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+ else
+ min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+
+ st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth);
+ st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth);
+ }
+#endif
+ celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth));
+
+ /* CELT mode doesn't support mediumband, use wideband instead */
+ if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+ st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ if (st->lfe)
+ st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+
+ /* Can't support higher than wideband for >20 ms frames */
+ if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND))
+ {
+ VARDECL(unsigned char, tmp_data);
+ int nb_frames;
+ int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;
+ VARDECL(OpusRepacketizer, rp);
+ opus_int32 bytes_per_frame;
+ opus_int32 repacketize_len;
+
+#ifndef DISABLE_FLOAT_API
+ if (analysis_read_pos_bak!= -1)
+ {
+ st->analysis.read_pos = analysis_read_pos_bak;
+ st->analysis.read_subframe = analysis_read_subframe_bak;
+ }
+#endif
+
+ nb_frames = frame_size > st->Fs/25 ? 3 : 2;
+ bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames);
+
+ ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char);
+
+ ALLOC(rp, 1, OpusRepacketizer);
+ opus_repacketizer_init(rp);
+
+ bak_mode = st->user_forced_mode;
+ bak_bandwidth = st->user_bandwidth;
+ bak_channels = st->force_channels;
+
+ st->user_forced_mode = st->mode;
+ st->user_bandwidth = st->bandwidth;
+ st->force_channels = st->stream_channels;
+ bak_to_mono = st->silk_mode.toMono;
+
+ if (bak_to_mono)
+ st->force_channels = 1;
+ else
+ st->prev_channels = st->stream_channels;
+ for (i=0;i<nb_frames;i++)
+ {
+ int tmp_len;
+ st->silk_mode.toMono = 0;
+ /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
+ if (to_celt && i==nb_frames-1)
+ st->user_forced_mode = MODE_CELT_ONLY;
+ tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50,
+ tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth,
+ NULL, 0, c1, c2, analysis_channels, downmix);
+ if (tmp_len<0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len);
+ if (ret<0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ }
+ if (st->use_vbr)
+ repacketize_len = out_data_bytes;
+ else
+ repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes);
+ ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr);
+ if (ret<0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ st->user_forced_mode = bak_mode;
+ st->user_bandwidth = bak_bandwidth;
+ st->force_channels = bak_channels;
+ st->silk_mode.toMono = bak_to_mono;
+ RESTORE_STACK;
+ return ret;
+ }
+ curr_bandwidth = st->bandwidth;
+
+ /* Chooses the appropriate mode for speech
+ *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */
+ if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+ st->mode = MODE_HYBRID;
+ if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
+ st->mode = MODE_SILK_ONLY;
+
+ /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */
+ bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1;
+
+ data += 1;
+
+ ec_enc_init(&enc, data, max_data_bytes-1);
+
+ ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
+ for (i=0;i<total_buffer*st->channels;i++)
+ pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i];
+
+ if (st->mode == MODE_CELT_ONLY)
+ hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
+ else
+ hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15;
+
+ st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15,
+ hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) );
+
+ /* convert from log scale to Hertz */
+ cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) );
+
+ if (st->application == OPUS_APPLICATION_VOIP)
+ {
+ hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
+ } else {
+ dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
+ }
+
+
+
+ /* SILK processing */
+ HB_gain = Q15ONE;
+ if (st->mode != MODE_CELT_ONLY)
+ {
+ opus_int32 total_bitRate, celt_rate;
+#ifdef OPUS_FIXED_POINT
+ const opus_int16 *pcm_silk;
+#else
+ VARDECL(opus_int16, pcm_silk);
+ ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
+#endif
+
+ /* Distribute bits between SILK and CELT */
+ total_bitRate = 8 * bytes_target * frame_rate;
+ if( st->mode == MODE_HYBRID ) {
+ int HB_gain_ref;
+ /* Base rate for SILK */
+ st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) );
+ if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) {
+ /* SILK gets 2/3 of the remaining bits */
+ st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3;
+ } else { /* FULLBAND */
+ /* SILK gets 3/5 of the remaining bits */
+ st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5;
+ }
+ /* Don't let SILK use more than 80% */
+ if( st->silk_mode.bitRate > total_bitRate * 4/5 ) {
+ st->silk_mode.bitRate = total_bitRate * 4/5;
+ }
+ if (!st->energy_masking)
+ {
+ /* Increasingly attenuate high band when it gets allocated fewer bits */
+ celt_rate = total_bitRate - st->silk_mode.bitRate;
+ HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600;
+ HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6);
+ HB_gain = HB_gain < Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE;
+ }
+ } else {
+ /* SILK gets all bits */
+ st->silk_mode.bitRate = total_bitRate;
+ }
+
+ /* Surround masking for SILK */
+ if (st->energy_masking && st->use_vbr && !st->lfe)
+ {
+ opus_val32 mask_sum=0;
+ opus_val16 masking_depth;
+ opus_int32 rate_offset;
+ int c;
+ int end = 17;
+ opus_int16 srate = 16000;
+ if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND)
+ {
+ end = 13;
+ srate = 8000;
+ } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+ {
+ end = 15;
+ srate = 12000;
+ }
+ for (c=0;c<st->channels;c++)
+ {
+ for(i=0;i<end;i++)
+ {
+ opus_val16 mask;
+ mask = MAX16(MIN16(st->energy_masking[21*c+i],
+ QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+ if (mask > 0)
+ mask = HALF16(mask);
+ mask_sum += mask;
+ }
+ }
+ /* Conservative rate reduction, we cut the masking in half */
+ masking_depth = mask_sum / end*st->channels;
+ masking_depth += QCONST16(.2f, DB_SHIFT);
+ rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT);
+ rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3);
+ /* Split the rate change between the SILK and CELT part for hybrid. */
+ if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND)
+ st->silk_mode.bitRate += 3*rate_offset/5;
+ else
+ st->silk_mode.bitRate += rate_offset;
+ bytes_target += rate_offset * frame_size / (8 * st->Fs);
+ }
+
+ st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;
+ st->silk_mode.nChannelsAPI = st->channels;
+ st->silk_mode.nChannelsInternal = st->stream_channels;
+ if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+ st->silk_mode.desiredInternalSampleRate = 8000;
+ } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+ st->silk_mode.desiredInternalSampleRate = 12000;
+ } else {
+ silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND );
+ st->silk_mode.desiredInternalSampleRate = 16000;
+ }
+ if( st->mode == MODE_HYBRID ) {
+ /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */
+ st->silk_mode.minInternalSampleRate = 16000;
+ } else {
+ st->silk_mode.minInternalSampleRate = 8000;
+ }
+
+ if (st->mode == MODE_SILK_ONLY)
+ {
+ opus_int32 effective_max_rate = max_rate;
+ st->silk_mode.maxInternalSampleRate = 16000;
+ if (frame_rate > 50)
+ effective_max_rate = effective_max_rate*2/3;
+ if (effective_max_rate < 13000)
+ {
+ st->silk_mode.maxInternalSampleRate = 12000;
+ st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate);
+ }
+ if (effective_max_rate < 9600)
+ {
+ st->silk_mode.maxInternalSampleRate = 8000;
+ st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate);
+ }
+ } else {
+ st->silk_mode.maxInternalSampleRate = 16000;
+ }
+
+ st->silk_mode.useCBR = !st->use_vbr;
+
+ /* Call SILK encoder for the low band */
+ nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes);
+
+ st->silk_mode.maxBits = nBytes*8;
+ /* Only allow up to 90% of the bits for hybrid mode*/
+ if (st->mode == MODE_HYBRID)
+ st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10;
+ if (st->silk_mode.useCBR)
+ {
+ st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8;
+ /* Reduce the initial target to make it easier to reach the CBR rate */
+ st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000);
+ }
+
+ if (prefill)
+ {
+ opus_int32 zero=0;
+ int prefill_offset;
+ /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode
+ a discontinuity. The exact location is what we need to avoid leaving any "gap"
+ in the audio when mixing with the redundant CELT frame. Here we can afford to
+ overwrite st->delay_buffer because the only thing that uses it before it gets
+ rewritten is tmp_prefill[] and even then only the part after the ramp really
+ gets used (rather than sent to the encoder and discarded) */
+ prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400);
+ gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset,
+ 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs);
+ for(i=0;i<prefill_offset;i++)
+ st->delay_buffer[i]=0;
+#ifdef OPUS_FIXED_POINT
+ pcm_silk = st->delay_buffer;
+#else
+ for (i=0;i<st->encoder_buffer*st->channels;i++)
+ pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]);
+#endif
+ silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 );
+ }
+
+#ifdef OPUS_FIXED_POINT
+ pcm_silk = pcm_buf+total_buffer*st->channels;
+#else
+ for (i=0;i<frame_size*st->channels;i++)
+ pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]);
+#endif
+ ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 );
+ if( ret ) {
+ /*fprintf (stderr, "SILK encode error: %d\n", ret);*/
+ /* Handle error */
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ if (nBytes==0)
+ {
+ st->rangeFinal = 0;
+ data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
+ RESTORE_STACK;
+ return 1;
+ }
+ /* Extract SILK internal bandwidth for signaling in first byte */
+ if( st->mode == MODE_SILK_ONLY ) {
+ if( st->silk_mode.internalSampleRate == 8000 ) {
+ curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+ } else if( st->silk_mode.internalSampleRate == 12000 ) {
+ curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+ } else if( st->silk_mode.internalSampleRate == 16000 ) {
+ curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ }
+ } else {
+ silk_assert( st->silk_mode.internalSampleRate == 16000 );
+ }
+
+ st->silk_mode.opusCanSwitch = st->silk_mode.switchReady;
+ /* FIXME: How do we allocate the redundancy for CBR? */
+ if (st->silk_mode.opusCanSwitch)
+ {
+ redundancy = 1;
+ celt_to_silk = 0;
+ st->silk_bw_switch = 1;
+ }
+ }
+
+ /* CELT processing */
+ {
+ int endband=21;
+
+ switch(curr_bandwidth)
+ {
+ case OPUS_BANDWIDTH_NARROWBAND:
+ endband = 13;
+ break;
+ case OPUS_BANDWIDTH_MEDIUMBAND:
+ case OPUS_BANDWIDTH_WIDEBAND:
+ endband = 17;
+ break;
+ case OPUS_BANDWIDTH_SUPERWIDEBAND:
+ endband = 19;
+ break;
+ case OPUS_BANDWIDTH_FULLBAND:
+ endband = 21;
+ break;
+ }
+ celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband));
+ celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels));
+ }
+ celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX));
+ if (st->mode != MODE_SILK_ONLY)
+ {
+ opus_val32 celt_pred=2;
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
+ /* We may still decide to disable prediction later */
+ if (st->silk_mode.reducedDependency)
+ celt_pred = 0;
+ celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred));
+
+ if (st->mode == MODE_HYBRID)
+ {
+ int len;
+
+ len = (ec_tell(&enc)+7)>>3;
+ if (redundancy)
+ len += st->mode == MODE_HYBRID ? 3 : 1;
+ if( st->use_vbr ) {
+ nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs);
+ } else {
+ /* check if SILK used up too much */
+ nb_compr_bytes = len > bytes_target ? len : bytes_target;
+ }
+ } else {
+ if (st->use_vbr)
+ {
+ opus_int32 bonus=0;
+#ifndef DISABLE_FLOAT_API
+ if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
+ {
+ bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50);
+ if (analysis_info.valid)
+ bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality));
+ }
+#endif
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
+ celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus));
+ nb_compr_bytes = max_data_bytes-1-redundancy_bytes;
+ } else {
+ nb_compr_bytes = bytes_target;
+ }
+ }
+
+ } else {
+ nb_compr_bytes = 0;
+ }
+
+ ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
+ if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
+ {
+ for (i=0;i<st->channels*st->Fs/400;i++)
+ tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
+ }
+
+ for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
+ st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size];
+ for (;i<st->encoder_buffer*st->channels;i++)
+ st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i];
+
+ /* gain_fade() and stereo_fade() need to be after the buffer copying
+ because we don't want any of this to affect the SILK part */
+ if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {
+ gain_fade(pcm_buf, pcm_buf,
+ st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
+ }
+ st->prev_HB_gain = HB_gain;
+ if (st->mode != MODE_HYBRID || st->stream_channels==1)
+ st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-30000));
+ if( !st->energy_masking && st->channels == 2 ) {
+ /* Apply stereo width reduction (at low bitrates) */
+ if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
+ opus_val16 g1, g2;
+ g1 = st->hybrid_stereo_width_Q14;
+ g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14);
+#ifdef OPUS_FIXED_POINT
+ g1 = g1==16384 ? Q15ONE : SHL16(g1,1);
+ g2 = g2==16384 ? Q15ONE : SHL16(g2,1);
+#else
+ g1 *= (1.f/16384);
+ g2 *= (1.f/16384);
+#endif
+ stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
+ frame_size, st->channels, celt_mode->window, st->Fs);
+ st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
+ }
+ }
+
+ if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1))
+ {
+ /* For SILK mode, the redundancy is inferred from the length */
+ if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes))
+ ec_enc_bit_logp(&enc, redundancy, 12);
+ if (redundancy)
+ {
+ int max_redundancy;
+ ec_enc_bit_logp(&enc, celt_to_silk, 1);
+ if (st->mode == MODE_HYBRID)
+ max_redundancy = (max_data_bytes-1)-nb_compr_bytes;
+ else
+ max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3);
+ /* Target the same bit-rate for redundancy as for the rest,
+ up to a max of 257 bytes */
+ redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600);
+ redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes));
+ if (st->mode == MODE_HYBRID)
+ ec_enc_uint(&enc, redundancy_bytes-2, 256);
+ }
+ } else {
+ redundancy = 0;
+ }
+
+ if (!redundancy)
+ {
+ st->silk_bw_switch = 0;
+ redundancy_bytes = 0;
+ }
+ if (st->mode != MODE_CELT_ONLY)start_band=17;
+
+ if (st->mode == MODE_SILK_ONLY)
+ {
+ ret = (ec_tell(&enc)+7)>>3;
+ ec_enc_done(&enc);
+ nb_compr_bytes = ret;
+ } else {
+ nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes);
+ ec_enc_shrink(&enc, nb_compr_bytes);
+ }
+
+#ifndef DISABLE_FLOAT_API
+ if (redundancy || st->mode != MODE_SILK_ONLY)
+ celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
+#endif
+
+ /* 5 ms redundant frame for CELT->SILK */
+ if (redundancy && celt_to_silk)
+ {
+ int err;
+ celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
+ err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
+ if (err < 0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
+ celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+ }
+
+ celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band));
+
+ if (st->mode != MODE_SILK_ONLY)
+ {
+ if (st->mode != st->prev_mode && st->prev_mode > 0)
+ {
+ unsigned char dummy[2];
+ celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+
+ /* Prefilling */
+ celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL);
+ celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
+ }
+ /* If false, we already busted the budget and we'll end up with a "PLC packet" */
+ if (ec_tell(&enc) <= 8*nb_compr_bytes)
+ {
+ ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
+ if (ret < 0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ }
+ }
+
+ /* 5 ms redundant frame for SILK->CELT */
+ if (redundancy && !celt_to_silk)
+ {
+ int err;
+ unsigned char dummy[2];
+ int N2, N4;
+ N2 = st->Fs/200;
+ N4 = st->Fs/400;
+
+ celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+ celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
+ celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
+
+ /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
+ celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);
+
+ err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
+ if (err < 0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
+ }
+
+
+
+ /* Signalling the mode in the first byte */
+ data--;
+ data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
+
+ st->rangeFinal = enc.rng ^ redundant_rng;
+
+ if (to_celt)
+ st->prev_mode = MODE_CELT_ONLY;
+ else
+ st->prev_mode = st->mode;
+ st->prev_channels = st->stream_channels;
+ st->prev_framesize = frame_size;
+
+ st->first = 0;
+
+ /* In the unlikely case that the SILK encoder busted its target, tell
+ the decoder to call the PLC */
+ if (ec_tell(&enc) > (max_data_bytes-1)*8)
+ {
+ if (max_data_bytes < 2)
+ {
+ RESTORE_STACK;
+ return OPUS_BUFFER_TOO_SMALL;
+ }
+ data[1] = 0;
+ ret = 1;
+ st->rangeFinal = 0;
+ } else if (st->mode==MODE_SILK_ONLY&&!redundancy)
+ {
+ /*When in LPC only mode it's perfectly
+ reasonable to strip off trailing zero bytes as
+ the required range decoder behavior is to
+ fill these in. This can't be done when the MDCT
+ modes are used because the decoder needs to know
+ the actual length for allocation purposes.*/
+ while(ret>2&&data[ret]==0)ret--;
+ }
+ /* Count ToC and redundancy */
+ ret += 1+redundancy_bytes;
+ if (!st->use_vbr)
+ {
+ if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK)
+
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ ret = max_data_bytes;
+ }
+ RESTORE_STACK;
+ return ret;
+}
+
+#ifdef OPUS_FIXED_POINT
+
+#ifndef DISABLE_FLOAT_API
+/* Fixed-point build: float PCM entry point.
+   Converts the caller's float samples to 16-bit integers with FLOAT2INT16()
+   and encodes them through opus_encode_native() with an lsb depth of 16.
+   The untouched float buffer is passed along as the analysis input, together
+   with analysis_frame_size and the downmix_float callback.
+   Returns the value of opus_encode_native(), or OPUS_BAD_ARG when the
+   computed frame size is not positive. */
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
+      unsigned char *data, opus_int32 max_data_bytes)
+{
+   int i, ret;
+   int frame_size;
+   int delay_compensation;
+   VARDECL(opus_int16, in);
+   ALLOC_STACK;
+
+   /* The restricted low-delay application runs without delay compensation. */
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
+
+   /* Never size the conversion buffer from a non-positive frame size.
+      opus_encode_native() rejects any frame_size with 400*frame_size < Fs
+      by returning OPUS_BAD_ARG, so report the same error before allocating. */
+   if (frame_size <= 0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+
+   ALLOC(in, frame_size*st->channels, opus_int16);
+
+   for (i=0;i<frame_size*st->channels;i++)
+      in[i] = FLOAT2INT16(pcm[i]);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+   RESTORE_STACK;
+   return ret;
+}
+#endif
+
+/* Fixed-point build: 16-bit PCM entry point.
+   The samples are already in the encoder's native format, so they are handed
+   to opus_encode_native() directly (lsb depth 16) and also serve as the
+   analysis input, using the downmix_int callback.
+   Returns the value of opus_encode_native(). */
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
+      unsigned char *data, opus_int32 out_data_bytes)
+{
+   int frame_size;
+   /* The restricted low-delay application runs without delay compensation. */
+   const int delay_compensation =
+         (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) ? 0 : st->delay_compensation;
+
+   /* The subframe memory argument only exists when the float API is built. */
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_int
+#ifndef DISABLE_FLOAT_API
+         , st->analysis.subframe_mem
+#endif
+         );
+
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16,
+         pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
+}
+
+#else
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
+ unsigned char *data, opus_int32 max_data_bytes)
+{
+ int i, ret;
+ int frame_size;
+ int delay_compensation;
+ VARDECL(float, in);
+ ALLOC_STACK;
+ /* 16-bit entry point of the build without OPUS_FIXED_POINT: the integer input is scaled to float and handed to opus_encode_native(), whose result is returned. */
+ if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ delay_compensation = 0;
+ else
+ delay_compensation = st->delay_compensation;
+ frame_size = compute_frame_size(pcm, analysis_frame_size,
+ st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+ delay_compensation, downmix_int, st->analysis.subframe_mem);
+ /* Temporary buffer for frame_size*st->channels scaled samples. */
+ ALLOC(in, frame_size*st->channels, float);
+ /* Scale each interleaved sample by 1/32768. */
+ for (i=0;i<frame_size*st->channels;i++)
+ in[i] = (1.0f/32768)*pcm[i];
+ ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); /* the original 16-bit pcm is passed as a separate argument - presumably the analysis input, confirm against opus_encode_native() */
+ RESTORE_STACK;
+ return ret;
+}
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
+ unsigned char *data, opus_int32 out_data_bytes)
+{ /* Float entry point of the build without OPUS_FIXED_POINT: pcm is passed to opus_encode_native() without any conversion. */
+ int frame_size;
+ int delay_compensation;
+ if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ delay_compensation = 0; /* the restricted low-delay application uses no delay compensation */
+ else
+ delay_compensation = st->delay_compensation;
+ frame_size = compute_frame_size(pcm, analysis_frame_size,
+ st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+ delay_compensation, downmix_float, st->analysis.subframe_mem);
+ return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, /* sixth argument is 24 here but 16 in the other entry points - presumably the input LSB depth, confirm against opus_encode_native() */
+ pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+}
+#endif
+
+
+int opus_encoder_ctl(OpusEncoder *st, int request, ...)
+{
+ int ret;
+ CELTEncoder *celt_enc;
+ va_list ap;
+ /* Dispatches one CTL request. SET cases read an opus_int32 from the varargs and validate it; GET cases read a pointer and store the current setting through it. Returns OPUS_OK unless a case assigns ret or jumps to bad_arg (OPUS_BAD_ARG). */
+ ret = OPUS_OK;
+ va_start(ap, request);
+ /* The CELT encoder state is addressed celt_enc_offset bytes from the start of st. */
+ celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+
+ switch (request)
+ {
+ case OPUS_SET_APPLICATION_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if ( (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO
+ && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ || (!st->first && st->application != value)) /* switching to a different application is refused once st->first is 0 */
+ {
+ ret = OPUS_BAD_ARG;
+ break;
+ }
+ st->application = value;
+ }
+ break;
+ case OPUS_GET_APPLICATION_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->application;
+ }
+ break;
+ case OPUS_SET_BITRATE_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX)
+ {
+ if (value <= 0)
+ goto bad_arg;
+ else if (value <= 500)
+ value = 500; /* positive requests up to 500 are raised to 500 */
+ else if (value > (opus_int32)300000*st->channels)
+ value = (opus_int32)300000*st->channels; /* capped at 300000 per channel */
+ }
+ st->user_bitrate_bps = value;
+ }
+ break;
+ case OPUS_GET_BITRATE_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276);
+ }
+ break;
+ case OPUS_SET_FORCE_CHANNELS_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if((value<1 || value>st->channels) && value != OPUS_AUTO)
+ {
+ goto bad_arg;
+ }
+ st->force_channels = value;
+ }
+ break;
+ case OPUS_GET_FORCE_CHANNELS_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->force_channels;
+ }
+ break;
+ case OPUS_SET_MAX_BANDWIDTH_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND)
+ {
+ goto bad_arg;
+ }
+ st->max_bandwidth = value;
+ if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+ st->silk_mode.maxInternalSampleRate = 8000;
+ } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+ st->silk_mode.maxInternalSampleRate = 12000;
+ } else {
+ st->silk_mode.maxInternalSampleRate = 16000;
+ }
+ }
+ break;
+ case OPUS_GET_MAX_BANDWIDTH_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->max_bandwidth;
+ }
+ break;
+ case OPUS_SET_BANDWIDTH_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO)
+ {
+ goto bad_arg;
+ }
+ st->user_bandwidth = value;
+ if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+ st->silk_mode.maxInternalSampleRate = 8000;
+ } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+ st->silk_mode.maxInternalSampleRate = 12000;
+ } else { /* also taken for OPUS_AUTO. NOTE(review): these writes replace the maxInternalSampleRate chosen by OPUS_SET_MAX_BANDWIDTH_REQUEST - confirm intended */
+ st->silk_mode.maxInternalSampleRate = 16000;
+ }
+ }
+ break;
+ case OPUS_GET_BANDWIDTH_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->bandwidth; /* reports st->bandwidth, not the user_bandwidth stored by the SET request */
+ }
+ break;
+ case OPUS_SET_DTX_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>1)
+ {
+ goto bad_arg;
+ }
+ st->silk_mode.useDTX = value;
+ }
+ break;
+ case OPUS_GET_DTX_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->silk_mode.useDTX;
+ }
+ break;
+ case OPUS_SET_COMPLEXITY_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>10)
+ {
+ goto bad_arg;
+ }
+ st->silk_mode.complexity = value;
+ celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value)); /* forwarded to the CELT encoder; its return value is not checked */
+ }
+ break;
+ case OPUS_GET_COMPLEXITY_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->silk_mode.complexity;
+ }
+ break;
+ case OPUS_SET_INBAND_FEC_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>1)
+ {
+ goto bad_arg;
+ }
+ st->silk_mode.useInBandFEC = value;
+ }
+ break;
+ case OPUS_GET_INBAND_FEC_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->silk_mode.useInBandFEC;
+ }
+ break;
+ case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value < 0 || value > 100)
+ {
+ goto bad_arg;
+ }
+ st->silk_mode.packetLossPercentage = value;
+ celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value)); /* forwarded to the CELT encoder; its return value is not checked */
+ }
+ break;
+ case OPUS_GET_PACKET_LOSS_PERC_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->silk_mode.packetLossPercentage;
+ }
+ break;
+ case OPUS_SET_VBR_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>1)
+ {
+ goto bad_arg;
+ }
+ st->use_vbr = value;
+ st->silk_mode.useCBR = 1-value; /* the SILK flag is kept as the inverse of use_vbr */
+ }
+ break;
+ case OPUS_GET_VBR_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->use_vbr;
+ }
+ break;
+ case OPUS_SET_VOICE_RATIO_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value<-1 || value>100)
+ {
+ goto bad_arg;
+ }
+ st->voice_ratio = value;
+ }
+ break;
+ case OPUS_GET_VOICE_RATIO_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->voice_ratio;
+ }
+ break;
+ case OPUS_SET_VBR_CONSTRAINT_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>1)
+ {
+ goto bad_arg;
+ }
+ st->vbr_constraint = value;
+ }
+ break;
+ case OPUS_GET_VBR_CONSTRAINT_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->vbr_constraint;
+ }
+ break;
+ case OPUS_SET_SIGNAL_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC)
+ {
+ goto bad_arg;
+ }
+ st->signal_type = value;
+ }
+ break;
+ case OPUS_GET_SIGNAL_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->signal_type;
+ }
+ break;
+ case OPUS_GET_LOOKAHEAD_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->Fs/400;
+ if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ *value += st->delay_compensation; /* delay compensation is only added outside the restricted low-delay application */
+ }
+ break;
+ case OPUS_GET_SAMPLE_RATE_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->Fs;
+ }
+ break;
+ case OPUS_GET_FINAL_RANGE_REQUEST:
+ {
+ opus_uint32 *value = va_arg(ap, opus_uint32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->rangeFinal;
+ }
+ break;
+ case OPUS_SET_LSB_DEPTH_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value<8 || value>24)
+ {
+ goto bad_arg;
+ }
+ st->lsb_depth=value;
+ }
+ break;
+ case OPUS_GET_LSB_DEPTH_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->lsb_depth;
+ }
+ break;
+ case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS &&
+ value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS &&
+ value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS &&
+ value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_VARIABLE)
+ {
+ goto bad_arg;
+ }
+ st->variable_duration = value;
+ celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value)); /* forwarded to the CELT encoder; its return value is not checked */
+ }
+ break;
+ case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->variable_duration;
+ }
+ break;
+ case OPUS_SET_PREDICTION_DISABLED_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value > 1 || value < 0)
+ goto bad_arg;
+ st->silk_mode.reducedDependency = value;
+ }
+ break;
+ case OPUS_GET_PREDICTION_DISABLED_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ goto bad_arg;
+ *value = st->silk_mode.reducedDependency;
+ }
+ break;
+ case OPUS_RESET_STATE:
+ {
+ void *silk_enc;
+ silk_EncControlStruct dummy;
+ silk_enc = (char*)st+st->silk_enc_offset;
+ /* Zero everything from the OPUS_ENCODER_RESET_START member to the end of the OpusEncoder struct; members located before it are left untouched. */
+ OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START,
+ sizeof(OpusEncoder)-
+ ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st));
+ /* Reset both sub-encoders, then restore the fields below that need a non-zero starting value. dummy is not read afterwards. */
+ celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+ silk_InitEncoder( silk_enc, st->arch, &dummy );
+ st->stream_channels = st->channels;
+ st->hybrid_stereo_width_Q14 = 1 << 14;
+ st->prev_HB_gain = Q15ONE;
+ st->first = 1;
+ st->mode = MODE_HYBRID;
+ st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
+ st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
+ }
+ break;
+ case OPUS_SET_FORCE_MODE_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO)
+ {
+ goto bad_arg;
+ }
+ st->user_forced_mode = value;
+ }
+ break;
+ case OPUS_SET_LFE_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ st->lfe = value; /* stored without any range check in this function */
+ ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); /* the CELT encoder's result becomes the return code */
+ }
+ break;
+ case OPUS_SET_ENERGY_MASK_REQUEST:
+ {
+ opus_val16 *value = va_arg(ap, opus_val16*);
+ st->energy_masking = value; /* the pointer is stored as-is; a null pointer is not rejected here */
+ ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); /* the CELT encoder's result becomes the return code */
+ }
+ break;
+
+ case CELT_GET_MODE_REQUEST:
+ {
+ const CELTMode ** value = va_arg(ap, const CELTMode**);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value));
+ }
+ break;
+ default:
+ /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/
+ ret = OPUS_UNIMPLEMENTED;
+ break;
+ }
+ va_end(ap);
+ return ret;
+bad_arg: /* shared exit for the goto bad_arg paths; va_end must still run before returning */
+ va_end(ap);
+ return OPUS_BAD_ARG;
+}
+
+void opus_encoder_destroy(OpusEncoder *st)
+{
+ opus_free(st); /* releases the encoder state by handing st to opus_free(); nothing else is torn down here */
+}
diff --git a/drivers/opus/opus_multistream.c b/drivers/opus/opus_multistream.c
new file mode 100644
index 0000000000..8211c0b470
--- /dev/null
+++ b/drivers/opus/opus_multistream.c
@@ -0,0 +1,92 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_multistream.h"
+#include "opus.h"
+#include "opus_private.h"
+#include "stack_alloc.h"
+#include <stdarg.h>
+#include "float_cast.h"
+#include "os_support.h"
+
+
+int validate_layout(const ChannelLayout *layout)
+{
+ int i, max_channel;
+ /* Returns 1 when the layout is acceptable and 0 otherwise: nb_streams+nb_coupled_streams must not exceed 255, and each of the nb_channels mapping entries must be either below that sum or exactly 255. */
+ max_channel = layout->nb_streams+layout->nb_coupled_streams;
+ if (max_channel>255)
+ return 0;
+ for (i=0;i<layout->nb_channels;i++)
+ {
+ if (layout->mapping[i] >= max_channel && layout->mapping[i] != 255)
+ return 0;
+ }
+ return 1;
+}
+
+
+int get_left_channel(const ChannelLayout *layout, int stream_id, int prev)
+{ /* Returns the first channel index after prev whose mapping entry equals stream_id*2, or -1 when there is none. */
+ int i;
+ i = (prev<0) ? 0 : prev+1; /* a negative prev starts the search at channel 0 */
+ for (;i<layout->nb_channels;i++)
+ {
+ if (layout->mapping[i]==stream_id*2)
+ return i;
+ }
+ return -1;
+}
+
+int get_right_channel(const ChannelLayout *layout, int stream_id, int prev)
+{ /* Returns the first channel index after prev whose mapping entry equals stream_id*2+1, or -1 when there is none. */
+ int i;
+ i = (prev<0) ? 0 : prev+1; /* a negative prev starts the search at channel 0 */
+ for (;i<layout->nb_channels;i++)
+ {
+ if (layout->mapping[i]==stream_id*2+1)
+ return i;
+ }
+ return -1;
+}
+
+int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev)
+{ /* Returns the first channel index after prev whose mapping entry equals stream_id+nb_coupled_streams, or -1 when there is none. */
+ int i;
+ i = (prev<0) ? 0 : prev+1; /* a negative prev starts the search at channel 0 */
+ for (;i<layout->nb_channels;i++)
+ {
+ if (layout->mapping[i]==stream_id+layout->nb_coupled_streams)
+ return i;
+ }
+ return -1;
+}
+
diff --git a/drivers/opus/opus_multistream.h b/drivers/opus/opus_multistream.h
new file mode 100644
index 0000000000..ae5997934a
--- /dev/null
+++ b/drivers/opus/opus_multistream.h
@@ -0,0 +1,660 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus_multistream.h
+ * @brief Opus reference implementation multistream API
+ */
+
+#ifndef OPUS_MULTISTREAM_H
+#define OPUS_MULTISTREAM_H
+
+#include "opus.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @cond OPUS_INTERNAL_DOC */
+
+/** Macros to trigger compilation errors when the wrong types are provided to a
+ * CTL. */
+/**@{*/
+#define __opus_check_encstate_ptr(ptr) ((ptr) + ((ptr) - (OpusEncoder**)(ptr)))
+#define __opus_check_decstate_ptr(ptr) ((ptr) + ((ptr) - (OpusDecoder**)(ptr)))
+/**@}*/
+
+/** These are the actual encoder and decoder CTL ID numbers.
+ * They should not be used directly by applications.
+ * In general, SETs should be even and GETs should be odd.*/
+/**@{*/
+#define OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST 5120
+#define OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST 5122
+/**@}*/
+
+/** @endcond */
+
+/** @defgroup opus_multistream_ctls Multistream specific encoder and decoder CTLs
+ *
+ * These are convenience macros that are specific to the
+ * opus_multistream_encoder_ctl() and opus_multistream_decoder_ctl()
+ * interface.
+ * The CTLs from @ref opus_genericctls, @ref opus_encoderctls, and
+ * @ref opus_decoderctls may be applied to a multistream encoder or decoder as
+ * well.
+ * In addition, you may retrieve the encoder or decoder state for a specific
+ * stream via #OPUS_MULTISTREAM_GET_ENCODER_STATE or
+ * #OPUS_MULTISTREAM_GET_DECODER_STATE and apply CTLs to it individually.
+ */
+/**@{*/
+
+/** Gets the encoder state for an individual stream of a multistream encoder.
+ * @param[in] x <tt>opus_int32</tt>: The index of the stream whose encoder you
+ * wish to retrieve.
+ * This must be non-negative and less than
+ * the <code>streams</code> parameter used
+ * to initialize the encoder.
+ * @param[out] y <tt>OpusEncoder**</tt>: Returns a pointer to the given
+ * encoder state.
+ * @retval OPUS_BAD_ARG The index of the requested stream was out of range.
+ * @hideinitializer
+ */
+#define OPUS_MULTISTREAM_GET_ENCODER_STATE(x,y) OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST, __opus_check_int(x), __opus_check_encstate_ptr(y)
+
+/** Gets the decoder state for an individual stream of a multistream decoder.
+ * @param[in] x <tt>opus_int32</tt>: The index of the stream whose decoder you
+ * wish to retrieve.
+ * This must be non-negative and less than
+ * the <code>streams</code> parameter used
+ * to initialize the decoder.
+ * @param[out] y <tt>OpusDecoder**</tt>: Returns a pointer to the given
+ * decoder state.
+ * @retval OPUS_BAD_ARG The index of the requested stream was out of range.
+ * @hideinitializer
+ */
+#define OPUS_MULTISTREAM_GET_DECODER_STATE(x,y) OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST, __opus_check_int(x), __opus_check_decstate_ptr(y)
+
+/**@}*/
+
+/** @defgroup opus_multistream Opus Multistream API
+ * @{
+ *
+ * The multistream API allows individual Opus streams to be combined into a
+ * single packet, enabling support for up to 255 channels. Unlike an
+ * elementary Opus stream, the encoder and decoder must negotiate the channel
+ * configuration before the decoder can successfully interpret the data in the
+ * packets produced by the encoder. Some basic information, such as packet
+ * duration, can be computed without any special negotiation.
+ *
+ * The format for multistream Opus packets is defined in the
+ * <a href="http://tools.ietf.org/html/draft-terriberry-oggopus">Ogg
+ * encapsulation specification</a> and is based on the self-delimited Opus
+ * framing described in Appendix B of <a href="http://tools.ietf.org/html/rfc6716">RFC 6716</a>.
+ * Normal Opus packets are just a degenerate case of multistream Opus packets,
+ * and can be encoded or decoded with the multistream API by setting
+ * <code>streams</code> to <code>1</code> when initializing the encoder or
+ * decoder.
+ *
+ * Multistream Opus streams can contain up to 255 elementary Opus streams.
+ * These may be either "uncoupled" or "coupled", indicating that the decoder
+ * is configured to decode them to either 1 or 2 channels, respectively.
+ * The streams are ordered so that all coupled streams appear at the
+ * beginning.
+ *
+ * A <code>mapping</code> table defines which decoded channel <code>i</code>
+ * should be used for each input/output (I/O) channel <code>j</code>. This table is
+ * typically provided as an unsigned char array.
+ * Let <code>i = mapping[j]</code> be the index for I/O channel <code>j</code>.
+ * If <code>i < 2*coupled_streams</code>, then I/O channel <code>j</code> is
+ * encoded as the left channel of stream <code>(i/2)</code> if <code>i</code>
+ * is even, or as the right channel of stream <code>(i/2)</code> if
+ * <code>i</code> is odd. Otherwise, I/O channel <code>j</code> is encoded as
+ * mono in stream <code>(i - coupled_streams)</code>, unless it has the special
+ * value 255, in which case it is omitted from the encoding entirely (the
+ * decoder will reproduce it as silence). Each value <code>i</code> must either
+ * be the special value 255 or be less than <code>streams + coupled_streams</code>.
+ *
+ * The output channels specified by the encoder
+ * should use the
+ * <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+ * channel ordering</a>. A decoder may wish to apply an additional permutation
+ * to the mapping the encoder used to achieve a different output channel
+ * order (e.g. for outputting in WAV order).
+ *
+ * Each multistream packet contains an Opus packet for each stream, and all of
+ * the Opus packets in a single multistream packet must have the same
+ * duration. Therefore the duration of a multistream packet can be extracted
+ * from the TOC sequence of the first stream, which is located at the
+ * beginning of the packet, just like an elementary Opus stream:
+ *
+ * @code
+ * int nb_samples;
+ * int nb_frames;
+ * nb_frames = opus_packet_get_nb_frames(data, len);
+ * if (nb_frames < 1)
+ * return nb_frames;
+ * nb_samples = opus_packet_get_samples_per_frame(data, 48000) * nb_frames;
+ * @endcode
+ *
+ * The general encoding and decoding process proceeds exactly the same as in
+ * the normal @ref opus_encoder and @ref opus_decoder APIs.
+ * See their documentation for an overview of how to use the corresponding
+ * multistream functions.
+ */
+
+/** Opus multistream encoder state.
+ * This contains the complete state of a multistream Opus encoder.
+ * It is position independent and can be freely copied.
+ * @see opus_multistream_encoder_create
+ * @see opus_multistream_encoder_init
+ */
+typedef struct OpusMSEncoder OpusMSEncoder;
+
+/** Opus multistream decoder state.
+ * This contains the complete state of a multistream Opus decoder.
+ * It is position independent and can be freely copied.
+ * @see opus_multistream_decoder_create
+ * @see opus_multistream_decoder_init
+ */
+typedef struct OpusMSDecoder OpusMSDecoder;
+
+/**\name Multistream encoder functions */
+/**@{*/
+
+/** Gets the size of an OpusMSEncoder structure.
+ * @param streams <tt>int</tt>: The total number of streams to encode from the
+ * input.
+ * This must be no more than 255.
+ * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
+ * to encode.
+ * This must be no larger than the total
+ * number of streams.
+ * Additionally, the total number of
+ * encoded channels (<code>streams +
+ * coupled_streams</code>) must be no
+ * more than 255.
+ * @returns The size in bytes on success, or a negative error code
+ * (see @ref opus_errorcodes) on error.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_encoder_get_size(
+ int streams,
+ int coupled_streams
+);
+
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_surround_encoder_get_size(
+ int channels,
+ int mapping_family
+); /* NOTE(review): undocumented in this header; presumably the surround counterpart of opus_multistream_encoder_get_size(), sized from a channel count and a mapping family - confirm against the implementation. */
+
+
+/** Allocates and initializes a multistream encoder state.
+ * Call opus_multistream_encoder_destroy() to release
+ * this object when finished.
+ * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
+ * This must be one of 8000, 12000, 16000,
+ * 24000, or 48000.
+ * @param channels <tt>int</tt>: Number of channels in the input signal.
+ * This must be at most 255.
+ * It may be greater than the number of
+ * coded channels (<code>streams +
+ * coupled_streams</code>).
+ * @param streams <tt>int</tt>: The total number of streams to encode from the
+ * input.
+ * This must be no more than the number of channels.
+ * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
+ * to encode.
+ * This must be no larger than the total
+ * number of streams.
+ * Additionally, the total number of
+ * encoded channels (<code>streams +
+ * coupled_streams</code>) must be no
+ * more than the number of input channels.
+ * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+ * encoded channels to input channels, as described in
+ * @ref opus_multistream. As an extra constraint, the
+ * multistream encoder does not allow encoding coupled
+ * streams for which one channel is unused since this
+ * is never a good idea.
+ * @param application <tt>int</tt>: The target encoder application.
+ * This must be one of the following:
+ * <dl>
+ * <dt>#OPUS_APPLICATION_VOIP</dt>
+ * <dd>Process signal for improved speech intelligibility.</dd>
+ * <dt>#OPUS_APPLICATION_AUDIO</dt>
+ * <dd>Favor faithfulness to the original input.</dd>
+ * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+ * <dd>Configure the minimum possible coding delay by disabling certain modes
+ * of operation.</dd>
+ * </dl>
+ * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
+ * code (see @ref opus_errorcodes) on
+ * failure.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_encoder_create(
+ opus_int32 Fs,
+ int channels,
+ int streams,
+ int coupled_streams,
+ const unsigned char *mapping,
+ int application,
+ int *error
+) OPUS_ARG_NONNULL(5);
+
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_encoder_create(
+ opus_int32 Fs,
+ int channels,
+ int mapping_family,
+ int *streams, /* pointer parameter, unlike the by-value streams of opus_multistream_encoder_create() - presumably an output, confirm */
+ int *coupled_streams, /* pointer parameter - presumably an output, confirm */
+ unsigned char *mapping, /* non-const here - presumably filled in by the call, confirm */
+ int application,
+ int *error
+) OPUS_ARG_NONNULL(5); /* NOTE(review): argument 5 of this prototype is coupled_streams; streams (4) and mapping (6) are pointers too but carry no annotation. The index matches opus_multistream_encoder_create(), where 5 is mapping - confirm which arguments are meant. */
+
+/** Initialize a previously allocated multistream encoder state.
+ * The memory pointed to by \a st must be at least the size returned by
+ * opus_multistream_encoder_get_size().
+ * This is intended for applications which use their own allocator instead of
+ * malloc.
+ * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+ * @see opus_multistream_encoder_create
+ * @see opus_multistream_encoder_get_size
+ * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to initialize.
+ * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
+ * This must be one of 8000, 12000, 16000,
+ * 24000, or 48000.
+ * @param channels <tt>int</tt>: Number of channels in the input signal.
+ * This must be at most 255.
+ * It may be greater than the number of
+ * coded channels (<code>streams +
+ * coupled_streams</code>).
+ * @param streams <tt>int</tt>: The total number of streams to encode from the
+ * input.
+ * This must be no more than the number of channels.
+ * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
+ * to encode.
+ * This must be no larger than the total
+ * number of streams.
+ * Additionally, the total number of
+ * encoded channels (<code>streams +
+ * coupled_streams</code>) must be no
+ * more than the number of input channels.
+ * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+ * encoded channels to input channels, as described in
+ * @ref opus_multistream. As an extra constraint, the
+ * multistream encoder does not allow encoding coupled
+ * streams for which one channel is unused since this
+ * is never a good idea.
+ * @param application <tt>int</tt>: The target encoder application.
+ * This must be one of the following:
+ * <dl>
+ * <dt>#OPUS_APPLICATION_VOIP</dt>
+ * <dd>Process signal for improved speech intelligibility.</dd>
+ * <dt>#OPUS_APPLICATION_AUDIO</dt>
+ * <dd>Favor faithfulness to the original input.</dd>
+ * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+ * <dd>Configure the minimum possible coding delay by disabling certain modes
+ * of operation.</dd>
+ * </dl>
+ * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
+ * on failure.
+ */
+OPUS_EXPORT int opus_multistream_encoder_init(
+ OpusMSEncoder *st,
+ opus_int32 Fs,
+ int channels,
+ int streams,
+ int coupled_streams,
+ const unsigned char *mapping,
+ int application
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
+
+OPUS_EXPORT int opus_multistream_surround_encoder_init(
+ OpusMSEncoder *st,
+ opus_int32 Fs,
+ int channels,
+ int mapping_family,
+ int *streams, /* pointer parameter, unlike the by-value streams of opus_multistream_encoder_init() - presumably an output, confirm */
+ int *coupled_streams, /* pointer parameter - presumably an output, confirm */
+ unsigned char *mapping, /* non-const here - presumably filled in by the call, confirm */
+ int application
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); /* NOTE(review): argument 6 of this prototype is coupled_streams; streams (5) and mapping (7) are pointers too but carry no annotation. The indices match opus_multistream_encoder_init(), where 6 is mapping - confirm which arguments are meant. */
+
+/** Encodes a multistream Opus frame.
+ * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+ * @param[in] pcm <tt>const opus_int16*</tt>: The input signal as interleaved
+ * samples.
+ * This must contain
+ * <code>frame_size*channels</code>
+ * samples.
+ * @param frame_size <tt>int</tt>: Number of samples per channel in the input
+ * signal.
+ * This must be an Opus frame size for the
+ * encoder's sampling rate.
+ * For example, at 48 kHz the permitted values
+ * are 120, 240, 480, 960, 1920, and 2880.
+ * Passing in a duration of less than 10 ms
+ * (480 samples at 48 kHz) will prevent the
+ * encoder from using the LPC or hybrid modes.
+ * @param[out] data <tt>unsigned char*</tt>: Output payload.
+ * This must contain storage for at
+ * least \a max_data_bytes.
+ * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+ * memory for the output
+ * payload. This may be
+ * used to impose an upper limit on
+ * the instant bitrate, but should
+ * not be used as the only bitrate
+ * control. Use #OPUS_SET_BITRATE to
+ * control the bitrate.
+ * @returns The length of the encoded packet (in bytes) on success or a
+ * negative error code (see @ref opus_errorcodes) on failure.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode(
+ OpusMSEncoder *st,
+ const opus_int16 *pcm,
+ int frame_size,
+ unsigned char *data,
+ opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Encodes a multistream Opus frame from floating point input.
+ * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+ * @param[in] pcm <tt>const float*</tt>: The input signal as interleaved
+ * samples with a normal range of
+ * +/-1.0.
+ * Samples with a range beyond +/-1.0
+ * are supported but will be clipped by
+ * decoders using the integer API and
+ * should only be used if it is known
+ * that the far end supports extended
+ * dynamic range.
+ * This must contain
+ * <code>frame_size*channels</code>
+ * samples.
+ * @param frame_size <tt>int</tt>: Number of samples per channel in the input
+ * signal.
+ * This must be an Opus frame size for the
+ * encoder's sampling rate.
+ * For example, at 48 kHz the permitted values
+ * are 120, 240, 480, 960, 1920, and 2880.
+ * Passing in a duration of less than 10 ms
+ * (480 samples at 48 kHz) will prevent the
+ * encoder from using the LPC or hybrid modes.
+ * @param[out] data <tt>unsigned char*</tt>: Output payload.
+ * This must contain storage for at
+ * least \a max_data_bytes.
+ * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+ * memory for the output
+ * payload. This may be
+ * used to impose an upper limit on
+ * the instant bitrate, but should
+ * not be used as the only bitrate
+ * control. Use #OPUS_SET_BITRATE to
+ * control the bitrate.
+ * @returns The length of the encoded packet (in bytes) on success or a
+ * negative error code (see @ref opus_errorcodes) on failure.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode_float(
+ OpusMSEncoder *st,
+ const float *pcm,
+ int frame_size,
+ unsigned char *data,
+ opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Frees an <code>OpusMSEncoder</code> allocated by
+ * opus_multistream_encoder_create().
+ * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to be freed.
+ */
+OPUS_EXPORT void opus_multistream_encoder_destroy(OpusMSEncoder *st);
+
+/** Perform a CTL function on a multistream Opus encoder.
+ *
+ * Generally the request and subsequent arguments are generated by a
+ * convenience macro.
+ * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+ * @param request This and all remaining parameters should be replaced by one
+ * of the convenience macros in @ref opus_genericctls,
+ * @ref opus_encoderctls, or @ref opus_multistream_ctls.
+ * @see opus_genericctls
+ * @see opus_encoderctls
+ * @see opus_multistream_ctls
+ */
+OPUS_EXPORT int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/**@}*/
+
+/**\name Multistream decoder functions */
+/**@{*/
+
+/** Gets the size of an <code>OpusMSDecoder</code> structure.
+ * @param streams <tt>int</tt>: The total number of streams coded in the
+ * input.
+ * This must be no more than 255.
+ * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+ * (2 channel) streams.
+ * This must be no larger than the total
+ * number of streams.
+ * Additionally, the total number of
+ * coded channels (<code>streams +
+ * coupled_streams</code>) must be no
+ * more than 255.
+ * @returns The size in bytes on success, or a negative error code
+ * (see @ref opus_errorcodes) on error.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_decoder_get_size(
+ int streams,
+ int coupled_streams
+);
+
+/** Allocates and initializes a multistream decoder state.
+ * Call opus_multistream_decoder_destroy() to release
+ * this object when finished.
+ * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
+ * This must be one of 8000, 12000, 16000,
+ * 24000, or 48000.
+ * @param channels <tt>int</tt>: Number of channels to output.
+ * This must be at most 255.
+ * It may be different from the number of coded
+ * channels (<code>streams +
+ * coupled_streams</code>).
+ * @param streams <tt>int</tt>: The total number of streams coded in the
+ * input.
+ * This must be no more than 255.
+ * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+ * (2 channel) streams.
+ * This must be no larger than the total
+ * number of streams.
+ * Additionally, the total number of
+ * coded channels (<code>streams +
+ * coupled_streams</code>) must be no
+ * more than 255.
+ * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+ * coded channels to output channels, as described in
+ * @ref opus_multistream.
+ * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
+ * code (see @ref opus_errorcodes) on
+ * failure.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSDecoder *opus_multistream_decoder_create(
+ opus_int32 Fs,
+ int channels,
+ int streams,
+ int coupled_streams,
+ const unsigned char *mapping,
+ int *error
+) OPUS_ARG_NONNULL(5);
+
+/** Initialize a previously allocated decoder state object.
+ * The memory pointed to by \a st must be at least the size returned by
+ * opus_multistream_decoder_get_size().
+ * This is intended for applications which use their own allocator instead of
+ * malloc.
+ * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+ * @see opus_multistream_decoder_create
+ * @see opus_multistream_decoder_get_size
+ * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state to initialize.
+ * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
+ * This must be one of 8000, 12000, 16000,
+ * 24000, or 48000.
+ * @param channels <tt>int</tt>: Number of channels to output.
+ * This must be at most 255.
+ * It may be different from the number of coded
+ * channels (<code>streams +
+ * coupled_streams</code>).
+ * @param streams <tt>int</tt>: The total number of streams coded in the
+ * input.
+ * This must be no more than 255.
+ * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+ * (2 channel) streams.
+ * This must be no larger than the total
+ * number of streams.
+ * Additionally, the total number of
+ * coded channels (<code>streams +
+ * coupled_streams</code>) must be no
+ * more than 255.
+ * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+ * coded channels to output channels, as described in
+ * @ref opus_multistream.
+ * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
+ * on failure.
+ */
+OPUS_EXPORT int opus_multistream_decoder_init(
+ OpusMSDecoder *st,
+ opus_int32 Fs,
+ int channels,
+ int streams,
+ int coupled_streams,
+ const unsigned char *mapping
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
+
+/** Decode a multistream Opus packet.
+ * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state.
+ * @param[in] data <tt>const unsigned char*</tt>: Input payload.
+ * Use a <code>NULL</code>
+ * pointer to indicate packet
+ * loss.
+ * @param len <tt>opus_int32</tt>: Number of bytes in payload.
+ * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved
+ * samples.
+ * This must contain room for
+ * <code>frame_size*channels</code>
+ * samples.
+ * @param frame_size <tt>int</tt>: The number of samples per channel of
+ * available space in \a pcm.
+ * If this is less than the maximum packet duration
+ * (120 ms; 5760 for 48kHz), this function will not be capable
+ * of decoding some packets. In the case of PLC (data==NULL)
+ * or FEC (decode_fec=1), then frame_size needs to be exactly
+ * the duration of audio that is missing, otherwise the
+ * decoder will not be in the optimal state to decode the
+ * next incoming packet. For the PLC and FEC cases, frame_size
+ * <b>must</b> be a multiple of 2.5 ms.
+ * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
+ * forward error correction data be decoded.
+ * If no such data is available, the frame is
+ * decoded as if it were lost.
+ * @returns Number of samples decoded on success or a negative error code
+ * (see @ref opus_errorcodes) on failure.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode(
+ OpusMSDecoder *st,
+ const unsigned char *data,
+ opus_int32 len,
+ opus_int16 *pcm,
+ int frame_size,
+ int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Decode a multistream Opus packet with floating point output.
+ * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state.
+ * @param[in] data <tt>const unsigned char*</tt>: Input payload.
+ * Use a <code>NULL</code>
+ * pointer to indicate packet
+ * loss.
+ * @param len <tt>opus_int32</tt>: Number of bytes in payload.
+ * @param[out] pcm <tt>float*</tt>: Output signal, with interleaved
+ * samples.
+ * This must contain room for
+ * <code>frame_size*channels</code>
+ * samples.
+ * @param frame_size <tt>int</tt>: The number of samples per channel of
+ * available space in \a pcm.
+ * If this is less than the maximum packet duration
+ * (120 ms; 5760 for 48kHz), this function will not be capable
+ * of decoding some packets. In the case of PLC (data==NULL)
+ * or FEC (decode_fec=1), then frame_size needs to be exactly
+ * the duration of audio that is missing, otherwise the
+ * decoder will not be in the optimal state to decode the
+ * next incoming packet. For the PLC and FEC cases, frame_size
+ * <b>must</b> be a multiple of 2.5 ms.
+ * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
+ * forward error correction data be decoded.
+ * If no such data is available, the frame is
+ * decoded as if it were lost.
+ * @returns Number of samples decoded on success or a negative error code
+ * (see @ref opus_errorcodes) on failure.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode_float(
+ OpusMSDecoder *st,
+ const unsigned char *data,
+ opus_int32 len,
+ float *pcm,
+ int frame_size,
+ int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on a multistream Opus decoder.
+ *
+ * Generally the request and subsequent arguments are generated by a
+ * convenience macro.
+ * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state.
+ * @param request This and all remaining parameters should be replaced by one
+ * of the convenience macros in @ref opus_genericctls,
+ * @ref opus_decoderctls, or @ref opus_multistream_ctls.
+ * @see opus_genericctls
+ * @see opus_decoderctls
+ * @see opus_multistream_ctls
+ */
+OPUS_EXPORT int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/** Frees an <code>OpusMSDecoder</code> allocated by
+ * opus_multistream_decoder_create().
+ * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state to be freed.
+ */
+OPUS_EXPORT void opus_multistream_decoder_destroy(OpusMSDecoder *st);
+
+/**@}*/
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_MULTISTREAM_H */
diff --git a/drivers/opus/opus_multistream_decoder.c b/drivers/opus/opus_multistream_decoder.c
new file mode 100644
index 0000000000..64a0c24067
--- /dev/null
+++ b/drivers/opus/opus_multistream_decoder.c
@@ -0,0 +1,537 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_multistream.h"
+#include "opus.h"
+#include "opus_private.h"
+#include "stack_alloc.h"
+#include <stdarg.h>
+#include "float_cast.h"
+#include "os_support.h"
+
+struct OpusMSDecoder {
+ ChannelLayout layout;
+ /* Decoder states go here */
+};
+
+
+
+
+/* DECODER */
+
+opus_int32 opus_multistream_decoder_get_size(int nb_streams, int nb_coupled_streams)
+{
+ int coupled_size;
+ int mono_size;
+
+ if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0;
+ coupled_size = opus_decoder_get_size(2);
+ mono_size = opus_decoder_get_size(1);
+ return align(sizeof(OpusMSDecoder))
+ + nb_coupled_streams * align(coupled_size)
+ + (nb_streams-nb_coupled_streams) * align(mono_size);
+}
+
+int opus_multistream_decoder_init(
+ OpusMSDecoder *st,
+ opus_int32 Fs,
+ int channels,
+ int streams,
+ int coupled_streams,
+ const unsigned char *mapping
+)
+{
+ int coupled_size;
+ int mono_size;
+ int i, ret;
+ char *ptr;
+
+ if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+ (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+ return OPUS_BAD_ARG;
+
+ st->layout.nb_channels = channels;
+ st->layout.nb_streams = streams;
+ st->layout.nb_coupled_streams = coupled_streams;
+
+ for (i=0;i<st->layout.nb_channels;i++)
+ st->layout.mapping[i] = mapping[i];
+ if (!validate_layout(&st->layout))
+ return OPUS_BAD_ARG;
+
+ ptr = (char*)st + align(sizeof(OpusMSDecoder));
+ coupled_size = opus_decoder_get_size(2);
+ mono_size = opus_decoder_get_size(1);
+
+ for (i=0;i<st->layout.nb_coupled_streams;i++)
+ {
+ ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 2);
+ if(ret!=OPUS_OK)return ret;
+ ptr += align(coupled_size);
+ }
+ for (;i<st->layout.nb_streams;i++)
+ {
+ ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 1);
+ if(ret!=OPUS_OK)return ret;
+ ptr += align(mono_size);
+ }
+ return OPUS_OK;
+}
+
+
+OpusMSDecoder *opus_multistream_decoder_create(
+ opus_int32 Fs,
+ int channels,
+ int streams,
+ int coupled_streams,
+ const unsigned char *mapping,
+ int *error
+)
+{
+ int ret;
+ OpusMSDecoder *st;
+ if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+ (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+ {
+ if (error)
+ *error = OPUS_BAD_ARG;
+ return NULL;
+ }
+ st = (OpusMSDecoder *)opus_alloc(opus_multistream_decoder_get_size(streams, coupled_streams));
+ if (st==NULL)
+ {
+ if (error)
+ *error = OPUS_ALLOC_FAIL;
+ return NULL;
+ }
+ ret = opus_multistream_decoder_init(st, Fs, channels, streams, coupled_streams, mapping);
+ if (error)
+ *error = ret;
+ if (ret != OPUS_OK)
+ {
+ opus_free(st);
+ st = NULL;
+ }
+ return st;
+}
+
+typedef void (*opus_copy_channel_out_func)(
+ void *dst,
+ int dst_stride,
+ int dst_channel,
+ const opus_val16 *src,
+ int src_stride,
+ int frame_size
+);
+
+static int opus_multistream_packet_validate(const unsigned char *data,
+ opus_int32 len, int nb_streams, opus_int32 Fs)
+{
+ int s;
+ int count;
+ unsigned char toc;
+ opus_int16 size[48];
+ int samples=0;
+ opus_int32 packet_offset;
+
+ for (s=0;s<nb_streams;s++)
+ {
+ int tmp_samples;
+ if (len<=0)
+ return OPUS_INVALID_PACKET;
+ count = opus_packet_parse_impl(data, len, s!=nb_streams-1, &toc, NULL,
+ size, NULL, &packet_offset);
+ if (count<0)
+ return count;
+ tmp_samples = opus_packet_get_nb_samples(data, packet_offset, Fs);
+ if (s!=0 && samples != tmp_samples)
+ return OPUS_INVALID_PACKET;
+ samples = tmp_samples;
+ data += packet_offset;
+ len -= packet_offset;
+ }
+ return samples;
+}
+
+static int opus_multistream_decode_native(
+ OpusMSDecoder *st,
+ const unsigned char *data,
+ opus_int32 len,
+ void *pcm,
+ opus_copy_channel_out_func copy_channel_out,
+ int frame_size,
+ int decode_fec,
+ int soft_clip
+)
+{ /* Shared decode path: decodes each stream in turn and copies it into its mapped pcm channel(s) through copy_channel_out. Returns samples per channel, or a negative error code. */
+ opus_int32 Fs;
+ int coupled_size;
+ int mono_size;
+ int s, c;
+ char *ptr;
+ int do_plc=0;
+ VARDECL(opus_val16, buf);
+ ALLOC_STACK;
+ if (frame_size <= 0) { RESTORE_STACK; return OPUS_BAD_ARG; } /* a non-positive size must never reach ALLOC() below */
+ /* Limit frame_size to avoid excessive stack allocations. */
+ opus_multistream_decoder_ctl(st, OPUS_GET_SAMPLE_RATE(&Fs));
+ frame_size = IMIN(frame_size, Fs/25*3);
+ ALLOC(buf, 2*frame_size, opus_val16); /* scratch for one stream; factor 2 covers a coupled (2 channel) stream */
+ ptr = (char*)st + align(sizeof(OpusMSDecoder)); /* first per-stream decoder follows the aligned header */
+ coupled_size = opus_decoder_get_size(2);
+ mono_size = opus_decoder_get_size(1);
+
+ if (len==0)
+ do_plc = 1; /* an empty payload is handled as packet loss */
+ if (len < 0)
+ {
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+ if (!do_plc && len < 2*st->layout.nb_streams-1)
+ {
+ RESTORE_STACK;
+ return OPUS_INVALID_PACKET;
+ }
+ if (!do_plc)
+ {
+ /* Check the whole packet up front: every stream must parse and report the same duration. */
+ int ret = opus_multistream_packet_validate(data, len, st->layout.nb_streams, Fs);
+ if (ret < 0)
+ {
+ RESTORE_STACK;
+ return ret;
+ } else if (ret > frame_size)
+ {
+ RESTORE_STACK;
+ return OPUS_BUFFER_TOO_SMALL;
+ }
+ }
+ for (s=0;s<st->layout.nb_streams;s++)
+ {
+ OpusDecoder *dec;
+ int packet_offset, ret;
+
+ dec = (OpusDecoder*)ptr;
+ ptr += (s < st->layout.nb_coupled_streams) ? align(coupled_size) : align(mono_size); /* coupled decoders are stored first, then mono */
+
+ if (!do_plc && len<=0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ packet_offset = 0;
+ ret = opus_decode_native(dec, data, len, buf, frame_size, decode_fec, s!=st->layout.nb_streams-1, &packet_offset, soft_clip);
+ data += packet_offset;
+ len -= packet_offset;
+ if (ret <= 0)
+ {
+ RESTORE_STACK;
+ return ret;
+ }
+ frame_size = ret; /* later streams and the final return use the decoded length */
+ if (s < st->layout.nb_coupled_streams)
+ {
+ int chan, prev;
+ prev = -1;
+ /* Copy "left" audio to the channel(s) where it belongs */
+ while ( (chan = get_left_channel(&st->layout, s, prev)) != -1)
+ {
+ (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
+ buf, 2, frame_size);
+ prev = chan;
+ }
+ prev = -1;
+ /* Copy "right" audio to the channel(s) where it belongs */
+ while ( (chan = get_right_channel(&st->layout, s, prev)) != -1)
+ {
+ (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
+ buf+1, 2, frame_size);
+ prev = chan;
+ }
+ } else {
+ int chan, prev;
+ prev = -1;
+ /* Copy audio to the channel(s) where it belongs */
+ while ( (chan = get_mono_channel(&st->layout, s, prev)) != -1)
+ {
+ (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
+ buf, 1, frame_size);
+ prev = chan;
+ }
+ }
+ }
+ /* Handle muted channels */
+ for (c=0;c<st->layout.nb_channels;c++)
+ {
+ if (st->layout.mapping[c] == 255)
+ {
+ /* A NULL source makes the copy routine write zeros to this channel. */
+ (*copy_channel_out)(pcm, st->layout.nb_channels, c,
+ NULL, 0, frame_size);
+ }
+ }
+ RESTORE_STACK;
+ return frame_size;
+}
+
+#if !defined(DISABLE_FLOAT_API)
+static void opus_copy_channel_out_float(
+ void *dst,
+ int dst_stride,
+ int dst_channel,
+ const opus_val16 *src,
+ int src_stride,
+ int frame_size
+)
+{
+ float *float_dst;
+ opus_int32 i;
+ float_dst = (float*)dst;
+ if (src != NULL)
+ {
+ for (i=0;i<frame_size;i++)
+#if defined(OPUS_FIXED_POINT)
+ float_dst[i*dst_stride+dst_channel] = (1/32768.f)*src[i*src_stride];
+#else
+ float_dst[i*dst_stride+dst_channel] = src[i*src_stride];
+#endif
+ }
+ else
+ {
+ for (i=0;i<frame_size;i++)
+ float_dst[i*dst_stride+dst_channel] = 0;
+ }
+}
+#endif
+
+static void opus_copy_channel_out_short(
+ void *dst,
+ int dst_stride,
+ int dst_channel,
+ const opus_val16 *src,
+ int src_stride,
+ int frame_size
+)
+{
+ opus_int16 *short_dst;
+ opus_int32 i;
+ short_dst = (opus_int16*)dst;
+ if (src != NULL)
+ {
+ for (i=0;i<frame_size;i++)
+#if defined(OPUS_FIXED_POINT)
+ short_dst[i*dst_stride+dst_channel] = src[i*src_stride];
+#else
+ short_dst[i*dst_stride+dst_channel] = FLOAT2INT16(src[i*src_stride]);
+#endif
+ }
+ else
+ {
+ for (i=0;i<frame_size;i++)
+ short_dst[i*dst_stride+dst_channel] = 0;
+ }
+}
+
+
+
+#ifdef OPUS_FIXED_POINT
+int opus_multistream_decode(
+ OpusMSDecoder *st,
+ const unsigned char *data,
+ opus_int32 len,
+ opus_int16 *pcm,
+ int frame_size,
+ int decode_fec
+)
+{
+ return opus_multistream_decode_native(st, data, len,
+ pcm, opus_copy_channel_out_short, frame_size, decode_fec, 0);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_multistream_decode_float(OpusMSDecoder *st, const unsigned char *data,
+ opus_int32 len, float *pcm, int frame_size, int decode_fec)
+{
+ return opus_multistream_decode_native(st, data, len,
+ pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0);
+}
+#endif
+
+#else
+
+int opus_multistream_decode(OpusMSDecoder *st, const unsigned char *data,
+ opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
+{
+ return opus_multistream_decode_native(st, data, len,
+ pcm, opus_copy_channel_out_short, frame_size, decode_fec, 1);
+}
+
+int opus_multistream_decode_float(
+ OpusMSDecoder *st,
+ const unsigned char *data,
+ opus_int32 len,
+ float *pcm,
+ int frame_size,
+ int decode_fec
+)
+{
+ return opus_multistream_decode_native(st, data, len,
+ pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0);
+}
+#endif
+
+int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...)
+{
+ va_list ap;
+ int coupled_size, mono_size;
+ char *ptr;
+ int ret = OPUS_OK;
+ /* Dispatches one CTL request; returns OPUS_OK, OPUS_BAD_ARG, OPUS_UNIMPLEMENTED or the per-stream decoder's error. */
+ va_start(ap, request);
+
+ coupled_size = opus_decoder_get_size(2);
+ mono_size = opus_decoder_get_size(1);
+ ptr = (char*)st + align(sizeof(OpusMSDecoder)); /* first per-stream decoder follows the aligned header */
+ switch (request)
+ {
+ case OPUS_GET_BANDWIDTH_REQUEST:
+ case OPUS_GET_SAMPLE_RATE_REQUEST:
+ case OPUS_GET_GAIN_REQUEST:
+ case OPUS_GET_LAST_PACKET_DURATION_REQUEST:
+ {
+ OpusDecoder *dec;
+ /* For int32* GET params, just query the first stream */
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ dec = (OpusDecoder*)ptr;
+ ret = opus_decoder_ctl(dec, request, value);
+ }
+ break;
+ case OPUS_GET_FINAL_RANGE_REQUEST:
+ {
+ int s;
+ opus_uint32 *value = va_arg(ap, opus_uint32*);
+ opus_uint32 tmp;
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = 0;
+ for (s=0;s<st->layout.nb_streams;s++)
+ {
+ OpusDecoder *dec;
+ dec = (OpusDecoder*)ptr;
+ if (s < st->layout.nb_coupled_streams)
+ ptr += align(coupled_size);
+ else
+ ptr += align(mono_size);
+ ret = opus_decoder_ctl(dec, request, &tmp);
+ if (ret != OPUS_OK) break;
+ *value ^= tmp; /* the reported value is the XOR of every stream's final range */
+ }
+ }
+ break;
+ case OPUS_RESET_STATE:
+ {
+ int s;
+ for (s=0;s<st->layout.nb_streams;s++)
+ {
+ OpusDecoder *dec;
+
+ dec = (OpusDecoder*)ptr;
+ if (s < st->layout.nb_coupled_streams)
+ ptr += align(coupled_size);
+ else
+ ptr += align(mono_size);
+ ret = opus_decoder_ctl(dec, OPUS_RESET_STATE);
+ if (ret != OPUS_OK)
+ break;
+ }
+ }
+ break;
+ case OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST:
+ {
+ int s;
+ opus_int32 stream_id;
+ OpusDecoder **value;
+ stream_id = va_arg(ap, opus_int32);
+ if (stream_id<0 || stream_id >= st->layout.nb_streams)
+ goto bad_arg; /* an out-of-range id must not reach the pointer walk below */
+ value = va_arg(ap, OpusDecoder**);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ for (s=0;s<stream_id;s++)
+ {
+ if (s < st->layout.nb_coupled_streams)
+ ptr += align(coupled_size);
+ else
+ ptr += align(mono_size);
+ }
+ *value = (OpusDecoder*)ptr;
+ }
+ break;
+ case OPUS_SET_GAIN_REQUEST:
+ {
+ int s;
+ /* This works for int32 params */
+ opus_int32 value = va_arg(ap, opus_int32);
+ for (s=0;s<st->layout.nb_streams;s++)
+ {
+ OpusDecoder *dec;
+
+ dec = (OpusDecoder*)ptr;
+ if (s < st->layout.nb_coupled_streams)
+ ptr += align(coupled_size);
+ else
+ ptr += align(mono_size);
+ ret = opus_decoder_ctl(dec, request, value);
+ if (ret != OPUS_OK)
+ break;
+ }
+ }
+ break;
+ default:
+ ret = OPUS_UNIMPLEMENTED;
+ break;
+ }
+
+ va_end(ap);
+ return ret;
+bad_arg:
+ va_end(ap); /* shared exit for rejected arguments; still releases the va_list */
+ return OPUS_BAD_ARG;
+}
+
+
+void opus_multistream_decoder_destroy(OpusMSDecoder *st)
+{
+ opus_free(st);
+}
diff --git a/drivers/opus/opus_multistream_encoder.c b/drivers/opus/opus_multistream_encoder.c
new file mode 100644
index 0000000000..8d559743ea
--- /dev/null
+++ b/drivers/opus/opus_multistream_encoder.c
@@ -0,0 +1,1174 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_multistream.h"
+#include "opus.h"
+#include "opus_private.h"
+#include "stack_alloc.h"
+#include <stdarg.h>
+#include "float_cast.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "mdct.h"
+#include "opus_modes.h"
+#include "bands.h"
+#include "quant_bands.h"
+
+typedef struct {
+ int nb_streams;
+ int nb_coupled_streams;
+ unsigned char mapping[8];
+} VorbisLayout;
+
+/* Index is nb_channel-1*/
+static const VorbisLayout vorbis_mappings[8] = {
+ {1, 0, {0}}, /* 1: mono */
+ {1, 1, {0, 1}}, /* 2: stereo */
+ {2, 1, {0, 2, 1}}, /* 3: 1-d surround */
+ {2, 2, {0, 1, 2, 3}}, /* 4: quadraphonic surround */
+ {3, 2, {0, 4, 1, 2, 3}}, /* 5: 5-channel surround */
+ {4, 2, {0, 4, 1, 2, 3, 5}}, /* 6: 5.1 surround */
+ {4, 3, {0, 4, 1, 2, 3, 5, 6}}, /* 7: 6.1 surround */
+ {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */
+};
+
+typedef void (*opus_copy_channel_in_func)(
+ opus_val16 *dst,
+ int dst_stride,
+ const void *src,
+ int src_stride,
+ int src_channel,
+ int frame_size
+);
+
+struct OpusMSEncoder {
+ ChannelLayout layout;
+ int lfe_stream;
+ int application;
+ int variable_duration;
+ int surround;
+ opus_int32 bitrate_bps;
+ float subframe_mem[3];
+ /* Encoder states go here */
+ /* then opus_val32 window_mem[channels*120]; */
+ /* then opus_val32 preemph_mem[channels]; */
+};
+
+static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
+{
+ int s;
+ char *ptr;
+ int coupled_size, mono_size;
+
+ coupled_size = opus_encoder_get_size(2);
+ mono_size = opus_encoder_get_size(1);
+ ptr = (char*)st + align(sizeof(OpusMSEncoder));
+ for (s=0;s<st->layout.nb_streams;s++)
+ {
+ if (s < st->layout.nb_coupled_streams)
+ ptr += align(coupled_size);
+ else
+ ptr += align(mono_size);
+ }
+ return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32));
+}
+
+static opus_val32 *ms_get_window_mem(OpusMSEncoder *st)
+{
+ int s;
+ char *ptr;
+ int coupled_size, mono_size;
+
+ coupled_size = opus_encoder_get_size(2);
+ mono_size = opus_encoder_get_size(1);
+ ptr = (char*)st + align(sizeof(OpusMSEncoder));
+ for (s=0;s<st->layout.nb_streams;s++)
+ {
+ if (s < st->layout.nb_coupled_streams)
+ ptr += align(coupled_size);
+ else
+ ptr += align(mono_size);
+ }
+ return (opus_val32*)ptr;
+}
+
+static int validate_encoder_layout(const ChannelLayout *layout)
+{
+ int s;
+ for (s=0;s<layout->nb_streams;s++)
+ {
+ if (s < layout->nb_coupled_streams)
+ {
+ if (get_left_channel(layout, s, -1)==-1)
+ return 0;
+ if (get_right_channel(layout, s, -1)==-1)
+ return 0;
+ } else {
+ if (get_mono_channel(layout, s, -1)==-1)
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static void channel_pos(int channels, int pos[8])
+{
+ /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */
+ if (channels==4)
+ {
+ pos[0]=1;
+ pos[1]=3;
+ pos[2]=1;
+ pos[3]=3;
+ } else if (channels==3||channels==5||channels==6)
+ {
+ pos[0]=1;
+ pos[1]=2;
+ pos[2]=3;
+ pos[3]=1;
+ pos[4]=3;
+ pos[5]=0;
+ } else if (channels==7)
+ {
+ pos[0]=1;
+ pos[1]=2;
+ pos[2]=3;
+ pos[3]=1;
+ pos[4]=3;
+ pos[5]=2;
+ pos[6]=0;
+ } else if (channels==8)
+ {
+ pos[0]=1;
+ pos[1]=2;
+ pos[2]=3;
+ pos[3]=1;
+ pos[4]=3;
+ pos[5]=1;
+ pos[6]=3;
+ pos[7]=0;
+ }
+}
+
+#if 1
+/* Computes a rough approximation of log2(2^a + 2^b) */
+static opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+ opus_val16 max;
+ opus_val32 diff;
+ opus_val16 frac;
+ static const opus_val16 diff_table[17] = { /* only 9 initializers follow; entries 9..16 are implicitly zero */
+ QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT),
+ QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT),
+ QCONST16(0.0028123f, DB_SHIFT)
+ };
+ int low;
+ if (a>b) /* max = larger input, diff = non-negative gap between the two */
+ {
+ max = a;
+ diff = SUB32(EXTEND32(a),EXTEND32(b));
+ } else {
+ max = b;
+ diff = SUB32(EXTEND32(b),EXTEND32(a));
+ }
+ if (diff >= QCONST16(8.f, DB_SHIFT)) /* gap of 8 or more: return the larger input unchanged */
+ return max;
+#ifdef OPUS_FIXED_POINT
+ low = SHR32(diff, DB_SHIFT-1); /* table index: diff in steps of one half */
+ frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT); /* remainder within the step, rescaled for MULT16_16_Q15 */
+#else
+ low = (int)floor(2*diff); /* table index: diff in steps of one half */
+ frac = 2*diff - low; /* remainder within the step, in [0,1) */
+#endif
+ return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); /* linear interpolation between adjacent table entries */
+}
+#else
+opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+ return log2(pow(4, a)+ pow(4, b))/2; /* closed-form alternative; compiled out by the #if 1 above */
+}
+#endif
+
+void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
+ int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
+)
+{
+ int c;
+ int i;
+ int LM;
+ int pos[8] = {0};
+ int upsample;
+ int frame_size;
+ opus_val16 channel_offset;
+ opus_val32 bandE[21];
+ opus_val16 maskLogE[3][21];
+ VARDECL(opus_val32, in);
+ VARDECL(opus_val16, x);
+ VARDECL(opus_val32, freq);
+ SAVE_STACK;
+
+ upsample = resampling_factor(rate);
+ frame_size = len*upsample;
+
+ for (LM=0;LM<celt_mode->maxLM;LM++)
+ if (celt_mode->shortMdctSize<<LM==frame_size)
+ break;
+
+ ALLOC(in, frame_size+overlap, opus_val32);
+ ALLOC(x, len, opus_val16);
+ ALLOC(freq, frame_size, opus_val32);
+
+ channel_pos(channels, pos);
+
+ for (c=0;c<3;c++)
+ for (i=0;i<21;i++)
+ maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT);
+
+ for (c=0;c<channels;c++)
+ {
+ OPUS_COPY(in, mem+c*overlap, overlap);
+ (*copy_channel_in)(x, 1, pcm, channels, c, len);
+ celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
+ clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
+ if (upsample != 1)
+ {
+ int bound = len;
+ for (i=0;i<bound;i++)
+ freq[i] *= upsample;
+ for (;i<frame_size;i++)
+ freq[i] = 0;
+ }
+
+ compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM);
+ amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1);
+ /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */
+ for (i=1;i<21;i++)
+ bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT));
+ for (i=19;i>=0;i--)
+ bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT));
+ if (pos[c]==1)
+ {
+ for (i=0;i<21;i++)
+ maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]);
+ } else if (pos[c]==3)
+ {
+ for (i=0;i<21;i++)
+ maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]);
+ } else if (pos[c]==2)
+ {
+ for (i=0;i<21;i++)
+ {
+ maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+ maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+ }
+ }
+#if 0
+ for (i=0;i<21;i++)
+ printf("%f ", bandLogE[21*c+i]);
+ float sum=0;
+ for (i=0;i<21;i++)
+ sum += bandLogE[21*c+i];
+ printf("%f ", sum/21);
+#endif
+ OPUS_COPY(mem+c*overlap, in+frame_size, overlap);
+ }
+ for (i=0;i<21;i++)
+ maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]);
+ channel_offset = HALF16(celt_log2(QCONST32(2.f,14)/(channels-1)));
+ for (c=0;c<3;c++)
+ for (i=0;i<21;i++)
+ maskLogE[c][i] += channel_offset;
+#if 0
+ for (c=0;c<3;c++)
+ {
+ for (i=0;i<21;i++)
+ printf("%f ", maskLogE[c][i]);
+ }
+#endif
+ for (c=0;c<channels;c++)
+ {
+ opus_val16 *mask;
+ if (pos[c]!=0)
+ {
+ mask = &maskLogE[pos[c]-1][0];
+ for (i=0;i<21;i++)
+ bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i];
+ } else {
+ for (i=0;i<21;i++)
+ bandLogE[21*c+i] = 0;
+ }
+#if 0
+ for (i=0;i<21;i++)
+ printf("%f ", bandLogE[21*c+i]);
+ printf("\n");
+#endif
+#if 0
+ float sum=0;
+ for (i=0;i<21;i++)
+ sum += bandLogE[21*c+i];
+ printf("%f ", sum/(float)QCONST32(21.f, DB_SHIFT));
+ printf("\n");
+#endif
+ }
+ RESTORE_STACK;
+}
+
+opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_streams)
+{
+ int coupled_size;
+ int mono_size;
+
+ if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0;
+ coupled_size = opus_encoder_get_size(2);
+ mono_size = opus_encoder_get_size(1);
+ return align(sizeof(OpusMSEncoder))
+ + nb_coupled_streams * align(coupled_size)
+ + (nb_streams-nb_coupled_streams) * align(mono_size);
+}
+
+/* Returns the state size in bytes for a surround encoder with the given
+   channel count and mapping family, or 0 if the combination is not
+   supported. Family 0 covers mono/stereo, family 1 uses the vorbis_mappings
+   table for 1..8 channels, family 255 uses one mono stream per channel. */
+opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_family)
+{
+   int nb_streams;
+   int nb_coupled_streams;
+   opus_int32 size;
+
+   switch (mapping_family)
+   {
+   case 0:
+      /* A single stream, coupled only when there are two channels. */
+      if (channels!=1 && channels!=2)
+         return 0;
+      nb_streams=1;
+      nb_coupled_streams=channels-1;
+      break;
+   case 1:
+      if (channels<1 || channels>8)
+         return 0;
+      nb_streams=vorbis_mappings[channels-1].nb_streams;
+      nb_coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams;
+      break;
+   case 255:
+      nb_streams=channels;
+      nb_coupled_streams=0;
+      break;
+   default:
+      return 0;
+   }
+
+   size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
+   /* Extra room per channel: 120 window-memory values plus one
+      pre-emphasis value (see the OPUS_CLEAR calls in the init code). */
+   if (channels>2)
+      size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32));
+   return size;
+}
+
+
+/* Common initialisation for the plain and the surround multistream encoder.
+
+   st              caller-provided state, large enough for the header plus
+                   all elementary encoders (and the surround memories when
+                   surround is nonzero)
+   Fs              sampling rate handed to every elementary encoder
+   channels        number of input channels (1..255)
+   streams         total number of elementary streams
+   coupled_streams number of those streams that are stereo
+   mapping         channels entries, copied into st->layout.mapping
+   application     forwarded to opus_encoder_init()
+   surround        nonzero to keep st->lfe_stream as set by the caller and to
+                   clear the surround analysis memories
+
+   Returns OPUS_OK, OPUS_BAD_ARG for inconsistent arguments or an invalid
+   layout, or the error reported by opus_encoder_init(). */
+static int opus_multistream_encoder_init_impl(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application,
+      int surround
+)
+{
+   int coupled_size;
+   int mono_size;
+   int i, ret;
+   char *ptr;
+
+   if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+      return OPUS_BAD_ARG;
+
+   st->layout.nb_channels = channels;
+   st->layout.nb_streams = streams;
+   st->layout.nb_coupled_streams = coupled_streams;
+   st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0;
+   /* In surround mode st->lfe_stream is only read here, so the caller must
+      have set it before calling. */
+   if (!surround)
+      st->lfe_stream = -1;
+   st->bitrate_bps = OPUS_AUTO;
+   st->application = application;
+   st->variable_duration = OPUS_FRAMESIZE_ARG;
+   for (i=0;i<st->layout.nb_channels;i++)
+      st->layout.mapping[i] = mapping[i];
+   if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout))
+      return OPUS_BAD_ARG;
+   /* Elementary encoders are packed right after the aligned header:
+      first the stereo (coupled) ones, then the mono ones. */
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+
+   for (i=0;i<st->layout.nb_coupled_streams;i++)
+   {
+      ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application);
+      if(ret!=OPUS_OK)return ret;
+      if (i==st->lfe_stream)
+         opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1));
+      ptr += align(coupled_size);
+   }
+   for (;i<st->layout.nb_streams;i++)
+   {
+      ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 1, application);
+      /* Check the init result before touching the encoder, exactly as the
+         coupled loop does; a failed init must not be followed by a ctl. */
+      if(ret!=OPUS_OK)return ret;
+      if (i==st->lfe_stream)
+         opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1));
+      ptr += align(mono_size);
+   }
+   if (surround)
+   {
+      OPUS_CLEAR(ms_get_preemph_mem(st), channels);
+      OPUS_CLEAR(ms_get_window_mem(st), channels*120);
+   }
+   st->surround = surround;
+   return OPUS_OK;
+}
+
+/* Initialises a caller-allocated multistream encoder with an explicit
+   stream layout. Delegates to the shared implementation with surround
+   processing switched off and returns its result. */
+int opus_multistream_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application
+)
+{
+   const int surround = 0;
+
+   return opus_multistream_encoder_init_impl(st, Fs, channels, streams,
+         coupled_streams, mapping, application, surround);
+}
+
+/* Initialises a caller-allocated encoder from a channel count and mapping
+   family. The chosen layout is written to *streams, *coupled_streams and
+   mapping[0..channels-1] before the shared init is run.
+
+   Returns OPUS_BAD_ARG when channels is outside 1..255, OPUS_UNIMPLEMENTED
+   for an unsupported family/channel combination, otherwise the result of
+   opus_multistream_encoder_init_impl(). Surround processing is requested
+   only for mapping family 1 with more than two channels. */
+int opus_multistream_surround_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application
+)
+{
+   int ch;
+   int use_surround;
+
+   if (channels<1 || channels>255)
+      return OPUS_BAD_ARG;
+
+   /* No LFE stream unless the family 1 table says otherwise below. */
+   st->lfe_stream = -1;
+
+   switch (mapping_family)
+   {
+   case 0:
+      /* Mono or stereo only, carried by a single stream. */
+      if (channels!=1 && channels!=2)
+         return OPUS_UNIMPLEMENTED;
+      *streams=1;
+      *coupled_streams=(channels==2);
+      for (ch=0;ch<channels;ch++)
+         mapping[ch] = ch;
+      break;
+   case 1:
+      /* channels>=1 is already guaranteed by the range check above. */
+      if (channels>8)
+         return OPUS_UNIMPLEMENTED;
+      *streams=vorbis_mappings[channels-1].nb_streams;
+      *coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams;
+      for (ch=0;ch<channels;ch++)
+         mapping[ch] = vorbis_mappings[channels-1].mapping[ch];
+      /* From six channels upwards the last stream is flagged as LFE. */
+      if (channels>=6)
+         st->lfe_stream = *streams-1;
+      break;
+   case 255:
+      /* One independent mono stream per channel, identity mapping. */
+      *streams=channels;
+      *coupled_streams=0;
+      for (ch=0;ch<channels;ch++)
+         mapping[ch] = ch;
+      break;
+   default:
+      return OPUS_UNIMPLEMENTED;
+   }
+
+   use_surround = channels>2&&mapping_family==1;
+   return opus_multistream_encoder_init_impl(st, Fs, channels, *streams,
+         *coupled_streams, mapping, application, use_surround);
+}
+
+/* Allocates and initialises a multistream encoder for an explicit layout.
+   Returns the new state, or NULL on failure. When error is non-NULL it
+   receives OPUS_BAD_ARG, OPUS_ALLOC_FAIL, or the status returned by
+   opus_multistream_encoder_init() (OPUS_OK on success). */
+OpusMSEncoder *opus_multistream_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application,
+      int *error
+)
+{
+   OpusMSEncoder *enc;
+   int status;
+   int args_ok;
+
+   args_ok = channels>=1 && channels<=255
+          && streams>=1 && coupled_streams>=0
+          && coupled_streams<=streams
+          && coupled_streams+streams<=255;
+   if (!args_ok)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   enc = (OpusMSEncoder *)opus_alloc(opus_multistream_encoder_get_size(streams, coupled_streams));
+   if (enc==NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+
+   status = opus_multistream_encoder_init(enc, Fs, channels, streams, coupled_streams, mapping, application);
+   if (status != OPUS_OK)
+   {
+      /* Initialisation failed: release the state and report NULL. */
+      opus_free(enc);
+      enc = NULL;
+   }
+   if (error)
+      *error = status;
+   return enc;
+}
+
+/* Allocates and initialises a surround multistream encoder from a channel
+   count and a mapping family.
+
+   streams, coupled_streams and mapping are outputs filled in by
+   opus_multistream_surround_encoder_init(). Returns the new state, or NULL
+   on failure. When error is non-NULL it receives OPUS_BAD_ARG (channels
+   outside 1..255), OPUS_UNIMPLEMENTED (unsupported family/channel
+   combination), OPUS_ALLOC_FAIL, or the status of the init call. */
+OpusMSEncoder *opus_multistream_surround_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application,
+      int *error
+)
+{
+   int ret;
+   opus_int32 size;
+   OpusMSEncoder *st;
+   if ((channels>255) || (channels<1))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   /* A size of 0 means the family/channel combination is unsupported.
+      Bail out here: allocating 0 bytes may still return a non-NULL block,
+      and the init function writes st->lfe_stream before it rejects the
+      configuration, which would be an out-of-bounds write. */
+   size = opus_multistream_surround_encoder_get_size(channels, mapping_family);
+   if (!size)
+   {
+      if (error)
+         *error = OPUS_UNIMPLEMENTED;
+      return NULL;
+   }
+   st = (OpusMSEncoder *)opus_alloc(size);
+   if (st==NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_multistream_surround_encoder_init(st, Fs, channels, mapping_family, streams, coupled_streams, mapping, application);
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+
+/* Distributes the encoder's target bitrate over its elementary streams and
+   stores one bitrate per stream in rate[0..nb_streams-1].
+
+   Every stream gets a fixed offset plus a share of a common per-channel
+   rate: coupled streams are weighted 512/256, ordinary mono streams
+   256/256 and the LFE stream 32/256, with its own smaller offset. */
+static void surround_rate_allocation(
+      OpusMSEncoder *st,
+      opus_int32 *rate,
+      int frame_size
+      )
+{
+   /* Q8 weights relative to an uncoupled (mono) stream. */
+   const int coupled_ratio = 512;
+   /* Should depend on the bitrate; for now LFE is assumed to need 1/8 of mono. */
+   const int lfe_ratio = 32;
+   opus_int32 Fs;
+   opus_int32 channel_rate;
+   opus_int32 frame_overhead;
+   int stream_offset;
+   int lfe_offset;
+   int s;
+
+   /* Query the sampling rate from the first elementary encoder, which is
+      stored right behind the aligned header. */
+   opus_encoder_ctl((OpusEncoder*)((char*)st + align(sizeof(OpusMSEncoder))),
+         OPUS_GET_SAMPLE_RATE(&Fs));
+
+   /* Rate adjustment that grows with the number of frames per second
+      beyond 50 (i.e. for frames shorter than 20 ms). */
+   frame_overhead = 60*(Fs/frame_size-50);
+
+   /* Each stream (coupled or not) starts from the same offset, which models
+      the main saving of coupled channels over uncoupled ones. */
+   stream_offset = (st->bitrate_bps > st->layout.nb_channels*40000)
+         ? 20000
+         : st->bitrate_bps/st->layout.nb_channels/2;
+   stream_offset += frame_overhead;
+   /* The LFE stream is the exception and starts from a smaller offset. */
+   lfe_offset = 3500 + frame_overhead;
+
+   /* Work out the common per-channel rate. */
+   if (st->bitrate_bps==OPUS_BITRATE_MAX)
+   {
+      channel_rate = 300000;
+   } else if (st->bitrate_bps==OPUS_AUTO)
+   {
+      channel_rate = Fs+60*Fs/frame_size;
+   } else {
+      const int has_lfe = (st->lfe_stream!=-1);
+      const int coupled_count = st->layout.nb_coupled_streams;
+      const int mono_count = st->layout.nb_streams-coupled_count-has_lfe;
+      const int weight_sum = (mono_count<<8)             /* mono */
+                           + coupled_ratio*coupled_count /* stereo */
+                           + has_lfe*lfe_ratio;
+      channel_rate = 256*(st->bitrate_bps-lfe_offset*has_lfe-stream_offset*(coupled_count+mono_count))/weight_sum;
+   }
+#ifndef OPUS_FIXED_POINT
+   if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50)
+      channel_rate += frame_overhead;
+#endif
+
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      if (s>=st->layout.nb_coupled_streams && s==st->lfe_stream)
+         rate[s] = lfe_offset+(channel_rate*lfe_ratio>>8);
+      else if (s<st->layout.nb_coupled_streams)
+         rate[s] = stream_offset+(channel_rate*coupled_ratio>>8);
+      else
+         rate[s] = stream_offset+channel_rate;
+   }
+}
+
+/* Max size in case the encoder decides to return three frames */
+#define MS_FRAME_TMP (3*1275+7)
+/* Encodes one multistream frame.
+
+   st                  multistream encoder state
+   copy_channel_in     extracts one input channel from pcm into an
+                       opus_val16 buffer with the requested stride
+   pcm                 interleaved input, interpreted by copy_channel_in
+   analysis_frame_size frame size as supplied by the caller; the size that is
+                       actually coded comes from compute_frame_size()
+   data                output buffer for the multistream packet
+   max_data_bytes      capacity of data
+   lsb_depth           forwarded to opus_encode_native()
+   downmix             forwarded to compute_frame_size() and
+                       opus_encode_native()
+
+   Each stream is encoded into a temporary buffer and then appended to data
+   through the repacketizer; all streams but the last are written in
+   self-delimited form.
+
+   Returns the total number of bytes written, OPUS_BAD_ARG for an
+   unsupported frame size, OPUS_BUFFER_TOO_SMALL when max_data_bytes is
+   below 4*nb_streams-1, or the negative error from opus_encode_native(). */
+static int opus_multistream_encode_native
+(
+    OpusMSEncoder *st,
+    opus_copy_channel_in_func copy_channel_in,
+    const void *pcm,
+    int analysis_frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes,
+    int lsb_depth,
+    downmix_func downmix
+)
+{
+   opus_int32 Fs;
+   int coupled_size;
+   int mono_size;
+   int s;
+   char *ptr;
+   int tot_size;
+   VARDECL(opus_val16, buf);
+   VARDECL(opus_val16, bandSMR);
+   unsigned char tmp_data[MS_FRAME_TMP];
+   OpusRepacketizer rp;
+   opus_int32 vbr;
+   const CELTMode *celt_mode;
+   opus_int32 bitrates[256];
+   opus_val16 bandLogE[42];
+   opus_val32 *mem = NULL;
+   opus_val32 *preemph_mem=NULL;
+   int frame_size;
+   ALLOC_STACK;
+
+   if (st->surround)
+   {
+      preemph_mem = ms_get_preemph_mem(st);
+      mem = ms_get_window_mem(st);
+   }
+
+   /* Global settings are read from the first elementary encoder. */
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_VBR(&vbr));
+   opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));
+
+   {
+      opus_int32 delay_compensation;
+      int channels;
+
+      channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
+      opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));
+      delay_compensation -= Fs/400;
+      frame_size = compute_frame_size(pcm, analysis_frame_size,
+            st->variable_duration, channels, Fs, st->bitrate_bps,
+            delay_compensation, downmix
+#ifndef DISABLE_FLOAT_API
+            , st->subframe_mem
+#endif
+            );
+   }
+
+   if (400*frame_size < Fs)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   /* Validate frame_size before using it to allocate stack space.
+      This mirrors the checks in opus_encode[_float](). */
+   if (400*frame_size != Fs && 200*frame_size != Fs &&
+       100*frame_size != Fs && 50*frame_size != Fs &&
+       25*frame_size != Fs && 50*frame_size != 3*Fs)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   ALLOC(buf, 2*frame_size, opus_val16);
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+
+   ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
+   if (st->surround)
+   {
+      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
+   }
+
+   if (max_data_bytes < 4*st->layout.nb_streams-1)
+   {
+      RESTORE_STACK;
+      return OPUS_BUFFER_TOO_SMALL;
+   }
+
+   /* Compute bitrate allocation between streams (this could be a lot better) */
+   surround_rate_allocation(st, bitrates, frame_size);
+
+   if (!vbr)
+   {
+      /* In CBR the packet size is derived from the target bitrate.
+         OPUS_AUTO and OPUS_BITRATE_MAX are negative sentinels, not rates;
+         feeding them into the formula would yield a negative byte budget. */
+      if (st->bitrate_bps == OPUS_AUTO)
+      {
+         /* Use the sum of the per-stream rates that were just allocated. */
+         opus_int32 rate_sum = 0;
+         for (s=0;s<st->layout.nb_streams;s++)
+            rate_sum += bitrates[s];
+         max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size));
+      } else if (st->bitrate_bps != OPUS_BITRATE_MAX)
+      {
+         max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size));
+      }
+      /* OPUS_BITRATE_MAX: keep the caller-provided buffer size as the limit. */
+   }
+
+   /* First pass: configure every elementary encoder. */
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      OpusEncoder *enc;
+      enc = (OpusEncoder*)ptr;
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+      opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s]));
+      if (st->surround)
+      {
+         opus_int32 equiv_rate;
+         equiv_rate = st->bitrate_bps;
+         if (frame_size*50 < Fs)
+            equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels;
+         if (equiv_rate > 10000*st->layout.nb_channels)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+         else if (equiv_rate > 7000*st->layout.nb_channels)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND));
+         else if (equiv_rate > 5000*st->layout.nb_channels)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND));
+         else
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));
+         if (s < st->layout.nb_coupled_streams)
+         {
+            /* To preserve the spatial image, force stereo CELT on coupled streams */
+            opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
+            opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2));
+         }
+      }
+   }
+
+   /* Second pass: encode every stream and append it to the output. */
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   /* Counting ToC */
+   tot_size = 0;
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      OpusEncoder *enc;
+      int len;
+      int curr_max;
+      int c1, c2;
+
+      opus_repacketizer_init(&rp);
+      enc = (OpusEncoder*)ptr;
+      if (s < st->layout.nb_coupled_streams)
+      {
+         int i;
+         int left, right;
+         left = get_left_channel(&st->layout, s, -1);
+         right = get_right_channel(&st->layout, s, -1);
+         /* Interleave the two channels of this coupled stream into buf. */
+         (*copy_channel_in)(buf, 2,
+            pcm, st->layout.nb_channels, left, frame_size);
+         (*copy_channel_in)(buf+1, 2,
+            pcm, st->layout.nb_channels, right, frame_size);
+         ptr += align(coupled_size);
+         if (st->surround)
+         {
+            for (i=0;i<21;i++)
+            {
+               bandLogE[i] = bandSMR[21*left+i];
+               bandLogE[21+i] = bandSMR[21*right+i];
+            }
+         }
+         c1 = left;
+         c2 = right;
+      } else {
+         int i;
+         int chan = get_mono_channel(&st->layout, s, -1);
+         (*copy_channel_in)(buf, 1,
+            pcm, st->layout.nb_channels, chan, frame_size);
+         ptr += align(mono_size);
+         if (st->surround)
+         {
+            for (i=0;i<21;i++)
+               bandLogE[i] = bandSMR[21*chan+i];
+         }
+         c1 = chan;
+         c2 = -1;
+      }
+      if (st->surround)
+         opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
+      /* number of bytes left (+Toc) */
+      curr_max = max_data_bytes - tot_size;
+      /* Reserve three bytes for the last stream and four for the others */
+      curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
+      curr_max = IMIN(curr_max,MS_FRAME_TMP);
+      /* In CBR the last stream is given whatever budget remains. */
+      if (!vbr && s == st->layout.nb_streams-1)
+         opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size)));
+      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth,
+            pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix);
+      if (len<0)
+      {
+         RESTORE_STACK;
+         return len;
+      }
+      /* We need to use the repacketizer to add the self-delimiting lengths
+         while taking into account the fact that the encoder can now return
+         more than one frame at a time (e.g. 60 ms CELT-only) */
+      opus_repacketizer_cat(&rp, tmp_data, len);
+      len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp),
+            data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1);
+      data += len;
+      tot_size += len;
+   }
+   /*printf("\n");*/
+   RESTORE_STACK;
+   return tot_size;
+}
+
+#if !defined(DISABLE_FLOAT_API)
+/* Copies channel src_channel of an interleaved float buffer (src_stride
+   samples per frame) into dst, writing every dst_stride-th element.
+   Fixed-point builds convert each sample with FLOAT2INT16(). */
+static void opus_copy_channel_in_float(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size
+)
+{
+   const float *input = (const float *)src;
+   opus_int32 n;
+
+   for (n=0;n<frame_size;n++)
+   {
+      const float sample = input[n*src_stride+src_channel];
+#if defined(OPUS_FIXED_POINT)
+      dst[n*dst_stride] = FLOAT2INT16(sample);
+#else
+      dst[n*dst_stride] = sample;
+#endif
+   }
+}
+#endif
+
+/* Copies channel src_channel of an interleaved 16-bit buffer (src_stride
+   samples per frame) into dst, writing every dst_stride-th element.
+   Float builds scale each sample by 1/32768. */
+static void opus_copy_channel_in_short(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size
+)
+{
+   const opus_int16 *input = (const opus_int16 *)src;
+   opus_int32 n;
+
+   for (n=0;n<frame_size;n++)
+   {
+      const opus_int16 sample = input[n*src_stride+src_channel];
+#if defined(OPUS_FIXED_POINT)
+      dst[n*dst_stride] = sample;
+#else
+      dst[n*dst_stride] = (1/32768.f)*sample;
+#endif
+   }
+}
+
+
+/* Public encode entry points. Both build variants expose the same two
+   functions; each one only selects the channel-copy routine, the lsb_depth
+   value and the downmix routine that match its input sample format and then
+   calls opus_multistream_encode_native(). */
+#ifdef OPUS_FIXED_POINT
+/* Fixed-point build, 16-bit input: lsb_depth 16, integer downmix. */
+int opus_multistream_encode(
+ OpusMSEncoder *st,
+ const opus_val16 *pcm,
+ int frame_size,
+ unsigned char *data,
+ opus_int32 max_data_bytes
+)
+{
+ return opus_multistream_encode_native(st, opus_copy_channel_in_short,
+ pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+}
+
+#ifndef DISABLE_FLOAT_API
+/* Fixed-point build, float input: samples are converted by the copy
+   routine; lsb_depth is still 16. */
+int opus_multistream_encode_float(
+ OpusMSEncoder *st,
+ const float *pcm,
+ int frame_size,
+ unsigned char *data,
+ opus_int32 max_data_bytes
+)
+{
+ return opus_multistream_encode_native(st, opus_copy_channel_in_float,
+ pcm, frame_size, data, max_data_bytes, 16, downmix_float);
+}
+#endif
+
+#else
+
+/* Float build, float input: lsb_depth 24, float downmix.
+   NOTE(review): pcm is declared as opus_val16 here; presumably that type is
+   float in this configuration - confirm in arch.h. */
+int opus_multistream_encode_float
+(
+ OpusMSEncoder *st,
+ const opus_val16 *pcm,
+ int frame_size,
+ unsigned char *data,
+ opus_int32 max_data_bytes
+)
+{
+ return opus_multistream_encode_native(st, opus_copy_channel_in_float,
+ pcm, frame_size, data, max_data_bytes, 24, downmix_float);
+}
+
+/* Float build, 16-bit input: lsb_depth 16, integer downmix. */
+int opus_multistream_encode(
+ OpusMSEncoder *st,
+ const opus_int16 *pcm,
+ int frame_size,
+ unsigned char *data,
+ opus_int32 max_data_bytes
+)
+{
+ return opus_multistream_encode_native(st, opus_copy_channel_in_short,
+ pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+}
+#endif
+
+/* Control interface of the multistream encoder.
+
+   request selects the operation; the variadic arguments depend on it:
+   - OPUS_SET_BITRATE / OPUS_{SET,GET}_EXPERT_FRAME_DURATION act on the
+     multistream state itself;
+   - OPUS_GET_BITRATE sums the value over all streams and
+     OPUS_GET_FINAL_RANGE XORs it over all streams;
+   - the remaining int32 getters query the first stream only;
+   - the int32 setters and OPUS_RESET_STATE are applied to every stream,
+     stopping at the first error;
+   - OPUS_MULTISTREAM_GET_ENCODER_STATE returns a pointer to one
+     elementary encoder.
+
+   Returns OPUS_OK, OPUS_BAD_ARG for invalid arguments, OPUS_UNIMPLEMENTED
+   for unknown requests, or the error of an elementary encoder ctl. */
+int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
+{
+   va_list ap;
+   int coupled_size, mono_size;
+   char *ptr;
+   int ret = OPUS_OK;
+
+   va_start(ap, request);
+
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   /* ptr starts at the first elementary encoder; stereo encoders come
+      first, followed by the mono ones. */
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   switch (request)
+   {
+   case OPUS_SET_BITRATE_REQUEST:
+   {
+      opus_int32 value = va_arg(ap, opus_int32);
+      if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX)
+      {
+         goto bad_arg;
+      }
+      st->bitrate_bps = value;
+   }
+   break;
+   case OPUS_GET_BITRATE_REQUEST:
+   {
+      int s;
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = 0;
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         opus_int32 rate;
+         OpusEncoder *enc;
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         opus_encoder_ctl(enc, request, &rate);
+         *value += rate;
+      }
+   }
+   break;
+   case OPUS_GET_LSB_DEPTH_REQUEST:
+   case OPUS_GET_VBR_REQUEST:
+   case OPUS_GET_APPLICATION_REQUEST:
+   case OPUS_GET_BANDWIDTH_REQUEST:
+   case OPUS_GET_COMPLEXITY_REQUEST:
+   case OPUS_GET_PACKET_LOSS_PERC_REQUEST:
+   case OPUS_GET_DTX_REQUEST:
+   case OPUS_GET_VOICE_RATIO_REQUEST:
+   case OPUS_GET_VBR_CONSTRAINT_REQUEST:
+   case OPUS_GET_SIGNAL_REQUEST:
+   case OPUS_GET_LOOKAHEAD_REQUEST:
+   case OPUS_GET_SAMPLE_RATE_REQUEST:
+   case OPUS_GET_INBAND_FEC_REQUEST:
+   case OPUS_GET_FORCE_CHANNELS_REQUEST:
+   case OPUS_GET_PREDICTION_DISABLED_REQUEST:
+   {
+      OpusEncoder *enc;
+      /* For int32* GET params, just query the first stream */
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      enc = (OpusEncoder*)ptr;
+      ret = opus_encoder_ctl(enc, request, value);
+   }
+   break;
+   case OPUS_GET_FINAL_RANGE_REQUEST:
+   {
+      int s;
+      opus_uint32 *value = va_arg(ap, opus_uint32*);
+      opus_uint32 tmp;
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value=0;
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         OpusEncoder *enc;
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         ret = opus_encoder_ctl(enc, request, &tmp);
+         if (ret != OPUS_OK) break;
+         *value ^= tmp;
+      }
+   }
+   break;
+   case OPUS_SET_LSB_DEPTH_REQUEST:
+   case OPUS_SET_COMPLEXITY_REQUEST:
+   case OPUS_SET_VBR_REQUEST:
+   case OPUS_SET_VBR_CONSTRAINT_REQUEST:
+   case OPUS_SET_MAX_BANDWIDTH_REQUEST:
+   case OPUS_SET_BANDWIDTH_REQUEST:
+   case OPUS_SET_SIGNAL_REQUEST:
+   case OPUS_SET_APPLICATION_REQUEST:
+   case OPUS_SET_INBAND_FEC_REQUEST:
+   case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
+   case OPUS_SET_DTX_REQUEST:
+   case OPUS_SET_FORCE_MODE_REQUEST:
+   case OPUS_SET_FORCE_CHANNELS_REQUEST:
+   case OPUS_SET_PREDICTION_DISABLED_REQUEST:
+   {
+      int s;
+      /* This works for int32 params */
+      opus_int32 value = va_arg(ap, opus_int32);
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         OpusEncoder *enc;
+
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         ret = opus_encoder_ctl(enc, request, value);
+         if (ret != OPUS_OK)
+            break;
+      }
+   }
+   break;
+   case OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST:
+   {
+      int s;
+      opus_int32 stream_id;
+      OpusEncoder **value;
+      stream_id = va_arg(ap, opus_int32);
+      /* Reject an out-of-range id right away: walking ptr past the last
+         stream and handing that pointer to the caller would expose memory
+         outside the encoder state. */
+      if (stream_id<0 || stream_id >= st->layout.nb_streams)
+      {
+         goto bad_arg;
+      }
+      value = va_arg(ap, OpusEncoder**);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      for (s=0;s<stream_id;s++)
+      {
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+      }
+      *value = (OpusEncoder*)ptr;
+   }
+   break;
+   case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+   {
+      opus_int32 value = va_arg(ap, opus_int32);
+      st->variable_duration = value;
+   }
+   break;
+   case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+   {
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->variable_duration;
+   }
+   break;
+   case OPUS_RESET_STATE:
+   {
+      int s;
+      st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0;
+      if (st->surround)
+      {
+         OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels);
+         OPUS_CLEAR(ms_get_window_mem(st), st->layout.nb_channels*120);
+      }
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         OpusEncoder *enc;
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         ret = opus_encoder_ctl(enc, OPUS_RESET_STATE);
+         if (ret != OPUS_OK)
+            break;
+      }
+   }
+   break;
+   default:
+      ret = OPUS_UNIMPLEMENTED;
+      break;
+   }
+
+   va_end(ap);
+   return ret;
+bad_arg:
+   /* Shared exit for invalid arguments; still closes the va_list. */
+   va_end(ap);
+   return OPUS_BAD_ARG;
+}
+
+/* Releases an encoder state by passing it to opus_free(). The elementary
+   encoders live inside the same allocation (see the size computation in
+   opus_multistream_encoder_get_size()), so there is nothing else to free. */
+void opus_multistream_encoder_destroy(OpusMSEncoder *st)
+{
+ opus_free(st);
+}
diff --git a/drivers/opus/opus_private.h b/drivers/opus/opus_private.h
new file mode 100644
index 0000000000..83225f2b6c
--- /dev/null
+++ b/drivers/opus/opus_private.h
@@ -0,0 +1,129 @@
+/* Copyright (c) 2012 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OPUS_PRIVATE_H
+#define OPUS_PRIVATE_H
+
+#include "arch.h"
+#include "opus.h"
+#include "celt.h"
+
+/* Repacketizer state: a TOC byte plus up to 48 frame pointers and their
+   lengths. NOTE(review): the per-field notes below are inferred from the
+   field names and types only - confirm against the repacketizer source. */
+struct OpusRepacketizer {
+ unsigned char toc; /* presumably the packet TOC byte */
+ int nb_frames; /* presumably the number of used entries in frames[]/len[] */
+ const unsigned char *frames[48]; /* pointers to frame data (const, so not modified through here) */
+ opus_int16 len[48]; /* presumably the byte length of each frame */
+ int framesize; /* presumably the frame duration - units to be confirmed */
+};
+
+/* Channel/stream layout shared by the multistream code. The encoder init
+   fills it from its channels, streams and coupled_streams arguments and
+   copies one mapping byte per channel. */
+typedef struct ChannelLayout {
+ int nb_channels; /* number of valid entries in mapping[] */
+ int nb_streams; /* total number of elementary streams */
+ int nb_coupled_streams; /* how many of those streams are stereo */
+ unsigned char mapping[256]; /* per-channel mapping byte; NOTE(review): exact index semantics are defined by validate_layout()/get_*_channel(), not visible here */
+} ChannelLayout;
+
+int validate_layout(const ChannelLayout *layout);
+int get_left_channel(const ChannelLayout *layout, int stream_id, int prev);
+int get_right_channel(const ChannelLayout *layout, int stream_id, int prev);
+int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
+
+
+
+#define MODE_SILK_ONLY 1000
+#define MODE_HYBRID 1001
+#define MODE_CELT_ONLY 1002
+
+#define OPUS_SET_VOICE_RATIO_REQUEST 11018
+#define OPUS_GET_VOICE_RATIO_REQUEST 11019
+
+/** Configures the encoder's expected percentage of voice
+ * as opposed to music or other signals.
+ *
+ * @note This interface is currently more aspiration than actuality. It's
+ * ultimately expected to bias an automatic signal classifier, but it currently
+ * just shifts the static bitrate to mode mapping around a little bit.
+ *
+ * @param[in] x <tt>int</tt>: Voice percentage in the range 0-100, inclusive.
+ * @hideinitializer */
+#define OPUS_SET_VOICE_RATIO(x) OPUS_SET_VOICE_RATIO_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured voice ratio value, @see OPUS_SET_VOICE_RATIO
+ *
+ * @param[out] x <tt>int*</tt>: Voice percentage in the range 0-100, inclusive.
+ * @hideinitializer */
+#define OPUS_GET_VOICE_RATIO(x) OPUS_GET_VOICE_RATIO_REQUEST, __opus_check_int_ptr(x)
+
+
+#define OPUS_SET_FORCE_MODE_REQUEST 11002
+#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
+
+typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+ int bitrate, opus_val16 tonality, float *mem, int buffering,
+ downmix_func downmix);
+
+int encode_size(int size, unsigned char *data);
+
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
+
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+ int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+ int delay_compensation, downmix_func downmix
+#ifndef DISABLE_FLOAT_API
+ , float *subframe_mem
+#endif
+ );
+
+opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+ unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+ const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix);
+
+int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
+ opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
+ opus_int32 *packet_offset, int soft_clip);
+
+/* Rounds i up to the next multiple of sizeof(void *), so that objects
+   packed one after another in a single allocation stay pointer-aligned. */
+static OPUS_INLINE int align(int i)
+{
+   const int alignment = (int)sizeof(void *);
+
+   return (i+alignment-1) & -alignment;
+}
+
+int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
+ int self_delimited, unsigned char *out_toc,
+ const unsigned char *frames[48], opus_int16 size[48],
+ int *payload_offset, opus_int32 *packet_offset);
+
+opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
+ unsigned char *data, opus_int32 maxlen, int self_delimited, int pad);
+
+int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len);
+
+#endif /* OPUS_PRIVATE_H */
diff --git a/drivers/opus/opus_types.h b/drivers/opus/opus_types.h
new file mode 100644
index 0000000000..b28e03aea2
--- /dev/null
+++ b/drivers/opus/opus_types.h
@@ -0,0 +1,159 @@
+/* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */
+/* Modified by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* opus_types.h based on ogg_types.h from libogg */
+
+/**
+ @file opus_types.h
+ @brief Opus reference implementation types
+*/
+#ifndef OPUS_TYPES_H
+#define OPUS_TYPES_H
+
+/* Platform ladder that defines opus_int16, opus_uint16, opus_int32 and
+   opus_uint32. Exactly one branch is compiled; every branch must define all
+   four names. */
+/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
+#if (defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
+#include <stdint.h>
+
+ typedef int16_t opus_int16;
+ typedef uint16_t opus_uint16;
+ typedef int32_t opus_int32;
+ typedef uint32_t opus_uint32;
+#elif defined(_WIN32)
+
+ /* Windows: choose by toolchain. */
+# if defined(__CYGWIN__)
+# include <_G_config.h>
+ typedef _G_int32_t opus_int32;
+ typedef _G_uint32_t opus_uint32;
+ /* NOTE(review): the 16-bit names lack the _t suffix used by the 32-bit
+    ones above - verify against the Cygwin <_G_config.h> in use. */
+ typedef _G_int16 opus_int16;
+ typedef _G_uint16 opus_uint16;
+# elif defined(__MINGW32__)
+ typedef short opus_int16;
+ typedef unsigned short opus_uint16;
+ typedef int opus_int32;
+ typedef unsigned int opus_uint32;
+# elif defined(__MWERKS__)
+ typedef int opus_int32;
+ typedef unsigned int opus_uint32;
+ typedef short opus_int16;
+ typedef unsigned short opus_uint16;
+# else
+ /* MSVC/Borland */
+ typedef __int32 opus_int32;
+ typedef unsigned __int32 opus_uint32;
+ typedef __int16 opus_int16;
+ typedef unsigned __int16 opus_uint16;
+# endif
+
+#elif defined(__MACOS__)
+
+# include <sys/types.h>
+ typedef SInt16 opus_int16;
+ typedef UInt16 opus_uint16;
+ typedef SInt32 opus_int32;
+ typedef UInt32 opus_uint32;
+
+#elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
+
+# include <sys/types.h>
+ typedef int16_t opus_int16;
+ typedef u_int16_t opus_uint16;
+ typedef int32_t opus_int32;
+ typedef u_int32_t opus_uint32;
+
+#elif defined(__BEOS__)
+
+ /* Be */
+# include <inttypes.h>
+ typedef int16 opus_int16;
+ typedef u_int16 opus_uint16;
+ typedef int32_t opus_int32;
+ typedef u_int32_t opus_uint32;
+
+#elif defined (__EMX__)
+
+ /* OS/2 GCC */
+ typedef short opus_int16;
+ typedef unsigned short opus_uint16;
+ typedef int opus_int32;
+ typedef unsigned int opus_uint32;
+
+#elif defined (DJGPP)
+
+ /* DJGPP */
+ typedef short opus_int16;
+ typedef unsigned short opus_uint16;
+ typedef int opus_int32;
+ typedef unsigned int opus_uint32;
+
+#elif defined(R5900)
+
+ /* PS2 EE */
+ typedef int opus_int32;
+ typedef unsigned opus_uint32;
+ typedef short opus_int16;
+ typedef unsigned short opus_uint16;
+
+#elif defined(__SYMBIAN32__)
+
+ /* Symbian GCC */
+ typedef signed short opus_int16;
+ typedef unsigned short opus_uint16;
+ typedef signed int opus_int32;
+ typedef unsigned int opus_uint32;
+
+#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
+
+ /* The only branch that maps the 32-bit types to long rather than int. */
+ typedef short opus_int16;
+ typedef unsigned short opus_uint16;
+ typedef long opus_int32;
+ typedef unsigned long opus_uint32;
+
+#elif defined(CONFIG_TI_C6X)
+
+ typedef short opus_int16;
+ typedef unsigned short opus_uint16;
+ typedef int opus_int32;
+ typedef unsigned int opus_uint32;
+
+#else
+
+ /* Give up, take a reasonable guess */
+ typedef short opus_int16;
+ typedef unsigned short opus_uint16;
+ typedef int opus_int32;
+ typedef unsigned int opus_uint32;
+
+#endif
+
+/* Remaining integer aliases. Unlike the 16/32-bit types above these are
+   preprocessor macros, not typedefs, and are the same on every platform. */
+#define opus_int int /* used for counters etc; at least 16 bits */
+#define opus_int64 long long
+#define opus_int8 signed char
+
+#define opus_uint unsigned int /* used for counters etc; at least 16 bits */
+#define opus_uint64 unsigned long long
+#define opus_uint8 unsigned char
+
+#endif /* OPUS_TYPES_H */
diff --git a/drivers/opus/opusfile.c b/drivers/opus/opusfile.c
new file mode 100644
index 0000000000..1e7497f6cd
--- /dev/null
+++ b/drivers/opus/opusfile.c
@@ -0,0 +1,3158 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: stdio-based convenience library for opening/seeking/decoding
+ last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $
+
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <math.h>
+
+#include "opusfile.h"
+
+/*This implementation is largely based off of libvorbisfile.
+ All of the Ogg bits work roughly the same, though I have made some
+ "improvements" that have not been folded back there, yet.*/
+
+/*A 'chained bitstream' is an Ogg Opus bitstream that contains more than one
+ logical bitstream arranged end to end (the only form of Ogg multiplexing
+ supported by this library).
+ Grouping (parallel multiplexing) is not supported, except to the extent that
+ if there are multiple logical Ogg streams in a single link of the chain, we
+ will ignore all but the first Opus stream we find.*/
+
+/*An Ogg Opus file can be played beginning to end (streamed) without worrying
+ ahead of time about chaining (see opusdec from the opus-tools package).
+ If we have the whole file, however, and want random access
+ (seeking/scrubbing) or desire to know the total length/time of a file, we
+ need to account for the possibility of chaining.*/
+
+/*We can handle things a number of ways.
+ We can determine the entire bitstream structure right off the bat, or find
+ pieces on demand.
+ This library determines and caches structure for the entire bitstream, but
+ builds a virtual decoder on the fly when moving between links in the chain.*/
+
+/*There are also different ways to implement seeking.
+ Enough information exists in an Ogg bitstream to seek to sample-granularity
+ positions in the output.
+ Or, one can seek by picking some portion of the stream roughly in the desired
+ area if we only want coarse navigation through the stream.
+ We implement and expose both strategies.*/
+
+/*The maximum number of bytes in a page (including the page headers).
+ That is a 27-byte fixed header, up to 255 lacing values, and up to 255*255
+ bytes of packet data.*/
+#define OP_PAGE_SIZE_MAX (65307)
+/*The default amount to seek backwards per step when trying to find the
+ previous page.
+ This must be at least as large as the maximum size of a page.
+ The backward searches in this file start with this step size and double it
+ after every pass that fails to find a page.*/
+#define OP_CHUNK_SIZE (65536)
+/*The maximum amount to seek backwards per step when trying to find the
+ previous page (the cap applied to the doubling step size).*/
+#define OP_CHUNK_SIZE_MAX (1024*(opus_int32)1024)
+/*A smaller read size is needed for low-rate streaming.
+ This is the most that op_get_next_page() requests in a single read.*/
+#define OP_READ_SIZE (2048)
+
+int op_test(OpusHead *_head,
+ const unsigned char *_initial_data,size_t _initial_bytes){ /*Test whether
+ the _initial_bytes bytes at _initial_data look like the start of an Ogg Opus
+ stream, by framing them into Ogg pages and handing the first BOS packet
+ that is found to opus_head_parse() together with _head.
+ Return: The result of opus_head_parse() for the first BOS packet that it
+ does not reject with OP_ENOTFORMAT, or
+ OP_FALSE: Fewer than 47 bytes were supplied, or the data ran out
+ before a verdict was reached.
+ OP_ENOTFORMAT: The data does not start with "OggS", or a non-BOS packet
+ was reached before any Opus header was found.
+ OP_EFAULT: ogg_sync_buffer() could not provide a buffer.*/
+ ogg_sync_state oy;
+ char *data;
+ int err;
+ /*The first page of a normal Opus file will be at most 57 bytes (27 Ogg
+ page header bytes + 1 lacing value + 21 Opus header bytes + 8 channel
+ mapping bytes).
+ It will be at least 47 bytes (27 Ogg page header bytes + 1 lacing value +
+ 19 Opus header bytes using channel mapping family 0).
+ If we don't have at least that much data, give up now.*/
+ if(_initial_bytes<47)return OP_FALSE;
+ /*Only proceed if we start with the magic OggS string.
+ This is to prevent us spending a lot of time allocating memory and looking
+ for Ogg pages in non-Ogg files.*/
+ if(memcmp(_initial_data,"OggS",4)!=0)return OP_ENOTFORMAT;
+ ogg_sync_init(&oy);
+ data=ogg_sync_buffer(&oy,_initial_bytes);
+ if(data!=NULL){
+ ogg_stream_state os;
+ ogg_page og;
+ int ret;
+ memcpy(data,_initial_data,_initial_bytes);
+ ogg_sync_wrote(&oy,_initial_bytes);
+ ogg_stream_init(&os,-1); /*The serial number given here is a placeholder:
+ it is replaced by each page's own serial number
+ before that page is submitted below.*/
+ err=OP_FALSE; /*OP_FALSE doubles as "no verdict yet": the loop below keeps
+ running for as long as err still has this value.*/
+ do{
+ ogg_packet op;
+ ret=ogg_sync_pageout(&oy,&og);
+ /*Ignore holes.*/
+ if(ret<0)continue;
+ /*Stop if we run out of data.*/
+ if(!ret)break;
+ ogg_stream_reset_serialno(&os,ogg_page_serialno(&og));
+ ogg_stream_pagein(&os,&og);
+ /*Only process the first packet on this page (if it's a BOS packet,
+ it's required to be the only one).*/
+ if(ogg_stream_packetout(&os,&op)==1){
+ if(op.b_o_s){
+ ret=opus_head_parse(_head,op.packet,op.bytes);
+ /*If this didn't look like Opus, keep going.*/
+ if(ret==OP_ENOTFORMAT)continue;
+ /*Otherwise we're done, one way or another.*/
+ err=ret;
+ }
+ /*We finished parsing the headers.
+ There is no Opus to be found.*/
+ else err=OP_ENOTFORMAT;
+ }
+ }
+ while(err==OP_FALSE);
+ ogg_stream_clear(&os);
+ }
+ else err=OP_EFAULT;
+ ogg_sync_clear(&oy);
+ return err;
+}
+
+/*Many, many internal helpers.
+ The intention is not to be confusing.
+ Rampant duplication and monolithic function implementation (though we do have
+ some large, omnibus functions still) would be harder to understand anyway.
+ The high level functions are last.
+ Begin grokking near the end of the file if you prefer to read things
+ top-down.*/
+
+/*The read/seek functions track absolute position within the stream.*/
+
+/*Read a little more data from the file/pipe into the ogg_sync framer.
+  _nbytes: The maximum number of bytes to read.
+           This must be positive.
+  Return: A positive number of bytes read on success, 0 on end-of-file, or a
+           negative value on failure.
+          This includes OP_EFAULT if the framer could not provide a buffer
+           to read into.*/
+static int op_get_data(OggOpusFile *_of,int _nbytes){
+  unsigned char *buffer;
+  int            nbytes;
+  OP_ASSERT(_nbytes>0);
+  buffer=(unsigned char *)ogg_sync_buffer(&_of->oy,_nbytes);
+  /*ogg_sync_buffer() returns NULL if it cannot provide the storage.
+    Never hand a NULL destination to the read callback.*/
+  if(OP_UNLIKELY(buffer==NULL))return OP_EFAULT;
+  nbytes=(int)(*_of->callbacks.read)(_of->source,buffer,_nbytes);
+  OP_ASSERT(nbytes<=_nbytes);
+  /*Only commit bytes to the framer if the callback actually produced some;
+     zero (end-of-file) and negative (error) results are passed straight back
+     to the caller.*/
+  if(OP_LIKELY(nbytes>0))ogg_sync_wrote(&_of->oy,nbytes);
+  return nbytes;
+}
+
+/*Reposition the underlying source to the absolute byte offset _offset.
+  Nothing is done if the tracked offset already equals _offset.
+  Otherwise the seek callback is invoked with SEEK_SET and, if it succeeds,
+   the tracked offset is updated and the ogg_sync framer is reset.
+  Return: 0 on success, or OP_EREAD if there is no seek callback or the
+           callback reports failure.*/
+static int op_seek_helper(OggOpusFile *_of,opus_int64 _offset){
+  if(_of->offset!=_offset){
+    /*Without a seek callback we cannot move at all.*/
+    if(_of->callbacks.seek==NULL)return OP_EREAD;
+    if((*_of->callbacks.seek)(_of->source,_offset,SEEK_SET)!=0)return OP_EREAD;
+    _of->offset=_offset;
+    /*Whatever the framer had buffered belongs to the old position.*/
+    ogg_sync_reset(&_of->oy);
+  }
+  return 0;
+}
+
+/*Report the current position indicator of the underlying source.
+  This should match what tell() would report.
+  It is not just the tracked offset: bytes that have been read into the sync
+   state but not yet returned from it (fill minus returned) also count.*/
+static opus_int64 op_position(const OggOpusFile *_of){
+  opus_int64 pos;
+  pos=_of->offset;
+  pos+=_of->oy.fill;
+  pos-=_of->oy.returned;
+  return pos;
+}
+
+/*From the head of the stream, get the next page.
+ _boundary specifies if the function is allowed to fetch more data from the
+ stream (and how much) or only use internally buffered data.
+ [out] _og: Passed to ogg_sync_pageseek() to receive the page that is found.
+ _boundary: -1: Unbounded search.
+ 0: Read no additional data.
+ Use only cached data.
+ n: Search for the start of a new page up to file position n.
+ Return: n>=0: Found a page at absolute offset n.
+ OP_FALSE: Hit the _boundary limit, or hit end-of-file during an
+ unbounded search.
+ OP_EREAD: An underlying read operation failed.
+ OP_EBADLINK: We hit end-of-file before reaching _boundary.*/
+static opus_int64 op_get_next_page(OggOpusFile *_of,ogg_page *_og,
+ opus_int64 _boundary){
+ while(_boundary<=0||_of->offset<_boundary){
+ int more;
+ more=ogg_sync_pageseek(&_of->oy,_og);
+ /*Skipped (-more) bytes.
+ Advance the tracked offset past them.*/
+ if(OP_UNLIKELY(more<0))_of->offset-=more;
+ else if(more==0){
+ int read_nbytes;
+ int ret;
+ /*Send more paramedics.
+ (The framer needs more data before it can produce a page.)*/
+ if(!_boundary)return OP_FALSE;
+ if(_boundary<0)read_nbytes=OP_READ_SIZE;
+ else{
+ opus_int64 position;
+ position=op_position(_of);
+ if(position>=_boundary)return OP_FALSE;
+ read_nbytes=(int)OP_MIN(_boundary-position,OP_READ_SIZE); /*Never read
+ past _boundary.*/
+ }
+ ret=op_get_data(_of,read_nbytes);
+ if(OP_UNLIKELY(ret<0))return OP_EREAD;
+ if(OP_UNLIKELY(ret==0)){
+ /*Only fail cleanly on EOF if we didn't have a known boundary.
+ Otherwise, we should have been able to reach that boundary, and this
+ is a fatal error.*/
+ return OP_UNLIKELY(_boundary<0)?OP_FALSE:OP_EBADLINK;
+ }
+ }
+ else{
+ /*Got a page.
+ Return the page start offset and advance the internal offset past the
+ page end.*/
+ opus_int64 page_offset;
+ page_offset=_of->offset;
+ _of->offset+=more;
+ OP_ASSERT(page_offset>=0);
+ return page_offset;
+ }
+ }
+ return OP_FALSE;
+}
+
+/*Append the serial number of page _og to a growable list.
+  The list pointer, its length, and its capacity are read from, and on
+   success written back through, _serialnos, _nserialnos, and _cserialnos.
+  When the list is full its capacity grows to 2*capacity+1.
+  Return: 0 on success, or OP_EFAULT if the capacity cannot be grown (it
+           would overflow an int, or the reallocation failed).
+          Nothing is written back through the pointers on failure.*/
+static int op_add_serialno(const ogg_page *_og,
+ ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){
+  ogg_uint32_t *list;
+  int           count;
+  int           capacity;
+  list=*_serialnos;
+  count=*_nserialnos;
+  capacity=*_cserialnos;
+  if(OP_UNLIKELY(count>=capacity)){
+    /*Refuse to grow if 2*capacity+1 would not fit in an int.*/
+    if(OP_UNLIKELY(capacity>((INT_MAX-1)>>1)))return OP_EFAULT;
+    capacity=2*capacity+1;
+    OP_ASSERT(count<capacity);
+    list=(ogg_uint32_t *)_ogg_realloc(list,sizeof(*list)*capacity);
+    if(OP_UNLIKELY(list==NULL))return OP_EFAULT;
+  }
+  list[count]=ogg_page_serialno(_og);
+  *_serialnos=list;
+  *_nserialnos=count+1;
+  *_cserialnos=capacity;
+  return 0;
+}
+
+/*Linear search for the serial number _s among the first _nserialnos entries
+   of _serialnos.
+  Return: 1 if it is present, or 0 if it is not.*/
+static int op_lookup_serialno(ogg_uint32_t _s,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  int idx;
+  for(idx=0;idx<_nserialnos;idx++){
+    if(_serialnos[idx]==_s)return 1;
+  }
+  return 0;
+}
+
+/*Same lookup as op_lookup_serialno(), keyed on the serial number carried by
+   the page _og.
+  Return: Nonzero if the page's serial number is in the list.*/
+static int op_lookup_page_serialno(const ogg_page *_og,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  ogg_uint32_t page_serialno;
+  page_serialno=ogg_page_serialno(_og);
+  return op_lookup_serialno(page_serialno,_serialnos,_nserialnos);
+}
+
+typedef struct OpusSeekRecord OpusSeekRecord;
+
+/*We use this to remember the pages we found while enumerating the links of a
+ chained stream.
+ We keep track of the starting and ending offsets, as well as the point we
+ started searching from, so we know where to bisect.
+ We also keep the serial number, so we can tell if the page belonged to the
+ current link or not, as well as the granule position, to aid in estimating
+ the start of the link.
+ Records are filled in by op_get_prev_page_serial().*/
+struct OpusSeekRecord{
+ /*The earliest byte we know of such that reading forward from it causes
+ capture to be regained at this page.*/
+ opus_int64 search_start;
+ /*The offset of this page.*/
+ opus_int64 offset;
+ /*The size of this page (at most OP_PAGE_SIZE_MAX bytes).*/
+ opus_int32 size;
+ /*The serial number of this page.*/
+ ogg_uint32_t serialno;
+ /*The granule position of this page, exactly as reported by
+ ogg_page_granulepos().
+ op_get_prev_page_serial() stores it without testing it against -1.*/
+ ogg_int64_t gp;
+};
+
+/*Find the last page beginning before _offset with a valid granule position.
+ NOTE(review): the code below records whatever granule position the page
+ carries and never compares it against -1; confirm that callers cope with an
+ unset granule position in _sr->gp.
+ There is no '_boundary' parameter as it will always have to read more data.
+ This is much dirtier than the above, as Ogg doesn't have any backward search
+ linkage.
+ This search prefers pages of the specified serial number.
+ If a page of the specified serial number is spotted during the
+ seek-back-and-read-forward, it will return the info of last page of the
+ matching serial number, instead of the very last page, unless the very last
+ page belongs to a different link than preferred serial number.
+ If no page of the specified serial number is seen, it will return the info of
+ the last page.
+ [out] _sr: Returns information about the page that was found on success.
+ It is also used as scratch space during the search, so its
+ contents may be modified even when the function fails.
+ _offset: The _offset before which to find a page.
+ Any page returned will consist of data entirely before _offset.
+ _serialno: The preferred serial number.
+ If a page with this serial number is found, it will be returned
+ even if another page in the same link is found closer to
+ _offset.
+ This is purely opportunistic: there is no guarantee such a page
+ will be found if it exists.
+ _serialnos: The list of serial numbers in the link that contains the
+ preferred serial number.
+ _nserialnos: The number of serial numbers in the current link.
+ Return: 0 on success, or a negative value on failure.
+ OP_EREAD: Failed to read more data (error or EOF).
+ OP_EBADLINK: We couldn't find a page even after seeking back to the
+ start of the stream.*/
+static int op_get_prev_page_serial(OggOpusFile *_of,OpusSeekRecord *_sr,
+ opus_int64 _offset,ogg_uint32_t _serialno,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+ OpusSeekRecord preferred_sr;
+ ogg_page og;
+ opus_int64 begin;
+ opus_int64 end;
+ opus_int64 original_end;
+ opus_int32 chunk_size;
+ int preferred_found;
+ original_end=end=begin=_offset;
+ preferred_found=0;
+ _offset=-1; /*From here on _offset holds the offset of the last page found,
+ or -1 while no page has been found yet.*/
+ chunk_size=OP_CHUNK_SIZE;
+ do{
+ opus_int64 search_start;
+ int ret;
+ OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX);
+ begin=OP_MAX(begin-chunk_size,0); /*Step back one chunk, but not past the
+ start of the stream.*/
+ ret=op_seek_helper(_of,begin);
+ if(OP_UNLIKELY(ret<0))return ret;
+ search_start=begin;
+ while(_of->offset<end){
+ opus_int64 llret;
+ ogg_uint32_t serialno;
+ llret=op_get_next_page(_of,&og,end);
+ if(OP_UNLIKELY(llret<OP_FALSE))return (int)llret;
+ else if(llret==OP_FALSE)break;
+ serialno=ogg_page_serialno(&og);
+ /*Save the information for this page.
+ We're not interested in the page itself... just the serial number, byte
+ offset, page size, and granule position.*/
+ _sr->search_start=search_start;
+ _sr->offset=_offset=llret;
+ _sr->serialno=serialno;
+ OP_ASSERT(_of->offset-_offset>=0);
+ OP_ASSERT(_of->offset-_offset<=OP_PAGE_SIZE_MAX);
+ _sr->size=(opus_int32)(_of->offset-_offset);
+ _sr->gp=ogg_page_granulepos(&og);
+ /*If this page is from the stream we're looking for, remember it.*/
+ if(serialno==_serialno){
+ preferred_found=1;
+ *&preferred_sr=*_sr;
+ }
+ if(!op_lookup_serialno(serialno,_serialnos,_nserialnos)){
+ /*We fell off the end of the link, which means we seeked back too far
+ and shouldn't have been looking in that link to begin with.
+ If we found the preferred serial number, forget that we saw it.*/
+ preferred_found=0;
+ }
+ search_start=llret+1; /*The search for the next page resumes just past
+ the start of this one.*/
+ }
+ /*We started from the beginning of the stream and found nothing.
+ This should be impossible unless the contents of the source changed out
+ from under us after we read from it.*/
+ if(OP_UNLIKELY(!begin)&&OP_UNLIKELY(_offset<0))return OP_EBADLINK;
+ /*Bump up the chunk size.
+ This is mildly helpful when seeks are very expensive (http).*/
+ chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+ /*Avoid quadratic complexity if we hit an invalid patch of the file.*/
+ end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end);
+ }
+ while(_offset<0);
+ if(preferred_found)*_sr=*&preferred_sr;
+ return 0;
+}
+
+/*Find the last page beginning before _offset with the given serial number and
+ a valid granule position.
+ Unlike the above search, this continues until it finds such a page, but does
+ not stray outside the current link.
+ We could implement it (inefficiently) by calling op_get_prev_page_serial()
+ repeatedly until it returned a page that had both our preferred serial
+ number and a valid granule position, but doing it with a separate function
+ allows us to avoid repeatedly re-scanning valid pages from other streams as
+ we seek-back-and-read-forward.
+ [out] _gp: Returns the granule position of the page that was found on
+ success.
+ It is not written on failure.
+ _offset: The _offset before which to find a page.
+ Any page returned will consist of data entirely before _offset.
+ _serialno: The target serial number.
+ This must be one of the entries in _serialnos.
+ _serialnos: The list of serial numbers in the link that contains the
+ preferred serial number.
+ _nserialnos: The number of serial numbers in the current link.
+ Return: The offset of the page on success, or a negative value on failure.
+ OP_EREAD: Failed to read more data (error or EOF).
+ OP_EBADLINK: We couldn't find a page even after seeking back past the
+ beginning of the link.*/
+static opus_int64 op_get_last_page(OggOpusFile *_of,ogg_int64_t *_gp,
+ opus_int64 _offset,ogg_uint32_t _serialno,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+ ogg_page og;
+ ogg_int64_t gp;
+ opus_int64 begin;
+ opus_int64 end;
+ opus_int64 original_end;
+ opus_int32 chunk_size;
+ /*The target serial number must belong to the current link.*/
+ OP_ASSERT(op_lookup_serialno(_serialno,_serialnos,_nserialnos));
+ original_end=end=begin=_offset;
+ _offset=-1; /*From here on _offset holds the offset of the last matching
+ page found, or -1 while none has been found yet.*/
+ /*We shouldn't have to initialize gp, but gcc is too dumb to figure out that
+ ret>=0 implies we entered the if(page_gp!=-1) block at least once.*/
+ gp=-1;
+ chunk_size=OP_CHUNK_SIZE;
+ do{
+ int left_link;
+ int ret;
+ OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX);
+ begin=OP_MAX(begin-chunk_size,0); /*Step back one chunk, but not past the
+ start of the stream.*/
+ ret=op_seek_helper(_of,begin);
+ if(OP_UNLIKELY(ret<0))return ret;
+ left_link=0;
+ while(_of->offset<end){
+ opus_int64 llret;
+ ogg_uint32_t serialno;
+ llret=op_get_next_page(_of,&og,end);
+ if(OP_UNLIKELY(llret<OP_FALSE))return llret;
+ else if(llret==OP_FALSE)break;
+ serialno=ogg_page_serialno(&og);
+ if(serialno==_serialno){
+ ogg_int64_t page_gp;
+ /*The page is from the right stream...*/
+ page_gp=ogg_page_granulepos(&og);
+ if(page_gp!=-1){
+ /*And has a valid granule position.
+ Let's remember it.*/
+ _offset=llret;
+ gp=page_gp;
+ }
+ }
+ else if(OP_UNLIKELY(!op_lookup_serialno(serialno,
+ _serialnos,_nserialnos))){
+ /*We fell off the start of the link, which means we don't need to keep
+ seeking any farther back.*/
+ left_link=1;
+ }
+ }
+ /*We started from at or before the beginning of the link and found nothing.
+ This should be impossible unless the contents of the source changed out
+ from under us after we read from it.*/
+ if((OP_UNLIKELY(left_link)||OP_UNLIKELY(!begin))&&OP_UNLIKELY(_offset<0)){
+ return OP_EBADLINK;
+ }
+ /*Bump up the chunk size.
+ This is mildly helpful when seeks are very expensive (http).*/
+ chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+ /*Avoid quadratic complexity if we hit an invalid patch of the file.*/
+ end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end);
+ }
+ while(_offset<0);
+ *_gp=gp;
+ return _offset;
+}
+
+/*Uses the local ogg_stream storage in _of.
+ This is important for non-streaming input sources.
+ Walks the BOS pages at the start of a link, recording their serial numbers
+ (when _serialnos is non-NULL) and looking for the first one whose packet
+ opus_head_parse() accepts, then reads the comment header packet from that
+ stream.
+ _of->ready_state is raised to OP_STREAMSET once an Opus ID header has been
+ accepted.
+ _head: Passed to opus_head_parse() for each candidate BOS packet.
+ _tags: Passed to opus_tags_parse() for the comment header.
+ It is cleared again if the stream is rejected after the tags were
+ parsed.
+ _serialnos, _nserialnos, _cserialnos: The growable serial number list for
+ this link (see op_add_serialno()), or _serialnos may be NULL to skip
+ collecting serial numbers.
+ _og: On entry, the first page of the link.
+ It is overwritten as further pages are read.
+ Return: 0 on success, or a negative value on failure.
+ OP_ENOTFORMAT: No Opus stream was found among the BOS pages.
+ OP_EBADHEADER: A duplicate serial number, a hole, a page that could
+ not be fetched, a new link starting before the
+ comment header, or extra data following the comment
+ header on its page.
+ Errors from op_add_serialno(), opus_head_parse() (other than
+ OP_ENOTFORMAT), and opus_tags_parse() are returned unchanged.*/
+static int op_fetch_headers_impl(OggOpusFile *_of,OpusHead *_head,
+ OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos,
+ int *_cserialnos,ogg_page *_og){
+ ogg_packet op;
+ int ret;
+ if(_serialnos!=NULL)*_nserialnos=0;
+ /*Extract the serialnos of all BOS pages plus the first set of Opus headers
+ we see in the link.*/
+ while(ogg_page_bos(_og)){
+ if(_serialnos!=NULL){
+ if(OP_UNLIKELY(op_lookup_page_serialno(_og,*_serialnos,*_nserialnos))){
+ /*A dupe serialnumber in an initial header packet set==invalid stream.*/
+ return OP_EBADHEADER;
+ }
+ ret=op_add_serialno(_og,_serialnos,_nserialnos,_cserialnos);
+ if(OP_UNLIKELY(ret<0))return ret;
+ }
+ if(_of->ready_state<OP_STREAMSET){
+ /*We don't have an Opus stream in this link yet, so begin prospective
+ stream setup.
+ We need a stream to get packets.*/
+ ogg_stream_reset_serialno(&_of->os,ogg_page_serialno(_og));
+ ogg_stream_pagein(&_of->os,_og);
+ if(OP_LIKELY(ogg_stream_packetout(&_of->os,&op)>0)){
+ ret=opus_head_parse(_head,op.packet,op.bytes);
+ /*Found a valid Opus header.
+ Continue setup.*/
+ if(OP_LIKELY(ret>=0))_of->ready_state=OP_STREAMSET;
+ /*If it's just a stream type we don't recognize, ignore it.
+ Everything else is fatal.*/
+ else if(ret!=OP_ENOTFORMAT)return ret;
+ }
+ }
+ /*Get the next page.
+ No need to clamp the boundary offset against _of->end, as all errors
+ become OP_ENOTFORMAT or OP_EBADHEADER.*/
+ if(OP_UNLIKELY(op_get_next_page(_of,_og,
+ OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+ return _of->ready_state<OP_STREAMSET?OP_ENOTFORMAT:OP_EBADHEADER;
+ }
+ }
+ if(OP_UNLIKELY(_of->ready_state!=OP_STREAMSET))return OP_ENOTFORMAT;
+ /*If the first non-header page belonged to our Opus stream, submit it.*/
+ if(_of->os.serialno==ogg_page_serialno(_og))ogg_stream_pagein(&_of->os,_og);
+ /*Loop getting packets.*/
+ for(;;){
+ switch(ogg_stream_packetout(&_of->os,&op)){
+ case 0:{
+ /*No packet is available yet.
+ Loop getting pages.*/
+ for(;;){
+ /*No need to clamp the boundary offset against _of->end, as all
+ errors become OP_EBADHEADER.*/
+ if(OP_UNLIKELY(op_get_next_page(_of,_og,
+ OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+ return OP_EBADHEADER;
+ }
+ /*If this page belongs to the correct stream, go parse it.*/
+ if(_of->os.serialno==ogg_page_serialno(_og)){
+ ogg_stream_pagein(&_of->os,_og);
+ break;
+ }
+ /*If the link ends before we see the Opus comment header, abort.*/
+ if(OP_UNLIKELY(ogg_page_bos(_og)))return OP_EBADHEADER;
+ /*Otherwise, keep looking.*/
+ }
+ }break;
+ /*We shouldn't get a hole in the headers!*/
+ case -1:return OP_EBADHEADER;
+ default:{
+ /*Got a packet.
+ It should be the comment header.*/
+ ret=opus_tags_parse(_tags,op.packet,op.bytes);
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*Make sure the page terminated at the end of the comment header.
+ If there is another packet on the page, or part of a packet, then
+ reject the stream.
+ Otherwise seekable sources won't be able to seek back to the start
+ properly.
+ The check below looks for a second packet, and at the last byte of the
+ page header for a final lacing value of 255.*/
+ ret=ogg_stream_packetout(&_of->os,&op);
+ if(OP_UNLIKELY(ret!=0)
+ ||OP_UNLIKELY(_og->header[_og->header_len-1]==255)){
+ /*If we fail, the caller assumes our tags are uninitialized.*/
+ opus_tags_clear(_tags);
+ return OP_EBADHEADER;
+ }
+ return 0;
+ }
+ }
+ }
+}
+
+static int op_fetch_headers(OggOpusFile *_of,OpusHead *_head,
+ OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos,
+ int *_cserialnos,ogg_page *_og){ /*Wrapper around op_fetch_headers_impl()
+ that fetches the first page itself when _og is NULL, and that manages
+ _of->ready_state: it is set to OP_OPENED before the headers are parsed and
+ put back to OP_OPENED if parsing fails.
+ The remaining parameters are forwarded unchanged.
+ Return: The result of op_fetch_headers_impl(), or OP_ENOTFORMAT if _og was
+ NULL and no page could be fetched.*/
+ ogg_page og;
+ int ret;
+ if(!_og){
+ /*No need to clamp the boundary offset against _of->end, as all errors
+ become OP_ENOTFORMAT.*/
+ if(OP_UNLIKELY(op_get_next_page(_of,&og,
+ OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+ return OP_ENOTFORMAT;
+ }
+ _og=&og;
+ }
+ _of->ready_state=OP_OPENED;
+ ret=op_fetch_headers_impl(_of,_head,_tags,_serialnos,_nserialnos,
+ _cserialnos,_og);
+ /*Revert back from OP_STREAMSET to OP_OPENED on failure, to prevent
+ double-free of the tags in an unseekable stream.*/
+ if(OP_UNLIKELY(ret<0))_of->ready_state=OP_OPENED;
+ return ret;
+}
+
+/*Granule position manipulation routines.
+ A granule position is defined to be an unsigned 64-bit integer, with the
+ special value -1 in two's complement indicating an unset or invalid granule
+ position.
+ We are not guaranteed to have an unsigned 64-bit type, so we construct the
+ following routines that
+ a) Properly order negative numbers as larger than positive numbers, and
+ b) Check for underflow or overflow past the special -1 value.
+ This lets us operate on the full, valid range of granule positions in a
+ consistent and safe manner.
+ This full range is organized into distinct regions:
+ [ -1 (invalid) ][ 0 ... OP_INT64_MAX ][ OP_INT64_MIN ... -2 ][-1 (invalid) ]
+
+ No one should actually use granule positions so large that they're negative,
+ even if they are technically valid, as very little software handles them
+ correctly (including most of Xiph.Org's).
+ This library also refuses to support durations so large they won't fit in a
+ signed 64-bit integer (to avoid exposing this mess to the application, and
+ to simplify a good deal of internal arithmetic), so the only way to use them
+ successfully is if pcm_start is very large.
+ This means there isn't anything you can do with negative granule positions
+ that you couldn't have done with purely non-negative ones.
+ The main purpose of these routines is to allow us to think very explicitly
+ about the possible failure cases of all granule position manipulations.*/
+
+/*Safely adds a small signed integer to a valid (not -1) granule position.
+ The result can use the full 64-bit range of values (both positive and
+ negative), but will fail on overflow (wrapping past -1; wrapping past
+ OP_INT64_MAX is explicitly okay).
+ [out] _dst_gp: The resulting granule position.
+ Only modified on success.
+ It may point at the same variable that supplied _src_gp,
+ since _src_gp is passed by value.
+ _src_gp: The granule position to add to.
+ This must not be -1.
+ _delta: The amount to add.
+ This is allowed to be up to 32 bits to support the maximum
+ duration of a single Ogg page (255 packets * 120 ms per
+ packet == 1,468,800 samples at 48 kHz).
+ Return: 0 on success, or OP_EINVAL if the result would wrap around past -1.*/
+static int op_granpos_add(ogg_int64_t *_dst_gp,ogg_int64_t _src_gp,
+ opus_int32 _delta){
+ /*The code below handles this case correctly, but there's no reason we
+ should ever be called with these values, so make sure we aren't.*/
+ OP_ASSERT(_src_gp!=-1);
+ if(_delta>0){
+ /*Adding this amount to the granule position would overflow its 64-bit
+ range.*/
+ if(OP_UNLIKELY(_src_gp<0)&&OP_UNLIKELY(_src_gp>=-1-_delta))return OP_EINVAL;
+ if(OP_UNLIKELY(_src_gp>OP_INT64_MAX-_delta)){
+ /*Adding this amount to the granule position would overflow the positive
+ half of its 64-bit range.
+ Since signed overflow is undefined in C, do it in a way the compiler
+ isn't allowed to screw up.
+ We spend just enough of _delta to step from _src_gp to OP_INT64_MIN,
+ and leave the remainder to be added below.*/
+ _delta-=(opus_int32)(OP_INT64_MAX-_src_gp)+1;
+ _src_gp=OP_INT64_MIN;
+ }
+ }
+ else if(_delta<0){
+ /*Subtracting this amount from the granule position would underflow its
+ 64-bit range.*/
+ if(_src_gp>=0&&OP_UNLIKELY(_src_gp<-_delta))return OP_EINVAL;
+ if(OP_UNLIKELY(_src_gp<OP_INT64_MIN-_delta)){
+ /*Subtracting this amount from the granule position would underflow the
+ negative half of its 64-bit range.
+ Since signed underflow is undefined in C, do it in a way the compiler
+ isn't allowed to screw up.
+ We spend just enough of _delta to step from _src_gp to OP_INT64_MAX,
+ and leave the remainder to be added below.*/
+ _delta+=(opus_int32)(_src_gp-OP_INT64_MIN)+1;
+ _src_gp=OP_INT64_MAX;
+ }
+ }
+ *_dst_gp=_src_gp+_delta; /*Any wrap between OP_INT64_MAX and OP_INT64_MIN
+ has already been folded into _src_gp and _delta
+ above, so this sum stays in range.*/
+ return 0;
+}
+
+/*Safely computes the difference between two granule positions.
+ The difference must fit in a signed 64-bit integer, or the function fails.
+ It correctly handles the case where the granule position has wrapped around
+ from positive values to negative ones.
+ [out] _delta: The difference between the granule positions (_gp_a minus
+ _gp_b).
+ Only modified on success.
+ _gp_a: The granule position to subtract from.
+ This must not be -1.
+ _gp_b: The granule position to subtract.
+ This must not be -1.
+ Return: 0 on success, or OP_EINVAL if the result would not fit in a signed
+ 64-bit integer.*/
+static int op_granpos_diff(ogg_int64_t *_delta,
+ ogg_int64_t _gp_a,ogg_int64_t _gp_b){
+ int gp_a_negative;
+ int gp_b_negative;
+ /*The code below handles these cases correctly, but there's no reason we
+ should ever be called with these values, so make sure we aren't.*/
+ OP_ASSERT(_gp_a!=-1);
+ OP_ASSERT(_gp_b!=-1);
+ gp_a_negative=OP_UNLIKELY(_gp_a<0);
+ gp_b_negative=OP_UNLIKELY(_gp_b<0);
+ /*Only the mixed-sign case needs care.
+ When both values have the same sign, the plain subtraction at the bottom
+ is used.*/
+ if(OP_UNLIKELY(gp_a_negative^gp_b_negative)){
+ ogg_int64_t da;
+ ogg_int64_t db;
+ if(gp_a_negative){
+ /*_gp_a has wrapped to a negative value but _gp_b hasn't: the difference
+ should be positive.*/
+ /*Step 1: Handle wrapping.*/
+ /*_gp_a < 0 => da < 0.*/
+ da=(OP_INT64_MIN-_gp_a)-1;
+ /*_gp_b >= 0 => db >= 0.*/
+ db=OP_INT64_MAX-_gp_b;
+ /*Step 2: Check for overflow.*/
+ if(OP_UNLIKELY(OP_INT64_MAX+da<db))return OP_EINVAL;
+ *_delta=db-da;
+ }
+ else{
+ /*_gp_b has wrapped to a negative value but _gp_a hasn't: the difference
+ should be negative.*/
+ /*Step 1: Handle wrapping.*/
+ /*_gp_a >= 0 => da <= 0*/
+ da=_gp_a+OP_INT64_MIN;
+ /*_gp_b < 0 => db <= 0*/
+ db=OP_INT64_MIN-_gp_b;
+ /*Step 2: Check for overflow.*/
+ if(OP_UNLIKELY(da<OP_INT64_MIN-db))return OP_EINVAL;
+ *_delta=da+db;
+ }
+ }
+ else *_delta=_gp_a-_gp_b;
+ return 0;
+}
+
+/*Three-way comparison of two valid granule positions, ordering negative
+   values (positions that have wrapped) after all non-negative ones.
+  The invalid granule position -1 should behave like NaN (unordered with
+   respect to everything, itself included), so there is nothing sensible to
+   return for it; neither argument may be -1.
+  Return: 1 if _gp_a comes after _gp_b, -1 if it comes before, and 0 if they
+           are equal.*/
+static int op_granpos_cmp(ogg_int64_t _gp_a,ogg_int64_t _gp_b){
+  int a_wrapped;
+  int b_wrapped;
+  OP_ASSERT(_gp_a!=-1);
+  OP_ASSERT(_gp_b!=-1);
+  a_wrapped=_gp_a<0;
+  b_wrapped=_gp_b<0;
+  /*Exactly one side has wrapped: the wrapped one is the larger.*/
+  if(OP_UNLIKELY(a_wrapped!=b_wrapped))return a_wrapped?1:-1;
+  /*Both on the same side of the wrap: ordinary signed ordering applies.*/
+  if(_gp_a>_gp_b)return 1;
+  if(_gp_a<_gp_b)return -1;
+  return 0;
+}
+
+/*Computes the duration of an Opus packet in samples at 48 kHz, as the frame
+   count times the per-frame sample count.
+  Return: The duration, or OP_EBADPACKET if the frame count cannot be
+           determined or the total exceeds 120*48 samples.*/
+static int op_get_packet_duration(const unsigned char *_data,int _len){
+  int frame_count;
+  int total;
+  frame_count=opus_packet_get_nb_frames(_data,_len);
+  if(OP_UNLIKELY(frame_count<0))return OP_EBADPACKET;
+  total=frame_count*opus_packet_get_samples_per_frame(_data,48000);
+  return OP_UNLIKELY(total>120*48)?OP_EBADPACKET:total;
+}
+
+/*Converts a granule position into a sample position by subtracting the
+   pre-skip recorded in _head.
+  This function more properly belongs in info.c, but it is defined here so
+   that the static granule position manipulation functions can stay static.
+  Return: The adjusted position, or -1 if _gp was -1 or the subtraction
+           failed in op_granpos_add().*/
+ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp){
+  ogg_int64_t sample_gp;
+  opus_int32  pre_skip;
+  pre_skip=_head->pre_skip;
+  if(_gp==-1)return -1;
+  if(op_granpos_add(&sample_gp,_gp,-pre_skip)!=0)return -1;
+  return sample_gp;
+}
+
+/*Grab all the packets currently in the stream state, and compute their
+ durations.
+ _of->op_count is set to the number of packets collected, and _of->op_pos is
+ reset to 0.
+ Packets whose duration is not positive are not collected; if such a packet
+ follows a collected one, its granule position is copied onto that
+ preceding packet.
+ [out] _durations: Returns the durations of the individual packets.
+ Return: The total duration of all packets, or OP_HOLE if there was a hole.*/
+static opus_int32 op_collect_audio_packets(OggOpusFile *_of,
+ int _durations[255]){
+ opus_int32 total_duration;
+ int op_count;
+ /*Count the durations of all packets in the page.*/
+ op_count=0;
+ total_duration=0;
+ for(;;){
+ int ret;
+ /*This takes advantage of undocumented libogg behavior that returned
+ ogg_packet buffers are valid at least until the next page is
+ submitted.
+ Relying on this is not too terrible, as _none_ of the Ogg memory
+ ownership/lifetime rules are well-documented.
+ But I can read its code and know this will work.*/
+ ret=ogg_stream_packetout(&_of->os,_of->op+op_count);
+ if(!ret)break;
+ if(OP_UNLIKELY(ret<0)){
+ /*We shouldn't get holes in the middle of pages.*/
+ OP_ASSERT(op_count==0);
+ /*Set the return value and break out of the loop.
+ We want to make sure op_count gets set to 0, because we've ingested a
+ page, so any previously loaded packets are now invalid.*/
+ total_duration=OP_HOLE;
+ break;
+ }
+ /*Unless libogg is broken, we can't get more than 255 packets from a
+ single page.*/
+ OP_ASSERT(op_count<255);
+ _durations[op_count]=op_get_packet_duration(_of->op[op_count].packet,
+ _of->op[op_count].bytes);
+ if(OP_LIKELY(_durations[op_count]>0)){
+ /*With at most 255 packets on a page, this can't overflow.*/
+ total_duration+=_durations[op_count++];
+ }
+ /*Ignore packets with an invalid TOC sequence.
+ Since op_count is not advanced, the next packet overwrites this slot.*/
+ else if(op_count>0){
+ /*But save the granule position, if there was one.*/
+ _of->op[op_count-1].granulepos=_of->op[op_count].granulepos;
+ }
+ }
+ _of->op_pos=0;
+ _of->op_count=op_count;
+ return total_duration;
+}
+
+/*Starting from current cursor position, get the initial PCM offset of the next
+ page.
+ This also validates the granule position on the first page with a completed
+ audio data packet, as required by the spec.
+ If this link is completely empty (no pages with completed packets), then this
+ function sets pcm_start=pcm_end=0 and returns the BOS page of the next link
+ (if any).
+ In the seekable case, we initialize pcm_end=-1 before calling this function,
+ so that later we can detect that the link was empty before calling
+ op_find_final_pcm_offset().
+ [inout] _link: The link for which to find pcm_start.
+ [out] _og: Returns the BOS page of the next link if this link was empty.
+ In the unseekable case, we can then feed this to
+ op_fetch_headers() to start the next link.
+ The caller may pass NULL (e.g., for seekable streams), in
+ which case this page will be discarded.
+ Return: 0 on success, 1 if there is a buffered BOS page available, or a
+ negative value on unrecoverable error.*/
+static int op_find_initial_pcm_offset(OggOpusFile *_of,
+ OggOpusLink *_link,ogg_page *_og){
+ ogg_page og;
+ ogg_int64_t pcm_start;
+ ogg_int64_t prev_packet_gp;
+ ogg_int64_t cur_page_gp;
+ ogg_uint32_t serialno;
+ opus_int32 total_duration;
+ /*Filled in by op_collect_audio_packets(), one entry per buffered packet.*/
+ int durations[255];
+ int cur_page_eos;
+ int op_count;
+ int pi;
+ /*If the caller doesn't want the page back, read into a local scratch page.*/
+ if(_og==NULL)_og=&og;
+ serialno=_of->os.serialno;
+ op_count=0;
+ /*We shouldn't have to initialize total_duration, but gcc is too dumb to
+ figure out that op_count>0 implies we've been through the whole loop at
+ least once.*/
+ total_duration=0;
+ /*Read pages until one of them yields at least one buffered packet.*/
+ do{
+ opus_int64 llret;
+ llret=op_get_next_page(_of,_og,_of->end);
+ /*We should get a page unless the file is truncated or mangled.
+ Otherwise there are no audio data packets in the whole logical stream.*/
+ if(OP_UNLIKELY(llret<0)){
+ /*Fail if there was a read error.*/
+ if(llret<OP_FALSE)return (int)llret;
+ /*Fail if the pre-skip is non-zero, since it's asking us to skip more
+ samples than exist.*/
+ if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP;
+ /*Set pcm_end and end_offset so we can skip the call to
+ op_find_final_pcm_offset().*/
+ _link->pcm_start=_link->pcm_end=0;
+ _link->end_offset=_link->data_offset;
+ return 0;
+ }
+ /*Similarly, if we hit the next link in the chain, we've gone too far.*/
+ if(OP_UNLIKELY(ogg_page_bos(_og))){
+ if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP;
+ /*Set pcm_end and end_offset so we can skip the call to
+ op_find_final_pcm_offset().*/
+ _link->pcm_end=_link->pcm_start=0;
+ _link->end_offset=_link->data_offset;
+ /*Tell the caller we've got a buffered page for them.*/
+ return 1;
+ }
+ /*Ignore pages from other streams (not strictly necessary, because of the
+ checks in ogg_stream_pagein(), but saves some work).*/
+ if(serialno!=(ogg_uint32_t)ogg_page_serialno(_og))continue;
+ ogg_stream_pagein(&_of->os,_og);
+ /*Bitrate tracking: add the header's bytes here.
+ The body bytes are counted when we consume the packets.*/
+ _of->bytes_tracked+=_og->header_len;
+ /*Count the durations of all packets in the page.*/
+ do total_duration=op_collect_audio_packets(_of,durations);
+ /*Ignore holes.*/
+ while(OP_UNLIKELY(total_duration<0));
+ op_count=_of->op_count;
+ }
+ while(op_count<=0);
+ /*We found the first page with a completed audio data packet: actually look
+ at the granule position.
+ RFC 3533 says, "A special value of -1 (in two's complement) indicates that
+ no packets finish on this page," which does not say that a granule
+ position that is NOT -1 indicates that some packets DO finish on that page
+ (even though this was the intention, libogg itself violated this intention
+ for years before we fixed it).
+ The Ogg Opus specification only imposes its start-time requirements
+ on the granule position of the first page with completed packets,
+ so we ignore any set granule positions until then.*/
+ cur_page_gp=_of->op[op_count-1].granulepos;
+ /*But getting a packet without a valid granule position on the page is not
+ okay.*/
+ if(cur_page_gp==-1)return OP_EBADTIMESTAMP;
+ cur_page_eos=_of->op[op_count-1].e_o_s;
+ if(OP_LIKELY(!cur_page_eos)){
+ /*The EOS flag wasn't set.
+ Work backwards from the provided granule position to get the starting PCM
+ offset.*/
+ if(OP_UNLIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){
+ /*The starting granule position MUST not be smaller than the amount of
+ audio on the first page with completed packets.*/
+ return OP_EBADTIMESTAMP;
+ }
+ }
+ else{
+ /*The first page with completed packets was also the last.*/
+ if(OP_LIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){
+ /*If there's less audio on the page than indicated by the granule
+ position, then we're doing end-trimming, and the starting PCM offset
+ is zero by spec mandate.*/
+ pcm_start=0;
+ /*However, the end-trimming MUST not ask us to trim more samples than
+ exist after applying the pre-skip.*/
+ if(OP_UNLIKELY(op_granpos_cmp(cur_page_gp,_link->head.pre_skip)<0)){
+ return OP_EBADTIMESTAMP;
+ }
+ }
+ }
+ /*Timestamp the individual packets.*/
+ prev_packet_gp=pcm_start;
+ for(pi=0;pi<op_count;pi++){
+ if(cur_page_eos){
+ ogg_int64_t diff;
+ OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp));
+ /*diff becomes the number of samples by which this packet runs past the
+ page's granule position.*/
+ diff=durations[pi]-diff;
+ /*If we have samples to trim...*/
+ if(diff>0){
+ /*If we trimmed the entire packet, stop (the spec says encoders
+ shouldn't do this, but we support it anyway).*/
+ if(OP_UNLIKELY(diff>durations[pi]))break;
+ _of->op[pi].granulepos=prev_packet_gp=cur_page_gp;
+ /*Move the EOS flag to this packet, if necessary, so we'll trim the
+ samples.*/
+ _of->op[pi].e_o_s=1;
+ continue;
+ }
+ }
+ /*Update the granule position as normal.*/
+ OP_ALWAYS_TRUE(!op_granpos_add(&_of->op[pi].granulepos,
+ prev_packet_gp,durations[pi]));
+ prev_packet_gp=_of->op[pi].granulepos;
+ }
+ /*Update the packet count after end-trimming.*/
+ _of->op_count=pi;
+ /*Seed the discard counter with the pre-skip and record the starting granule
+ position both as the link's pcm_start and as the previous packet's
+ position.*/
+ _of->cur_discard_count=_link->head.pre_skip;
+ _of->prev_packet_gp=_link->pcm_start=pcm_start;
+ return 0;
+}
+
+/*Starting from current cursor position, get the final PCM offset of the
+ previous page.
+ This also validates the duration of the link, which, while not strictly
+ required by the spec, we need to ensure duration calculations don't
+ overflow.
+ This is only done for seekable sources.
+ We must validate that op_find_initial_pcm_offset() succeeded for this link
+ before calling this function, otherwise it will scan the entire stream
+ backwards until it reaches the start, and then fail.*/
+static int op_find_final_pcm_offset(OggOpusFile *_of,
+ const ogg_uint32_t *_serialnos,int _nserialnos,OggOpusLink *_link,
+ opus_int64 _offset,ogg_uint32_t _end_serialno,ogg_int64_t _end_gp,
+ ogg_int64_t *_total_duration){
+ ogg_uint32_t link_serialno;
+ ogg_int64_t link_samples;
+ ogg_int64_t running_total;
+ link_serialno=_link->serialno;
+ /*The caller's page is only usable if it has a granule position and belongs
+ to this link; otherwise go look for the last page the simple way.*/
+ if(_end_gp==-1||_end_serialno!=link_serialno){
+ _offset=op_get_last_page(_of,&_end_gp,_offset,
+ link_serialno,_serialnos,_nserialnos);
+ if(OP_UNLIKELY(_offset<0))return (int)_offset;
+ }
+ /*The page we ended up with must not lie before the link's audio data.*/
+ if(OP_UNLIKELY(_offset<_link->data_offset))return OP_EBADLINK;
+ /*The span between the first and last granule positions must fit in a
+ signed, 64-bit number...*/
+ if(OP_UNLIKELY(op_granpos_diff(&link_samples,_end_gp,_link->pcm_start)<0)){
+ return OP_EBADTIMESTAMP;
+ }
+ /*...and must cover at least the pre-skip.*/
+ if(OP_UNLIKELY(link_samples<_link->head.pre_skip))return OP_EBADTIMESTAMP;
+ link_samples-=_link->head.pre_skip;
+ /*Adding this link to the running total must not overflow, either.*/
+ running_total=*_total_duration;
+ if(OP_UNLIKELY(running_total>OP_INT64_MAX-link_samples)){
+ return OP_EBADTIMESTAMP;
+ }
+ *_total_duration=running_total+link_samples;
+ _link->end_offset=_offset;
+ _link->pcm_end=_end_gp;
+ return 0;
+}
+
+/*Rescale the number _x from the range [0,_from] to [0,_to].
+ _from and _to must be positive.*/
+static opus_int64 op_rescale64(opus_int64 _x,opus_int64 _from,opus_int64 _to){
+ opus_int64 frac;
+ opus_int64 ret;
+ int i;
+ /*Saturate at the two ends of the range.*/
+ if(_x>=_from)return _to;
+ if(_x<=0)return 0;
+ /*First pass: binary long division.
+ Each iteration doubles _x and emits one bit of _x/_from, most significant
+ bit first, so frac ends up holding 63 fractional bits of the ratio.*/
+ frac=0;
+ for(i=0;i<63;i++){
+ frac<<=1;
+ OP_ASSERT(_x<=_from);
+ if(_x>=_from>>1){
+ /*Equivalent to _x=2*_x-_from, written so the doubled value is never
+ formed.*/
+ _x-=_from-_x;
+ frac|=1;
+ }
+ else _x<<=1;
+ }
+ /*Second pass: multiply _to by that fraction, consuming the bits from least
+ significant to most significant and halving the accumulator each step.*/
+ ret=0;
+ for(i=0;i<63;i++){
+ /*(ret+_to)/2 computed without forming the sum: halve both terms and add
+ back the carry when both low bits are set.*/
+ if(frac&1)ret=(ret&_to&1)+(ret>>1)+(_to>>1);
+ else ret>>=1;
+ frac>>=1;
+ }
+ return ret;
+}
+
+/*The minimum granule position spacing allowed for making predictions.
+ This corresponds to about 1 second of audio at 48 kHz for both Opus and
+ Vorbis, or one keyframe interval in Theora with the default keyframe spacing
+ of 256.
+ op_predict_link_start() adds this to one seek record's granule position to
+ get the smallest granule position it will accept from a second record.*/
+#define OP_GP_SPACING_MIN (48000)
+
+/*Try to estimate the location of the next link using the current seek
+ records, assuming the initial granule position of any streams we've found is
+ 0.
+ _sr: The seek records to extrapolate from.
+ _nsr: The number of entries of _sr to examine.
+ _searched: Lower bound of the search range; estimates below it are
+ rejected.
+ _end_searched: Upper bound of the search range; estimates must fall at least
+ OP_CHUNK_SIZE below it.
+ _bias: Added to every estimate.
+ Return: The smallest estimate accepted, or -1 if none fell below
+ _end_searched-OP_CHUNK_SIZE.*/
+static opus_int64 op_predict_link_start(const OpusSeekRecord *_sr,int _nsr,
+ opus_int64 _searched,opus_int64 _end_searched,opus_int32 _bias){
+ opus_int64 bisect;
+ int sri;
+ int srj;
+ /*Require that we be at least OP_CHUNK_SIZE from the end.
+ We don't require that we be at least OP_CHUNK_SIZE from the beginning,
+ because if we are we'll just scan forward without seeking.*/
+ _end_searched-=OP_CHUNK_SIZE;
+ if(_searched>=_end_searched)return -1;
+ bisect=_end_searched;
+ for(sri=0;sri<_nsr;sri++){
+ ogg_int64_t gp1;
+ ogg_int64_t gp2_min;
+ ogg_uint32_t serialno1;
+ opus_int64 offset1;
+ /*If the granule position is negative, either it's invalid or we'd cause
+ overflow.*/
+ gp1=_sr[sri].gp;
+ if(gp1<0)continue;
+ /*We require some minimum distance between granule positions to make an
+ estimate.
+ We don't actually know what granule position scheme is being used,
+ because we have no idea what kind of stream these came from.
+ Therefore we require a minimum spacing between them, with the
+ expectation that while bitrates and granule position increments might
+ vary locally in quite complex ways, they are globally smooth.*/
+ /*NOTE(review): this relies on op_granpos_add() never producing a gp2_min
+ that is smaller than gp1 (e.g., by wrapping for gp1 near the maximum);
+ otherwise den below could be zero or negative.
+ Confirm against op_granpos_add().*/
+ if(OP_UNLIKELY(op_granpos_add(&gp2_min,gp1,OP_GP_SPACING_MIN)<0)){
+ /*No granule position would satisfy us.*/
+ continue;
+ }
+ offset1=_sr[sri].offset;
+ serialno1=_sr[sri].serialno;
+ /*Pair this record with each record at a smaller index, nearest first.
+ The assertion below expects those records to lie at larger offsets.*/
+ for(srj=sri;srj-->0;){
+ ogg_int64_t gp2;
+ opus_int64 offset2;
+ opus_int64 num;
+ ogg_int64_t den;
+ ogg_int64_t ipart;
+ gp2=_sr[srj].gp;
+ if(gp2<gp2_min)continue;
+ /*Oh, and also make sure these came from the same stream.*/
+ if(_sr[srj].serialno!=serialno1)continue;
+ offset2=_sr[srj].offset;
+ /*For once, we can subtract with impunity.*/
+ /*den is the granule position step and num the byte step between the two
+ records; ipart is the number of whole den-sized steps in gp2.*/
+ den=gp2-gp1;
+ ipart=gp2/den;
+ num=offset2-offset1;
+ OP_ASSERT(num>0);
+ /*Skip this pair if stepping back ipart times would take us below
+ _searched (this also keeps ipart*num from exceeding
+ offset2-_searched).*/
+ if(ipart>0&&(offset2-_searched)/ipart<num)continue;
+ /*Extrapolate linearly back to granule position 0: first the whole
+ steps, then the fractional remainder.*/
+ offset2-=ipart*num;
+ gp2-=ipart*den;
+ offset2-=op_rescale64(gp2,den,num)-_bias;
+ if(offset2<_searched)continue;
+ bisect=OP_MIN(bisect,offset2);
+ /*Only the first usable partner of each record contributes.*/
+ break;
+ }
+ }
+ return bisect>=_end_searched?-1:bisect;
+}
+
+/*Finds each bitstream link, one at a time, using a bisection search.
+ This has to begin by knowing the offset of the first link's initial page.
+ _searched: The offset up to which the stream has already been examined.
+ _sr: Seek record storage; on entry _sr[0] describes the last page
+ in the file.
+ _csr: The capacity of _sr.
+ _serialnos: The serial number list of the current link; it is handed to
+ op_fetch_headers() for each new link and freed before a
+ successful return.
+ _nserialnos: The number of entries in *_serialnos.
+ _cserialnos: The capacity of *_serialnos.
+ Return: 0 on success, or a negative value on error.*/
+static int op_bisect_forward_serialno(OggOpusFile *_of,
+ opus_int64 _searched,OpusSeekRecord *_sr,int _csr,
+ ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){
+ ogg_page og;
+ OggOpusLink *links;
+ int nlinks;
+ int clinks;
+ ogg_uint32_t *serialnos;
+ int nserialnos;
+ ogg_int64_t total_duration;
+ int nsr;
+ int ret;
+ links=_of->links;
+ nlinks=clinks=_of->nlinks;
+ total_duration=0;
+ /*We start with one seek record, for the last page in the file.
+ We build up a list of records for places we seek to during link
+ enumeration.
+ This list is kept sorted in reverse order.
+ We only care about seek locations that were _not_ in the current link,
+ therefore we can add them one at a time to the end of the list as we
+ improve the lower bound on the location where the next link starts.*/
+ nsr=1;
+ for(;;){
+ opus_int64 end_searched;
+ opus_int64 bisect;
+ opus_int64 next;
+ opus_int64 last;
+ ogg_int64_t end_offset;
+ ogg_int64_t end_gp;
+ int sri;
+ /*Re-read these every iteration: op_fetch_headers() below receives the
+ caller's pointers and may update them.*/
+ serialnos=*_serialnos;
+ nserialnos=*_nserialnos;
+ /*Make room for one more link.*/
+ if(OP_UNLIKELY(nlinks>=clinks)){
+ /*This parses as (INT_MAX-1)>>1, i.e., the largest clinks for which
+ 2*clinks+1 still fits in an int.*/
+ if(OP_UNLIKELY(clinks>INT_MAX-1>>1))return OP_EFAULT;
+ clinks=2*clinks+1;
+ OP_ASSERT(nlinks<clinks);
+ /*On failure _of->links still holds the old array.*/
+ links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*clinks);
+ if(OP_UNLIKELY(links==NULL))return OP_EFAULT;
+ _of->links=links;
+ }
+ /*Invariants:
+ We have the headers and serial numbers for the link beginning at 'begin'.
+ We have the offset and granule position of the last page in the file
+ (potentially not a page we care about).*/
+ /*Scan the seek records we already have to save us some bisection.*/
+ for(sri=0;sri<nsr;sri++){
+ if(op_lookup_serialno(_sr[sri].serialno,serialnos,nserialnos))break;
+ }
+ /*Is the last page in our current list of serial numbers?*/
+ if(sri<=0)break;
+ /*Last page wasn't found.
+ We have at least one more link.*/
+ last=-1;
+ /*Record sri-1 is the last one in the list that is not from the current
+ link; it bounds the search from above.*/
+ end_searched=_sr[sri-1].search_start;
+ next=_sr[sri-1].offset;
+ end_gp=-1;
+ if(sri<nsr){
+ /*Record sri is from the current link, so we can start searching just
+ past it.*/
+ _searched=_sr[sri].offset+_sr[sri].size;
+ if(_sr[sri].serialno==links[nlinks-1].serialno){
+ end_gp=_sr[sri].gp;
+ end_offset=_sr[sri].offset;
+ }
+ }
+ /*Drop the records that belong to the current link.*/
+ nsr=sri;
+ bisect=-1;
+ /*If we've already found the end of at least one link, try to pick the
+ first bisection point at twice the average link size.
+ This is a good choice for files with lots of links that are all about the
+ same size.*/
+ if(nlinks>1){
+ opus_int64 last_offset;
+ opus_int64 avg_link_size;
+ opus_int64 upper_limit;
+ last_offset=links[nlinks-1].offset;
+ avg_link_size=last_offset/(nlinks-1);
+ upper_limit=end_searched-OP_CHUNK_SIZE-avg_link_size;
+ if(OP_LIKELY(last_offset>_searched-avg_link_size)
+ &&OP_LIKELY(last_offset<upper_limit)){
+ bisect=last_offset+avg_link_size;
+ if(OP_LIKELY(bisect<upper_limit))bisect+=avg_link_size;
+ }
+ }
+ /*We guard against garbage separating the last and first pages of two
+ links below.*/
+ while(_searched<end_searched){
+ opus_int32 next_bias;
+ /*If we don't have a better estimate, use simple bisection.*/
+ /*This parses as _searched+((end_searched-_searched)>>1).*/
+ if(bisect==-1)bisect=_searched+(end_searched-_searched>>1);
+ /*If we're within OP_CHUNK_SIZE of the start, scan forward.*/
+ if(bisect-_searched<OP_CHUNK_SIZE)bisect=_searched;
+ /*Otherwise we're skipping data.
+ Forget the end page, if we saw one, as we might miss a later one.*/
+ else end_gp=-1;
+ ret=op_seek_helper(_of,bisect);
+ if(OP_UNLIKELY(ret<0))return ret;
+ last=op_get_next_page(_of,&og,_sr[nsr-1].offset);
+ if(OP_UNLIKELY(last<OP_FALSE))return (int)last;
+ next_bias=0;
+ /*No page was found from here: move the upper bound down to this
+ point.*/
+ if(last==OP_FALSE)end_searched=bisect;
+ else{
+ ogg_uint32_t serialno;
+ ogg_int64_t gp;
+ serialno=ogg_page_serialno(&og);
+ gp=ogg_page_granulepos(&og);
+ if(!op_lookup_serialno(serialno,serialnos,nserialnos)){
+ /*The page is not from the current link: tighten the upper bound and
+ remember the page as a new seek record.*/
+ end_searched=bisect;
+ next=last;
+ /*In reality we should always have enough room, but be paranoid.*/
+ if(OP_LIKELY(nsr<_csr)){
+ _sr[nsr].search_start=bisect;
+ _sr[nsr].offset=last;
+ OP_ASSERT(_of->offset-last>=0);
+ OP_ASSERT(_of->offset-last<=OP_PAGE_SIZE_MAX);
+ _sr[nsr].size=(opus_int32)(_of->offset-last);
+ _sr[nsr].serialno=serialno;
+ _sr[nsr].gp=gp;
+ nsr++;
+ }
+ }
+ else{
+ /*The page is from the current link: raise the lower bound past it.*/
+ _searched=_of->offset;
+ next_bias=OP_CHUNK_SIZE;
+ if(serialno==links[nlinks-1].serialno){
+ /*This page was from the stream we want, remember it.
+ If it's the last such page in the link, we won't have to go back
+ looking for it later.*/
+ end_gp=gp;
+ end_offset=last;
+ }
+ }
+ }
+ bisect=op_predict_link_start(_sr,nsr,_searched,end_searched,next_bias);
+ }
+ /*Bisection point found.
+ Get the final granule position of the previous link, assuming
+ op_find_initial_pcm_offset() didn't already determine the link was
+ empty.*/
+ if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){
+ if(end_gp==-1){
+ /*If we don't know where the end page is, we'll have to seek back and
+ look for it, starting from the end of the link.*/
+ end_offset=next;
+ /*Also forget the last page we read.
+ It won't be available after the seek.*/
+ last=-1;
+ }
+ ret=op_find_final_pcm_offset(_of,serialnos,nserialnos,
+ links+nlinks-1,end_offset,links[nlinks-1].serialno,end_gp,
+ &total_duration);
+ if(OP_UNLIKELY(ret<0))return ret;
+ }
+ if(last!=next){
+ /*The last page we read was not the first page of the next link.
+ Move the cursor position to the offset of that first page.
+ This only performs an actual seek if the first page of the next link
+ does not start at the end of the last page from the current Opus
+ stream with a valid granule position.*/
+ ret=op_seek_helper(_of,next);
+ if(OP_UNLIKELY(ret<0))return ret;
+ }
+ /*Hand over the page we already read only if it is the first page of the
+ next link.*/
+ ret=op_fetch_headers(_of,&links[nlinks].head,&links[nlinks].tags,
+ _serialnos,_nserialnos,_cserialnos,last!=next?NULL:&og);
+ if(OP_UNLIKELY(ret<0))return ret;
+ links[nlinks].offset=next;
+ links[nlinks].data_offset=_of->offset;
+ links[nlinks].serialno=_of->os.serialno;
+ /*-1 marks the end of this link as not yet known.*/
+ links[nlinks].pcm_end=-1;
+ /*This might consume a page from the next link, however the next bisection
+ always starts with a seek.*/
+ ret=op_find_initial_pcm_offset(_of,links+nlinks,NULL);
+ if(OP_UNLIKELY(ret<0))return ret;
+ _searched=_of->offset;
+ /*Mark the current link count so it can be cleaned up on error.*/
+ _of->nlinks=++nlinks;
+ }
+ /*Last page is in the starting serialno list, so we've reached the last link.
+ Now find the last granule position for it (if we didn't the first time we
+ looked at the end of the stream, and if op_find_initial_pcm_offset()
+ didn't already determine the link was empty).*/
+ if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){
+ ret=op_find_final_pcm_offset(_of,serialnos,nserialnos,
+ links+nlinks-1,_sr[0].offset,_sr[0].serialno,_sr[0].gp,&total_duration);
+ if(OP_UNLIKELY(ret<0))return ret;
+ }
+ /*Trim back the links array if necessary.*/
+ /*If the shrinking realloc fails we simply keep the larger array.*/
+ links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*nlinks);
+ if(OP_LIKELY(links!=NULL))_of->links=links;
+ /*We also don't need these anymore.*/
+ _ogg_free(*_serialnos);
+ *_serialnos=NULL;
+ *_cserialnos=*_nserialnos=0;
+ return 0;
+}
+
+/*Recompute the decoder gain from the configured gain type and offset and
+ apply it to the current decoder.*/
+static void op_update_gain(OggOpusFile *_of){
+ OggOpusLink *link;
+ opus_int32 total_q8;
+ /*Without an initialized decoder there is nothing to update; the gain gets
+ applied when the decoder is set up.*/
+ if(_of->ready_state<OP_INITSET)return;
+ link=_of->links+(_of->seekable?_of->cur_link:0);
+ total_q8=_of->gain_offset_q8;
+ /*None of these sums can overflow: the header gain and the track gain lie in
+ [-32768,32767], and the user-supplied offset was already clamped to
+ [-98302,98303].*/
+ if(_of->gain_type==OP_TRACK_GAIN){
+ int track_q8;
+ track_q8=0;
+ opus_tags_get_track_gain(&link->tags,&track_q8);
+ total_q8+=track_q8;
+ /*The track gain is applied on top of the header gain.*/
+ total_q8+=link->head.output_gain;
+ }
+ else if(_of->gain_type==OP_HEADER_GAIN)total_q8+=link->head.output_gain;
+ else if(_of->gain_type!=OP_ABSOLUTE_GAIN)OP_ASSERT(0);
+ total_q8=OP_CLAMP(-32768,total_q8,32767);
+ OP_ASSERT(_of->od!=NULL);
+#if defined(OPUS_SET_GAIN)
+ opus_multistream_decoder_ctl(_of->od,OPUS_SET_GAIN(total_q8));
+#else
+/*A fallback that works with both float and fixed-point is a bunch of work,
+ so just force people to use a sufficiently new version.
+ This is deployed well enough at this point that this shouldn't be a burden.*/
+# error "libopus 1.0.1 or later required"
+#endif
+}
+
+/*Move from OP_STREAMSET to OP_INITSET by making sure a decoder matching the
+ current link exists.
+ Return: 0 on success (including when decoding was already set up), or
+ OP_EFAULT if the state is too early or the decoder could not be
+ created.*/
+static int op_make_decode_ready(OggOpusFile *_of){
+ const OpusHead *hdr;
+ int cur;
+ int nstreams;
+ int ncoupled;
+ int nchannels;
+ int reusable;
+ if(_of->ready_state>OP_STREAMSET)return 0;
+ if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET))return OP_EFAULT;
+ cur=_of->seekable?_of->cur_link:0;
+ hdr=&_of->links[cur].head;
+ nchannels=hdr->channel_count;
+ nstreams=hdr->stream_count;
+ ncoupled=hdr->coupled_count;
+ /*The existing decoder can be kept if its layout is identical to the one
+ this link asks for.*/
+ reusable=_of->od!=NULL&&_of->od_stream_count==nstreams
+ &&_of->od_coupled_count==ncoupled&&_of->od_channel_count==nchannels
+ &&memcmp(_of->od_mapping,hdr->mapping,
+ sizeof(*hdr->mapping)*nchannels)==0;
+ if(!reusable){
+ int err;
+ /*Replace the decoder and remember the layout it was built for.*/
+ opus_multistream_decoder_destroy(_of->od);
+ _of->od=opus_multistream_decoder_create(48000,nchannels,
+ nstreams,ncoupled,hdr->mapping,&err);
+ if(_of->od==NULL)return OP_EFAULT;
+ _of->od_stream_count=nstreams;
+ _of->od_coupled_count=ncoupled;
+ _of->od_channel_count=nchannels;
+ memcpy(_of->od_mapping,hdr->mapping,sizeof(*hdr->mapping)*nchannels);
+ }
+ else opus_multistream_decoder_ctl(_of->od,OPUS_RESET_STATE);
+ _of->ready_state=OP_INITSET;
+ /*Restart the bitrate tracking counters.*/
+ _of->samples_tracked=0;
+ _of->bytes_tracked=0;
+#if !defined(OPUS_FIXED_POINT)
+ _of->state_channel_count=0;
+ /*Seeding the PRNG with the serial number gives repeatable output for
+ straight play-throughs.*/
+ _of->dither_seed=_of->links[cur].serialno;
+#endif
+ op_update_gain(_of);
+ return 0;
+}
+
+/*Learn the structure of a seekable stream: find its end, locate the last
+ page, and enumerate every link.
+ Return: 0 on success, or a negative value on error.*/
+static int op_open_seekable2_impl(OggOpusFile *_of){
+ /*64 seek records should be enough for anybody.
+ Actually, with a bisection search in a 63-bit range down to OP_CHUNK_SIZE
+ granularity, much more than enough.*/
+ OpusSeekRecord sr[64];
+ opus_int64 data_offset;
+ int ret;
+ /*We can seek, so set out learning all about this file.
+ If the seek to the end fails, tell() would report some other position and
+ we would silently treat it as the end of the stream, so fail instead.*/
+ ret=(*_of->callbacks.seek)(_of->source,0,SEEK_END);
+ if(OP_UNLIKELY(ret<0))return OP_EREAD;
+ _of->offset=_of->end=(*_of->callbacks.tell)(_of->source);
+ if(OP_UNLIKELY(_of->end<0))return OP_EREAD;
+ data_offset=_of->links[0].data_offset;
+ if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK;
+ /*Get the offset of the last page of the physical bitstream, or, if we're
+ lucky, the last Opus page of the first link, as most Ogg Opus files will
+ contain a single logical bitstream.*/
+ ret=op_get_prev_page_serial(_of,sr,_of->end,
+ _of->links[0].serialno,_of->serialnos,_of->nserialnos);
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*If there's any trailing junk, forget about it.*/
+ _of->end=sr[0].offset+sr[0].size;
+ if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK;
+ /*Now enumerate the bitstream structure.*/
+ return op_bisect_forward_serialno(_of,data_offset,sr,sizeof(sr)/sizeof(*sr),
+ &_of->serialnos,&_of->nserialnos,&_of->cserialnos);
+}
+
+/*Finish opening a seekable stream: enumerate its links while preserving the
+ framing state and buffered packets we already have, then seek back to where
+ we were.
+ Return: 0 on success, or a negative value on error.*/
+static int op_open_seekable2(OggOpusFile *_of){
+ ogg_sync_state oy_start;
+ ogg_stream_state os_start;
+ ogg_packet *op_start;
+ opus_int64 start_offset;
+ int start_op_count;
+ int ret;
+ /*We're partially open and have a first link header state in storage in _of.
+ Save off that stream state so we can come back to it.
+ It would be simpler to just dump all this state and seek back to
+ links[0].data_offset when we're done.
+ But we do the extra work to allow us to seek back to _exactly_ the same
+ stream position we're at now.
+ This allows, e.g., the HTTP backend to continue reading from the original
+ connection (if it's still available), instead of opening a new one.
+ This means we can open and start playing a normal Opus file with a single
+ link and reasonable packet sizes using only two HTTP requests.*/
+ start_op_count=_of->op_count;
+ /*This is a bit too large to put on the stack unconditionally.
+ Always ask for at least one element: a zero-sized allocation is allowed to
+ return NULL, which would otherwise be mistaken for an allocation failure
+ when there are no buffered packets.*/
+ op_start=(ogg_packet *)_ogg_malloc(
+ sizeof(*op_start)*(start_op_count>0?start_op_count:1));
+ if(op_start==NULL)return OP_EFAULT;
+ *&oy_start=_of->oy;
+ *&os_start=_of->os;
+ start_offset=_of->offset;
+ memcpy(op_start,_of->op,sizeof(*op_start)*start_op_count);
+ OP_ASSERT((*_of->callbacks.tell)(_of->source)==op_position(_of));
+ /*Give the link enumeration fresh framing state to work with.*/
+ ogg_sync_init(&_of->oy);
+ ogg_stream_init(&_of->os,-1);
+ ret=op_open_seekable2_impl(_of);
+ /*Restore the old stream state.*/
+ ogg_stream_clear(&_of->os);
+ ogg_sync_clear(&_of->oy);
+ *&_of->oy=*&oy_start;
+ *&_of->os=*&os_start;
+ _of->offset=start_offset;
+ _of->op_count=start_op_count;
+ memcpy(_of->op,op_start,sizeof(*_of->op)*start_op_count);
+ _ogg_free(op_start);
+ _of->prev_packet_gp=_of->links[0].pcm_start;
+ _of->cur_discard_count=_of->links[0].head.pre_skip;
+ /*The state is restored even when enumeration failed; only now report it.*/
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*And restore the position indicator.*/
+ ret=(*_of->callbacks.seek)(_of->source,op_position(_of),SEEK_SET);
+ return OP_UNLIKELY(ret<0)?OP_EREAD:0;
+}
+
+/*Clear out the current logical bitstream decoder.*/
+static void op_decode_clear(OggOpusFile *_of){
+ /*An unseekable stream only keeps the current link's tags, so drop them.*/
+ if(!_of->seekable){
+ OP_ASSERT(_of->ready_state>=OP_INITSET);
+ opus_tags_clear(&_of->links[0].tags);
+ }
+ /*The decoder itself is kept around: the next link might be able to re-use
+ it.
+ Only the buffered packets and decoded output are forgotten.*/
+ _of->prev_packet_gp=-1;
+ _of->od_buffer_size=0;
+ _of->op_count=0;
+ _of->ready_state=OP_OPENED;
+}
+
+/*Release everything owned by _of (but not _of itself), and close the source
+ if a close callback is set.*/
+static void op_clear(OggOpusFile *_of){
+ OggOpusLink *links;
+ links=_of->links;
+ _ogg_free(_of->od_buffer);
+ if(_of->od!=NULL)opus_multistream_decoder_destroy(_of->od);
+ if(_of->seekable){
+ /*A seekable stream has tags for every enumerated link.*/
+ if(OP_LIKELY(links!=NULL)){
+ int li;
+ for(li=_of->nlinks;li-->0;)opus_tags_clear(&links[li].tags);
+ }
+ }
+ else if(_of->ready_state>OP_OPENED||_of->ready_state==OP_PARTOPEN){
+ /*An unseekable stream has tags for its single link only in these
+ states.*/
+ opus_tags_clear(&links[0].tags);
+ }
+ _ogg_free(links);
+ _ogg_free(_of->serialnos);
+ ogg_stream_clear(&_of->os);
+ ogg_sync_clear(&_of->oy);
+ if(_of->callbacks.close!=NULL)(*_of->callbacks.close)(_of->source);
+}
+
+/*Begin opening a stream: initialize _of, buffer any initial data, decide
+ whether the source is seekable, and read the headers of the first link.
+ On success the stream is left in the OP_PARTOPEN state.
+ _source: The opaque stream handle passed to the callbacks.
+ _cb: The I/O callbacks; read is mandatory, and tell is mandatory
+ when seek works.
+ _initial_data: Data already read from the start of the stream.
+ _initial_bytes: The number of bytes in _initial_data.
+ Return: A non-negative value on success, or a negative value on error.
+ _of is left in a state that op_clear() can clean up either
+ way.*/
+static int op_open1(OggOpusFile *_of,
+ void *_source,const OpusFileCallbacks *_cb,
+ const unsigned char *_initial_data,size_t _initial_bytes){
+ ogg_page og;
+ ogg_page *pog;
+ int seekable;
+ int ret;
+ memset(_of,0,sizeof(*_of));
+ _of->end=-1;
+ _of->source=_source;
+ *&_of->callbacks=*_cb;
+ /*At a minimum, we need to be able to read data.*/
+ if(OP_UNLIKELY(_of->callbacks.read==NULL))return OP_EREAD;
+ /*Initialize the framing state.*/
+ ogg_sync_init(&_of->oy);
+ /*Perhaps some data was previously read into a buffer for testing against
+ other stream types.
+ Allow initialization from this previously read data (especially as we may
+ be reading from a non-seekable stream).
+ This requires copying it into a buffer allocated by ogg_sync_buffer() and
+ doesn't support seeking, so this is not a good mechanism to use for
+ decoding entire files from RAM.*/
+ if(_initial_bytes>0){
+ char *buffer;
+ buffer=ogg_sync_buffer(&_of->oy,_initial_bytes);
+ /*ogg_sync_buffer() returns NULL if it cannot provide the buffer.*/
+ if(OP_UNLIKELY(buffer==NULL))return OP_EFAULT;
+ memcpy(buffer,_initial_data,_initial_bytes*sizeof(*buffer));
+ ogg_sync_wrote(&_of->oy,_initial_bytes);
+ }
+ /*Can we seek?
+ Stevens suggests the seek test is portable.*/
+ seekable=_cb->seek!=NULL&&(*_cb->seek)(_source,0,SEEK_CUR)!=-1;
+ /*If seek is implemented, tell must also be implemented.*/
+ if(seekable){
+ opus_int64 pos;
+ if(OP_UNLIKELY(_of->callbacks.tell==NULL))return OP_EINVAL;
+ pos=(*_of->callbacks.tell)(_of->source);
+ /*If the current position is not equal to the initial bytes consumed,
+ absolute seeking will not work.*/
+ if(OP_UNLIKELY(pos!=(opus_int64)_initial_bytes))return OP_EINVAL;
+ }
+ _of->seekable=seekable;
+ /*Don't seek yet.
+ Set up a 'single' (current) logical bitstream entry for partial open.*/
+ _of->links=(OggOpusLink *)_ogg_malloc(sizeof(*_of->links));
+ /*Everything below dereferences links[0], so give up now if we have none.*/
+ if(OP_UNLIKELY(_of->links==NULL))return OP_EFAULT;
+ /*The serialno gets filled in later by op_fetch_headers().*/
+ ogg_stream_init(&_of->os,-1);
+ pog=NULL;
+ for(;;){
+ /*Fetch all BOS pages, store the Opus header and all seen serial numbers,
+ and load subsequent Opus setup headers.*/
+ ret=op_fetch_headers(_of,&_of->links[0].head,&_of->links[0].tags,
+ &_of->serialnos,&_of->nserialnos,&_of->cserialnos,pog);
+ if(OP_UNLIKELY(ret<0))break;
+ _of->nlinks=1;
+ _of->links[0].offset=0;
+ _of->links[0].data_offset=_of->offset;
+ _of->links[0].pcm_end=-1;
+ _of->links[0].serialno=_of->os.serialno;
+ /*Fetch the initial PCM offset.*/
+ ret=op_find_initial_pcm_offset(_of,_of->links,&og);
+ if(seekable||OP_LIKELY(ret<=0))break;
+ /*This link was empty, but we already have the BOS page for the next one in
+ og.
+ We can't seek, so start processing the next link right now.*/
+ opus_tags_clear(&_of->links[0].tags);
+ _of->nlinks=0;
+ /*We only get here for unseekable streams (see the break above).*/
+ _of->cur_link++;
+ pog=&og;
+ }
+ if(OP_LIKELY(ret>=0))_of->ready_state=OP_PARTOPEN;
+ return ret;
+}
+
+/*Finish opening a partially opened stream.
+ On failure _of is cleared (without closing the source) and the error is
+ returned; the caller still owns the memory of _of itself.*/
+static int op_open2(OggOpusFile *_of){
+ int ret;
+ OP_ASSERT(_of->ready_state==OP_PARTOPEN);
+ ret=0;
+ if(_of->seekable){
+ _of->ready_state=OP_OPENED;
+ ret=op_open_seekable2(_of);
+ }
+ if(OP_LIKELY(ret>=0)){
+ /*op_find_initial_pcm_offset() left packets buffered for us.
+ Mark the stream as set and get the decoder ready so we can use them.*/
+ _of->ready_state=OP_STREAMSET;
+ ret=op_make_decode_ready(_of);
+ }
+ if(OP_UNLIKELY(ret<0)){
+ /*Don't auto-close the stream on failure.*/
+ _of->callbacks.close=NULL;
+ op_clear(_of);
+ return ret;
+ }
+ return 0;
+}
+
+OggOpusFile *op_test_callbacks(void *_source,const OpusFileCallbacks *_cb,
+ const unsigned char *_initial_data,size_t _initial_bytes,int *_error){
+ OggOpusFile *of;
+ int ret;
+ of=(OggOpusFile *)_ogg_malloc(sizeof(*of));
+ if(OP_UNLIKELY(of==NULL)){
+ if(_error!=NULL)*_error=OP_EFAULT;
+ return NULL;
+ }
+ ret=op_open1(of,_source,_cb,_initial_data,_initial_bytes);
+ if(OP_UNLIKELY(ret<0)){
+ /*Don't auto-close the stream on failure.*/
+ of->callbacks.close=NULL;
+ op_clear(of);
+ _ogg_free(of);
+ if(_error!=NULL)*_error=ret;
+ return NULL;
+ }
+ if(_error!=NULL)*_error=0;
+ return of;
+}
+
+OggOpusFile *op_open_callbacks(void *_source,const OpusFileCallbacks *_cb,
+ const unsigned char *_initial_data,size_t _initial_bytes,int *_error){
+ OggOpusFile *of;
+ int ret;
+ /*Do the partial open first; it reports its own errors through _error.*/
+ of=op_test_callbacks(_source,_cb,_initial_data,_initial_bytes,_error);
+ if(OP_UNLIKELY(of==NULL))return NULL;
+ ret=op_open2(of);
+ if(OP_UNLIKELY(ret<0)){
+ /*op_open2() already cleared the contents; only the struct is left.*/
+ if(_error!=NULL)*_error=ret;
+ _ogg_free(of);
+ return NULL;
+ }
+ return of;
+}
+
+/*Convenience routine to clean up from failure for the open functions that
+ create their own streams.*/
+static OggOpusFile *op_open_close_on_failure(void *_source,
+ const OpusFileCallbacks *_cb,int *_error){
+ OggOpusFile *of;
+ if(OP_LIKELY(_source!=NULL)){
+ of=op_open_callbacks(_source,_cb,NULL,0,_error);
+ /*We created the stream, so we have to close it if the open failed.*/
+ if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source);
+ return of;
+ }
+ /*The stream itself could not be created.*/
+ if(_error!=NULL)*_error=OP_EFAULT;
+ return NULL;
+}
+
+OggOpusFile *op_open_file(const char *_path,int *_error){
+ OpusFileCallbacks cb;
+ void *source;
+ /*Open the file for binary reading, then hand the stream to the opener.*/
+ source=op_fopen(&cb,_path,"rb");
+ return op_open_close_on_failure(source,&cb,_error);
+}
+
+OggOpusFile *op_open_memory(const unsigned char *_data,size_t _size,
+ int *_error){
+ OpusFileCallbacks cb;
+ void *source;
+ /*Wrap the buffer in a memory stream, then hand it to the opener.*/
+ source=op_mem_stream_create(&cb,_data,_size);
+ return op_open_close_on_failure(source,&cb,_error);
+}
+
+/*Convenience routine to clean up from failure for the test functions that
+ create their own streams.*/
+static OggOpusFile *op_test_close_on_failure(void *_source,
+ const OpusFileCallbacks *_cb,int *_error){
+ OggOpusFile *of;
+ if(OP_LIKELY(_source!=NULL)){
+ of=op_test_callbacks(_source,_cb,NULL,0,_error);
+ /*We created the stream, so we have to close it if the test failed.*/
+ if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source);
+ return of;
+ }
+ /*The stream itself could not be created.*/
+ if(_error!=NULL)*_error=OP_EFAULT;
+ return NULL;
+}
+
+OggOpusFile *op_test_file(const char *_path,int *_error){
+ OpusFileCallbacks cb;
+ void *source;
+ /*Open the file for binary reading, then hand the stream to the tester.*/
+ source=op_fopen(&cb,_path,"rb");
+ return op_test_close_on_failure(source,&cb,_error);
+}
+
+OggOpusFile *op_test_memory(const unsigned char *_data,size_t _size,
+ int *_error){
+ OpusFileCallbacks cb;
+ void *source;
+ /*Wrap the buffer in a memory stream, then hand it to the tester.*/
+ source=op_mem_stream_create(&cb,_data,_size);
+ return op_test_close_on_failure(source,&cb,_error);
+}
+
+int op_test_open(OggOpusFile *_of){
+ int ret;
+ /*Only a partially opened stream can be finished.*/
+ if(OP_UNLIKELY(_of->ready_state!=OP_PARTOPEN))return OP_EINVAL;
+ ret=op_open2(_of);
+ if(OP_LIKELY(ret>=0))return ret;
+ /*op_open2() cleared the structure when it failed.
+ Zero it so that a later op_free() does not free anything twice.*/
+ memset(_of,0,sizeof(*_of));
+ return ret;
+}
+
+void op_free(OggOpusFile *_of){
+ /*Freeing a NULL handle is a no-op.*/
+ if(OP_UNLIKELY(_of==NULL))return;
+ op_clear(_of);
+ _ogg_free(_of);
+}
+
+int op_seekable(const OggOpusFile *_of){
+ int seekable;
+ /*Report the flag stored with the stream.*/
+ seekable=_of->seekable;
+ return seekable;
+}
+
+int op_link_count(const OggOpusFile *_of){
+ int nlinks;
+ /*Report the link count stored with the stream.*/
+ nlinks=_of->nlinks;
+ return nlinks;
+}
+
+ogg_uint32_t op_serialno(const OggOpusFile *_of,int _li){
+ int li;
+ /*An unseekable stream only ever has its single, current link.*/
+ if(!_of->seekable)li=0;
+ else{
+ li=_li;
+ /*Indices past the end select the last link...*/
+ if(OP_UNLIKELY(li>=_of->nlinks))li=_of->nlinks-1;
+ /*...and negative ones select the current link.*/
+ if(li<0)li=_of->cur_link;
+ }
+ return _of->links[li].serialno;
+}
+
+int op_channel_count(const OggOpusFile *_of,int _li){
+ const OpusHead *head;
+ /*op_head() takes care of resolving the link index.*/
+ head=op_head(_of,_li);
+ return head->channel_count;
+}
+
+opus_int64 op_raw_total(const OggOpusFile *_of,int _li){
+ const OggOpusLink *links;
+ opus_int64 first;
+ opus_int64 limit;
+ int nlinks;
+ nlinks=_of->nlinks;
+ /*Byte totals need a fully opened, seekable stream and a valid link index.*/
+ if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+ if(OP_UNLIKELY(!_of->seekable))return OP_EINVAL;
+ if(OP_UNLIKELY(_li>=nlinks))return OP_EINVAL;
+ links=_of->links;
+ if(_li<0){
+ /*The whole stream: from the start of the first link to the end.*/
+ first=links[0].offset;
+ limit=_of->end;
+ }
+ else{
+ /*One link: it runs up to the start of the next link, or to the end of the
+ stream if it is the last one.*/
+ first=links[_li].offset;
+ if(_li+1<nlinks)limit=links[_li+1].offset;
+ else limit=_of->end;
+ }
+ return limit-first;
+}
+
+ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li){
+ OggOpusLink *links;
+ ogg_int64_t span;
+ ogg_int64_t total;
+ int nlinks;
+ int first;
+ int limit;
+ int li;
+ nlinks=_of->nlinks;
+ if(OP_UNLIKELY(_of->ready_state<OP_OPENED)
+ ||OP_UNLIKELY(!_of->seekable)
+ ||OP_UNLIKELY(_li>=nlinks)){
+ return OP_EINVAL;
+ }
+ links=_of->links;
+ /*A negative index asks for all links; otherwise just the one requested.*/
+ if(_li<0){
+ first=0;
+ limit=nlinks;
+ }
+ else{
+ first=_li;
+ limit=_li+1;
+ }
+ /*Link enumeration already verified that every granule position difference
+ is at least the pre-skip and that the grand total does not overflow, so
+ no checks are needed here.*/
+ total=0;
+ for(li=first;li<limit;li++){
+ OP_ALWAYS_TRUE(!op_granpos_diff(&span,
+ links[li].pcm_end,links[li].pcm_start));
+ total+=span-links[li].head.pre_skip;
+ }
+ return total;
+}
+
+const OpusHead *op_head(const OggOpusFile *_of,int _li){
+ int li;
+ /*An unseekable stream only ever has its single, current link.*/
+ if(!_of->seekable)li=0;
+ else{
+ li=_li;
+ /*Indices past the end select the last link...*/
+ if(OP_UNLIKELY(li>=_of->nlinks))li=_of->nlinks-1;
+ /*...and negative ones select the current link.*/
+ if(li<0)li=_of->cur_link;
+ }
+ return &_of->links[li].head;
+}
+
+const OpusTags *op_tags(const OggOpusFile *_of,int _li){
+ int li;
+ if(!_of->seekable){
+ /*An unseekable stream only has tags while a link is loaded.*/
+ if(_of->ready_state<OP_STREAMSET&&_of->ready_state!=OP_PARTOPEN){
+ return NULL;
+ }
+ li=0;
+ }
+ else{
+ li=_li;
+ /*Indices past the end select the last link.*/
+ if(OP_UNLIKELY(li>=_of->nlinks))li=_of->nlinks-1;
+ /*Negative ones select the current link once a stream is set, and the
+ first link before that.*/
+ if(li<0){
+ if(_of->ready_state>=OP_STREAMSET)li=_of->cur_link;
+ else li=0;
+ }
+ }
+ return &_of->links[li].tags;
+}
+
+int op_current_link(const OggOpusFile *_of){
+ /*There is no current link until the stream is fully open.*/
+ return OP_UNLIKELY(_of->ready_state<OP_OPENED)?OP_EINVAL:_of->cur_link;
+}
+
+/*Compute an average bitrate given a byte and sample count.
+  _bytes:   The number of bytes of compressed data.
+  _samples: The number of samples those bytes cover; the scaling below
+             multiplies by 48000 samples per second and 8 bits per byte.
+  Return: The bitrate in bits per second, saturated to OP_INT32_MAX.
+          OP_INT32_MAX is also returned when _samples is not positive.*/
+static opus_int32 op_calc_bitrate(opus_int64 _bytes,ogg_int64_t _samples){
+  /*Reject a non-positive sample count before doing any arithmetic with it:
+     there is nothing to divide by, and the overflow test below computes
+     OP_INT64_MAX-(_samples>>1), which would itself overflow for a negative
+     _samples.*/
+  if(OP_UNLIKELY(_samples<=0))return OP_INT32_MAX;
+  /*These rates are absurd, but let's handle them anyway.*/
+  if(OP_UNLIKELY(_bytes>(OP_INT64_MAX-(_samples>>1))/(48000*8))){
+    ogg_int64_t den;
+    if(OP_UNLIKELY(_bytes/(OP_INT32_MAX/(48000*8))>=_samples)){
+      return OP_INT32_MAX;
+    }
+    /*_bytes*48000*8 would overflow, so divide the scale factor out of the
+       denominator instead.
+      den cannot be zero here: getting past the check above requires
+       _samples to exceed _bytes/(OP_INT32_MAX/(48000*8)), and _bytes is
+       enormous on this path.*/
+    den=_samples/(48000*8);
+    return (opus_int32)((_bytes+(den>>1))/den);
+  }
+  /*This can't actually overflow in normal operation: even with a pre-skip of
+     545 2.5 ms frames with 8 streams running at 1282*8+1 bytes per packet
+     (1275 byte frames + Opus framing overhead + Ogg lacing values), that all
+     produce a single sample of decoded output, we still don't top 45 Mbps.
+    The only way to get bitrates larger than that is with excessive Opus
+     padding, more encoded streams than output channels, or lots and lots of
+     Ogg pages with no packets on them.*/
+  return (opus_int32)OP_MIN((_bytes*48000*8+(_samples>>1))/_samples,
+   OP_INT32_MAX);
+}
+
+/*Compute the average bitrate of one link, or of the whole stream.
+  _li: The link index, passed to op_raw_total() and op_pcm_total().
+  Return: The result of op_calc_bitrate() for those two totals, or OP_EINVAL
+           if the file is not at least OP_OPENED, is not seekable, or _li is
+           not below the number of links.*/
+opus_int32 op_bitrate(const OggOpusFile *_of,int _li){
+  opus_int64  nbytes;
+  ogg_int64_t nsamples;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  if(OP_UNLIKELY(!_of->seekable))return OP_EINVAL;
+  if(OP_UNLIKELY(_li>=_of->nlinks))return OP_EINVAL;
+  nbytes=op_raw_total(_of,_li);
+  nsamples=op_pcm_total(_of,_li);
+  return op_calc_bitrate(nbytes,nsamples);
+}
+
+/*Compute the bitrate over the bytes and samples tracked since the counters
+   were last cleared, then clear them.
+  Return: The result of op_calc_bitrate(), OP_EINVAL if the file has not
+           reached OP_OPENED, or OP_FALSE if no samples have been tracked (in
+           which case the counters are left untouched).*/
+opus_int32 op_bitrate_instant(OggOpusFile *_of){
+  opus_int32 rate;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  if(OP_UNLIKELY(_of->samples_tracked==0))return OP_FALSE;
+  rate=op_calc_bitrate(_of->bytes_tracked,_of->samples_tracked);
+  /*Start a fresh measurement window.*/
+  _of->samples_tracked=0;
+  _of->bytes_tracked=0;
+  return rate;
+}
+
+/*Fetch and process a page.
+ This handles the case where we're at a bitstream boundary and dumps the
+ decoding machine.
+ If the decoding machine is unloaded, it loads it.
+ It also keeps prev_packet_gp up to date (seek and read both use this; seek
+ uses a special hack with _readp).
+ _og: An already-fetched page to process first, or NULL to read one.
+ _page_pos: The offset of _og, as returned by op_get_next_page() (ignored when
+ _og is NULL, since it is overwritten by the offset of the page read).
+ _readp: If zero, return 0 immediately without touching the stream.
+ _spanp: If zero, return OP_EOF instead of crossing into the next link when a
+ BOS page with a different serial number is found.
+ _ignore_holes: If non-zero, do not report OP_HOLE to the caller.
+ Return: <0) Error, OP_HOLE (lost packet), or OP_EOF.
+ 0) Need more data (only if _readp==0).
+ 1) Got at least one audio data packet.*/
+static int op_fetch_and_process_page(OggOpusFile *_of,
+ ogg_page *_og,opus_int64 _page_pos,int _readp,int _spanp,int _ignore_holes){
+ OggOpusLink *links;
+ ogg_uint32_t cur_serialno;
+ int seekable;
+ int cur_link;
+ int ret;
+ /*We shouldn't get here if we have unprocessed packets.*/
+ OP_ASSERT(_of->ready_state<OP_INITSET||_of->op_pos>=_of->op_count);
+ if(!_readp)return 0;
+ seekable=_of->seekable;
+ links=_of->links;
+ cur_link=seekable?_of->cur_link:0;
+ cur_serialno=links[cur_link].serialno;
+ /*Handle one page.*/
+ for(;;){
+ ogg_page og;
+ OP_ASSERT(_of->ready_state>=OP_OPENED);
+ /*This loop is not strictly necessary, but there's no sense in doing the
+ extra checks of the larger loop for the common case in a multiplexed
+ bitstream where the page is simply part of a different logical
+ bitstream.
+ NOTE(review): because the loop condition is while(0), the continue below
+ jumps to that (false) condition and leaves this block instead of reading
+ another page here; the skipped page then falls through to the code after
+ the loop.*/
+ do{
+ /*If we were given a page to use, use it.*/
+ if(_og!=NULL){
+ *&og=*_og;
+ _og=NULL;
+ }
+ /*Keep reading until we get a page with the correct serialno.*/
+ else _page_pos=op_get_next_page(_of,&og,_of->end);
+ /*EOF: Leave uninitialized.*/
+ if(_page_pos<0)return _page_pos<OP_FALSE?(int)_page_pos:OP_EOF;
+ if(OP_LIKELY(_of->ready_state>=OP_STREAMSET)){
+ if(cur_serialno!=(ogg_uint32_t)ogg_page_serialno(&og)){
+ /*Two possibilities:
+ 1) Another stream is multiplexed into this logical section, or*/
+ if(OP_LIKELY(!ogg_page_bos(&og)))continue;
+ /* 2) Our decoding just traversed a bitstream boundary.*/
+ if(!_spanp)return OP_EOF;
+ if(OP_LIKELY(_of->ready_state>=OP_INITSET))op_decode_clear(_of);
+ break;
+ }
+ }
+ /*Bitrate tracking: add the header's bytes here.
+ The body bytes are counted when we consume the packets.*/
+ _of->bytes_tracked+=og.header_len;
+ }
+ while(0);
+ /*Do we need to load a new machine before submitting the page?
+ This is different in the seekable and non-seekable cases.
+ In the seekable case, we already have all the header information loaded
+ and cached.
+ We just initialize the machine with it and continue on our merry way.
+ In the non-seekable (streaming) case, we'll only be at a boundary if we
+ just left the previous logical bitstream, and we're now nominally at the
+ header of the next bitstream.*/
+ if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET)){
+ if(seekable){
+ ogg_uint32_t serialno;
+ int nlinks;
+ int li;
+ serialno=ogg_page_serialno(&og);
+ /*Match the serialno to bitstream section.
+ We use this rather than offset positions to avoid problems near
+ logical bitstream boundaries.*/
+ nlinks=_of->nlinks;
+ for(li=0;li<nlinks&&links[li].serialno!=serialno;li++);
+ /*Not a desired Opus bitstream section.
+ Keep trying.*/
+ if(li>=nlinks)continue;
+ cur_serialno=serialno;
+ _of->cur_link=cur_link=li;
+ ogg_stream_reset_serialno(&_of->os,serialno);
+ _of->ready_state=OP_STREAMSET;
+ /*If we're at the start of this link, initialize the granule position
+ and pre-skip tracking.*/
+ if(_page_pos<=links[cur_link].data_offset){
+ _of->prev_packet_gp=links[cur_link].pcm_start;
+ _of->cur_discard_count=links[cur_link].head.pre_skip;
+ /*Ignore a hole at the start of a new link (this is common for
+ streams joined in the middle) or after seeking.*/
+ _ignore_holes=1;
+ }
+ }
+ else{
+ do{
+ /*We're streaming.
+ Fetch the two header packets, build the info struct.*/
+ ret=op_fetch_headers(_of,&links[0].head,&links[0].tags,
+ NULL,NULL,NULL,&og);
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*op_find_initial_pcm_offset() will suppress any initial hole for us,
+ so no need to set _ignore_holes.*/
+ ret=op_find_initial_pcm_offset(_of,links,&og);
+ if(OP_UNLIKELY(ret<0))return ret;
+ _of->links[0].serialno=cur_serialno=_of->os.serialno;
+ _of->cur_link++;
+ }
+ /*If the link was empty, keep going, because we already have the
+ BOS page of the next one in og.*/
+ while(OP_UNLIKELY(ret>0));
+ /*If we didn't get any packets out of op_find_initial_pcm_offset(),
+ keep going (this is possible if end-trimming trimmed them all).*/
+ if(_of->op_count<=0)continue;
+ /*Otherwise, we're done.*/
+ ret=op_make_decode_ready(_of);
+ if(OP_UNLIKELY(ret<0))return ret;
+ return 1;
+ }
+ }
+ /*The buffered page is the data we want, and we're ready for it.
+ Add it to the stream state.*/
+ if(OP_UNLIKELY(_of->ready_state==OP_STREAMSET)){
+ ret=op_make_decode_ready(_of);
+ if(OP_UNLIKELY(ret<0))return ret;
+ }
+ /*Extract all the packets from the current page.*/
+ ogg_stream_pagein(&_of->os,&og);
+ if(OP_LIKELY(_of->ready_state>=OP_INITSET)){
+ opus_int32 total_duration;
+ int durations[255];
+ int op_count;
+ total_duration=op_collect_audio_packets(_of,durations);
+ if(OP_UNLIKELY(total_duration<0)){
+ /*Drain the packets from the page anyway.*/
+ total_duration=op_collect_audio_packets(_of,durations);
+ OP_ASSERT(total_duration>=0);
+ /*Report holes to the caller.*/
+ if(!_ignore_holes)return OP_HOLE;
+ }
+ op_count=_of->op_count;
+ /*If we found at least one audio data packet, compute per-packet granule
+ positions for them.*/
+ if(op_count>0){
+ ogg_int64_t diff;
+ ogg_int64_t prev_packet_gp;
+ ogg_int64_t cur_packet_gp;
+ ogg_int64_t cur_page_gp;
+ int cur_page_eos;
+ int pi;
+ cur_page_gp=_of->op[op_count-1].granulepos;
+ cur_page_eos=_of->op[op_count-1].e_o_s;
+ prev_packet_gp=_of->prev_packet_gp;
+ if(OP_UNLIKELY(prev_packet_gp==-1)){
+ opus_int32 cur_discard_count;
+ /*This is the first call after a raw seek.
+ Try to reconstruct prev_packet_gp from scratch.*/
+ OP_ASSERT(seekable);
+ if(OP_UNLIKELY(cur_page_eos)){
+ /*If the first page we hit after our seek was the EOS page, and
+ we didn't start from data_offset or before, we don't have
+ enough information to do end-trimming.
+ Proceed to the next link, rather than risk playing back some
+ samples that shouldn't have been played.*/
+ _of->op_count=0;
+ continue;
+ }
+ /*By default discard 80 ms of data after a seek, unless we seek
+ into the pre-skip region.*/
+ cur_discard_count=80*48;
+ cur_page_gp=_of->op[op_count-1].granulepos;
+ /*Try to initialize prev_packet_gp.
+ If the current page had packets but didn't have a granule
+ position, or the granule position it had was too small (both
+ illegal), just use the starting granule position for the link.*/
+ prev_packet_gp=links[cur_link].pcm_start;
+ if(OP_LIKELY(cur_page_gp!=-1)){
+ op_granpos_add(&prev_packet_gp,cur_page_gp,-total_duration);
+ }
+ if(OP_LIKELY(!op_granpos_diff(&diff,
+ prev_packet_gp,links[cur_link].pcm_start))){
+ opus_int32 pre_skip;
+ /*If we start at the beginning of the pre-skip region, or we're
+ at least 80 ms from the end of the pre-skip region, we discard
+ to the end of the pre-skip region.
+ Otherwise, we still use the 80 ms default, which will discard
+ past the end of the pre-skip region.*/
+ pre_skip=links[cur_link].head.pre_skip;
+ if(diff>=0&&diff<=OP_MAX(0,pre_skip-80*48)){
+ cur_discard_count=pre_skip-(int)diff;
+ }
+ }
+ _of->cur_discard_count=cur_discard_count;
+ }
+ if(OP_UNLIKELY(cur_page_gp==-1)){
+ /*This page had completed packets but didn't have a valid granule
+ position.
+ This is illegal, but we'll try to handle it by continuing to count
+ forwards from the previous page.*/
+ if(op_granpos_add(&cur_page_gp,prev_packet_gp,total_duration)<0){
+ /*The timestamp for this page overflowed.*/
+ cur_page_gp=links[cur_link].pcm_end;
+ }
+ }
+ /*If we hit the last page, handle end-trimming.*/
+ if(OP_UNLIKELY(cur_page_eos)
+ &&OP_LIKELY(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp))
+ &&OP_LIKELY(diff<total_duration)){
+ cur_packet_gp=prev_packet_gp;
+ for(pi=0;pi<op_count;pi++){
+ diff=durations[pi]-diff;
+ /*If we have samples to trim...*/
+ if(diff>0){
+ /*If we trimmed the entire packet, stop (the spec says encoders
+ shouldn't do this, but we support it anyway).*/
+ if(OP_UNLIKELY(diff>durations[pi]))break;
+ cur_packet_gp=cur_page_gp;
+ /*Move the EOS flag to this packet, if necessary, so we'll trim
+ the samples during decode.*/
+ _of->op[pi].e_o_s=1;
+ }
+ else{
+ /*Update the granule position as normal.*/
+ OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp,
+ cur_packet_gp,durations[pi]));
+ }
+ _of->op[pi].granulepos=cur_packet_gp;
+ OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,cur_packet_gp));
+ }
+ }
+ else{
+ /*Propagate timestamps to earlier packets.
+ op_granpos_add(&prev_packet_gp,prev_packet_gp,total_duration)
+ should succeed and give prev_packet_gp==cur_page_gp.
+ But we don't bother to check that, as there isn't much we can do
+ if it's not true.
+ The only thing we guarantee is that the start and end granule
+ positions of the packets are valid, and that they are monotonic
+ within a page.
+ They might be completely out of range for this link (we'll check
+ that elsewhere), or non-monotonic between pages.*/
+ if(OP_UNLIKELY(op_granpos_add(&prev_packet_gp,
+ cur_page_gp,-total_duration)<0)){
+ /*The starting timestamp for the first packet on this page
+ underflowed.
+ This is illegal, but we ignore it.*/
+ prev_packet_gp=0;
+ }
+ for(pi=0;pi<op_count;pi++){
+ if(OP_UNLIKELY(op_granpos_add(&cur_packet_gp,
+ cur_page_gp,-total_duration)<0)){
+ /*The start timestamp for this packet underflowed.
+ This is illegal, but we ignore it.*/
+ cur_packet_gp=0;
+ }
+ total_duration-=durations[pi];
+ OP_ASSERT(total_duration>=0);
+ OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp,
+ cur_packet_gp,durations[pi]));
+ _of->op[pi].granulepos=cur_packet_gp;
+ }
+ OP_ASSERT(total_duration==0);
+ }
+ _of->prev_packet_gp=prev_packet_gp;
+ _of->op_count=pi;
+ /*If end-trimming didn't trim all the packets, we're done.*/
+ if(OP_LIKELY(pi>0))return 1;
+ }
+ }
+ }
+}
+
+/*Seek to a raw position in the stream.
+  _pos: The position to seek to; it must lie in the range [0,_of->end].
+  Return: 0 on success, OP_EINVAL if the file has not reached OP_OPENED or
+           _pos is out of range, OP_ENOSEEK if the source is not seekable,
+           OP_EREAD if op_seek_helper() fails, or the negative value returned
+           by op_fetch_and_process_page() (other than OP_EOF).*/
+int op_raw_seek(OggOpusFile *_of,opus_int64 _pos){
+  int status;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  /*Don't dump the decoder state if we can't seek.*/
+  if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK;
+  if(OP_UNLIKELY(_pos<0))return OP_EINVAL;
+  if(OP_UNLIKELY(_pos>_of->end))return OP_EINVAL;
+  /*Clear out any buffered, decoded data, and restart bitrate tracking.*/
+  op_decode_clear(_of);
+  _of->samples_tracked=0;
+  _of->bytes_tracked=0;
+  if(OP_UNLIKELY(op_seek_helper(_of,_pos)<0))return OP_EREAD;
+  status=op_fetch_and_process_page(_of,NULL,-1,1,1,1);
+  if(status==OP_EOF){
+    int last_link;
+    /*op_fetch_and_process_page() leaves us uninitialized when it hits EOF.
+      Park at the end of the last link instead.*/
+    op_decode_clear(_of);
+    last_link=_of->nlinks-1;
+    _of->cur_link=last_link;
+    _of->prev_packet_gp=_of->links[last_link].pcm_end;
+    _of->cur_discard_count=0;
+    return 0;
+  }
+  /*A positive result only means packets are available; report success.*/
+  return status>0?0:status;
+}
+
+/*Convert a PCM offset relative to the start of the whole stream to a granule
+   position in an individual link.
+  _pcm_offset: The offset to convert; it must be non-negative.
+  _li:         Receives the index of the link containing the offset.
+  Return: The granule position, or -1 if the offset is not inside any link
+           (in which case *_li is not written).*/
+static ogg_int64_t op_get_granulepos(const OggOpusFile *_of,
+ ogg_int64_t _pcm_offset,int *_li){
+  int nlinks;
+  int li;
+  OP_ASSERT(_pcm_offset>=0);
+  nlinks=_of->nlinks;
+  for(li=0;OP_LIKELY(li<nlinks);li++){
+    const OggOpusLink *link;
+    ogg_int64_t        link_samples;
+    ogg_int64_t        gp;
+    opus_int32         pre_skip;
+    link=_of->links+li;
+    gp=link->pcm_start;
+    pre_skip=link->head.pre_skip;
+    OP_ALWAYS_TRUE(!op_granpos_diff(&link_samples,link->pcm_end,gp));
+    link_samples-=pre_skip;
+    /*The target lies past this link: consume its samples and move on.*/
+    if(_pcm_offset>=link_samples){
+      _pcm_offset-=link_samples;
+      continue;
+    }
+    _pcm_offset+=pre_skip;
+    if(OP_UNLIKELY(gp>OP_INT64_MAX-_pcm_offset)){
+      /*Adding this amount to the granule position would overflow the
+         positive half of its 64-bit range.
+        Since signed overflow is undefined in C, wrap around by hand: take
+         off the part that reaches past OP_INT64_MAX and restart from
+         OP_INT64_MIN.*/
+      _pcm_offset-=OP_INT64_MAX-gp+1;
+      gp=OP_INT64_MIN;
+    }
+    *_li=li;
+    return gp+_pcm_offset;
+  }
+  return -1;
+}
+
+/*This controls how close the target has to be to use the current stream
+ position to subdivide the initial range.
+ Two minutes seems to be a good default.
+ The value is a sample count: 120 seconds times 48 samples per millisecond
+ times 1000 milliseconds per second.*/
+#define OP_CUR_TIME_THRESH (120*48*(opus_int32)1000)
+
+/*Note: The OP_SMALL_FOOTPRINT #define doesn't (currently) save much code size,
+ but it's meant to serve as documentation for portions of the seeking
+ algorithm that are purely optional, to aid others learning from/porting this
+ code to other contexts.*/
+/*#define OP_SMALL_FOOTPRINT (1)*/
+
+/*Search within link _li for the page with the highest granule position
+ preceding (or equal to) _target_gp.
+ There is a danger here: missing pages or incorrect frame number information
+ in the bitstream could make our task impossible.
+ Account for that (and report it as an error condition).
+ Return: 0 on success, or a negative value on error: OP_EBADLINK if no usable
+ page was found or the result fails verification, or the error returned
+ by op_seek_helper() or op_get_next_page().
+ When the target is already buffered, the result of
+ op_make_decode_ready() is returned instead.*/
+static int op_pcm_seek_page(OggOpusFile *_of,
+ ogg_int64_t _target_gp,int _li){
+ const OggOpusLink *link;
+ ogg_page og;
+ ogg_int64_t pcm_pre_skip;
+ ogg_int64_t pcm_start;
+ ogg_int64_t pcm_end;
+ ogg_int64_t best_gp;
+ ogg_int64_t diff;
+ ogg_uint32_t serialno;
+ opus_int32 pre_skip;
+ opus_int64 begin;
+ opus_int64 end;
+ opus_int64 boundary;
+ opus_int64 best;
+ opus_int64 page_offset;
+ opus_int64 d0;
+ opus_int64 d1;
+ opus_int64 d2;
+ int force_bisect;
+ int ret;
+ _of->bytes_tracked=0;
+ _of->samples_tracked=0;
+ link=_of->links+_li;
+ best_gp=pcm_start=link->pcm_start;
+ pcm_end=link->pcm_end;
+ serialno=link->serialno;
+ best=begin=link->data_offset;
+ page_offset=-1;
+ /*We discard the first 80 ms of data after a seek, so seek back that much
+ farther.
+ If we can't, simply seek to the beginning of the link.*/
+ if(OP_UNLIKELY(op_granpos_add(&_target_gp,_target_gp,-80*48)<0)
+ ||OP_UNLIKELY(op_granpos_cmp(_target_gp,pcm_start)<0)){
+ _target_gp=pcm_start;
+ }
+ /*Special case seeking to the start of the link.*/
+ pre_skip=link->head.pre_skip;
+ OP_ALWAYS_TRUE(!op_granpos_add(&pcm_pre_skip,pcm_start,pre_skip));
+ if(op_granpos_cmp(_target_gp,pcm_pre_skip)<0)end=boundary=begin;
+ else{
+ end=boundary=link->end_offset;
+#if !defined(OP_SMALL_FOOTPRINT)
+ /*If we were decoding from this link, we can narrow the range a bit.*/
+ if(_li==_of->cur_link&&_of->ready_state>=OP_INITSET){
+ opus_int64 offset;
+ int op_count;
+ op_count=_of->op_count;
+ /*The only way the offset can be invalid _and_ we can fail the granule
+ position checks below is if someone changed the contents of the last
+ page since we read it.
+ We'd be within our rights to just return OP_EBADLINK in that case, but
+ we'll simply ignore the current position instead.*/
+ offset=_of->offset;
+ if(op_count>0&&OP_LIKELY(offset<=end)){
+ ogg_int64_t gp;
+ /*Make sure the timestamp is valid.
+ The granule position might be -1 if we collected the packets from a
+ page without a granule position after reporting a hole.*/
+ gp=_of->op[op_count-1].granulepos;
+ if(OP_LIKELY(gp!=-1)&&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<0)
+ &&OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0)){
+ OP_ALWAYS_TRUE(!op_granpos_diff(&diff,gp,_target_gp));
+ /*We only actually use the current time if either
+ a) We can cut off at least half the range, or
+ b) We're seeking sufficiently close to the current position that
+ it's likely to be informative.
+ Otherwise it appears using the whole link range to estimate the
+ first seek location gives better results, on average.*/
+ if(diff<0){
+ OP_ASSERT(offset>=begin);
+ if(offset-begin>=end-begin>>1||diff>-OP_CUR_TIME_THRESH){
+ best=begin=offset;
+ best_gp=pcm_start=gp;
+ }
+ }
+ else{
+ ogg_int64_t prev_page_gp;
+ /*We might get lucky and already have the packet with the target
+ buffered.
+ Worth checking.
+ For very small files (with all of the data in a single page,
+ generally 1 second or less), we can loop them continuously
+ without seeking at all.*/
+ OP_ALWAYS_TRUE(!op_granpos_add(&prev_page_gp,_of->op[0].granulepos,
+ op_get_packet_duration(_of->op[0].packet,_of->op[0].bytes)));
+ if(op_granpos_cmp(prev_page_gp,_target_gp)<=0){
+ /*Don't call op_decode_clear(), because it will dump our
+ packets.*/
+ _of->op_pos=0;
+ _of->od_buffer_size=0;
+ _of->prev_packet_gp=prev_page_gp;
+ _of->ready_state=OP_STREAMSET;
+ return op_make_decode_ready(_of);
+ }
+ /*No such luck.
+ Check if we can cut off at least half the range, though.*/
+ if(offset-begin<=end-begin>>1||diff<OP_CUR_TIME_THRESH){
+ /*We really want the page start here, but this will do.*/
+ end=boundary=offset;
+ pcm_end=gp;
+ }
+ }
+ }
+ }
+ }
+#endif
+ }
+ /*This code was originally based on the "new search algorithm by HB (Nicholas
+ Vinen)" from libvorbisfile.
+ It has been modified substantially since.*/
+ op_decode_clear(_of);
+ /*Initialize the interval size history.*/
+ d2=d1=d0=end-begin;
+ force_bisect=0;
+ while(begin<end){
+ opus_int64 bisect;
+ opus_int64 next_boundary;
+ opus_int32 chunk_size;
+ if(end-begin<OP_CHUNK_SIZE)bisect=begin;
+ else{
+ /*Update the interval size history.*/
+ d0=d1>>1;
+ d1=d2>>1;
+ d2=end-begin>>1;
+ if(force_bisect)bisect=begin+(end-begin>>1);
+ else{
+ ogg_int64_t diff2;
+ OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start));
+ OP_ALWAYS_TRUE(!op_granpos_diff(&diff2,pcm_end,pcm_start));
+ /*Take a (pretty decent) guess.*/
+ bisect=begin+op_rescale64(diff,diff2,end-begin)-OP_CHUNK_SIZE;
+ }
+ if(bisect-OP_CHUNK_SIZE<begin)bisect=begin;
+ force_bisect=0;
+ }
+ if(bisect!=_of->offset){
+ page_offset=-1;
+ ret=op_seek_helper(_of,bisect);
+ if(OP_UNLIKELY(ret<0))return ret;
+ }
+ chunk_size=OP_CHUNK_SIZE;
+ next_boundary=boundary;
+ while(begin<end){
+ page_offset=op_get_next_page(_of,&og,boundary);
+ if(page_offset<0){
+ if(page_offset<OP_FALSE)return (int)page_offset;
+ /*There are no more pages in our interval from our stream with a valid
+ timestamp that start at position bisect or later.*/
+ /*If we scanned the whole interval, we're done.*/
+ if(bisect<=begin+1)end=begin;
+ else{
+ /*Otherwise, back up one chunk.*/
+ bisect=OP_MAX(bisect-chunk_size,begin);
+ ret=op_seek_helper(_of,bisect);
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*Bump up the chunk size.*/
+ chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+ /*If we did find a page from another stream or without a timestamp,
+ don't read past it.*/
+ boundary=next_boundary;
+ }
+ }
+ else{
+ ogg_int64_t gp;
+ /*Save the offset of the first page we found after the seek, regardless
+ of the stream it came from or whether or not it has a timestamp.*/
+ next_boundary=OP_MIN(page_offset,next_boundary);
+ if(serialno!=(ogg_uint32_t)ogg_page_serialno(&og))continue;
+ gp=ogg_page_granulepos(&og);
+ if(gp==-1)continue;
+ if(op_granpos_cmp(gp,_target_gp)<0){
+ /*We found a page that ends before our target.
+ Advance to the raw offset of the next page.*/
+ begin=_of->offset;
+ if(OP_UNLIKELY(op_granpos_cmp(pcm_start,gp)>0)
+ ||OP_UNLIKELY(op_granpos_cmp(pcm_end,gp)<0)){
+ /*Don't let pcm_start get out of range!
+ That could happen with an invalid timestamp.*/
+ break;
+ }
+ /*Save the byte offset of the end of the page with this granule
+ position.*/
+ best=begin;
+ best_gp=pcm_start=gp;
+ OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start));
+ /*If we're more than a second away from our target, break out and
+ do another bisection.*/
+ if(diff>48000)break;
+ /*Otherwise, keep scanning forward (do NOT use begin+1).*/
+ bisect=begin;
+ }
+ else{
+ /*We found a page that ends after our target.*/
+ /*If we scanned the whole interval before we found it, we're done.*/
+ if(bisect<=begin+1)end=begin;
+ else{
+ end=bisect;
+ /*In later iterations, don't read past the first page we found.*/
+ boundary=next_boundary;
+ /*If we're not making much progress shrinking the interval size,
+ start forcing straight bisection to limit the worst case.*/
+ force_bisect=end-begin>d0*2;
+ /*Don't let pcm_end get out of range!
+ That could happen with an invalid timestamp.*/
+ if(OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0)
+ &&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<=0)){
+ pcm_end=gp;
+ }
+ break;
+ }
+ }
+ }
+ }
+ }
+ /*Found our page.
+ Seek to the end of it and update prev_packet_gp.
+ Our caller will set cur_discard_count.
+ This is an easier case than op_raw_seek(), as we don't need to keep any
+ packets from the page we found.*/
+ /*Seek, if necessary.*/
+ if(best!=page_offset){
+ page_offset=-1;
+ ret=op_seek_helper(_of,best);
+ if(OP_UNLIKELY(ret<0))return ret;
+ }
+ OP_ASSERT(op_granpos_cmp(best_gp,pcm_start)>=0);
+ _of->cur_link=_li;
+ _of->ready_state=OP_STREAMSET;
+ _of->prev_packet_gp=best_gp;
+ ogg_stream_reset_serialno(&_of->os,serialno);
+ ret=op_fetch_and_process_page(_of,page_offset<0?NULL:&og,page_offset,1,0,1);
+ if(OP_UNLIKELY(ret<=0))return OP_EBADLINK;
+ /*Verify result.*/
+ if(OP_UNLIKELY(op_granpos_cmp(_of->prev_packet_gp,_target_gp)>0)){
+ return OP_EBADLINK;
+ }
+ return 0;
+}
+
+int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset){/*Seek to the sample
+ at _pcm_offset, measured from the start of the whole stream.
+ Return: 0 on success, OP_EINVAL if the file has not reached OP_OPENED or the
+ offset is negative or not inside any link, OP_ENOSEEK if the source is not
+ seekable, OP_EBADLINK if the data found does not line up with the target,
+ or the negative value returned by op_pcm_seek_page().*/
+ const OggOpusLink *link;
+ ogg_int64_t pcm_start;
+ ogg_int64_t target_gp;
+ ogg_int64_t prev_packet_gp;
+ ogg_int64_t skip;
+ ogg_int64_t diff;
+ int op_count;
+ int op_pos;
+ int ret;
+ int li;
+ if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+ if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK;
+ if(OP_UNLIKELY(_pcm_offset<0))return OP_EINVAL;
+ target_gp=op_get_granulepos(_of,_pcm_offset,&li);
+ if(OP_UNLIKELY(target_gp==-1))return OP_EINVAL;
+ link=_of->links+li;
+ pcm_start=link->pcm_start;
+ /*From here on _pcm_offset is measured from the link's pcm_start, so it
+ includes the link's pre-skip.*/
+ OP_ALWAYS_TRUE(!op_granpos_diff(&_pcm_offset,target_gp,pcm_start));
+#if !defined(OP_SMALL_FOOTPRINT)
+ /*For small (90 ms or less) forward seeks within the same link, just decode
+ forward.
+ This also optimizes the case of seeking to the current position.*/
+ if(li==_of->cur_link&&_of->ready_state>=OP_INITSET){
+ ogg_int64_t gp;
+ gp=_of->prev_packet_gp;
+ if(OP_LIKELY(gp!=-1)){
+ int nbuffered;
+ nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0);
+ OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered));
+ /*We do _not_ add cur_discard_count to gp.
+ Otherwise the total amount to discard could grow without bound, and it
+ would be better just to do a full seek.*/
+ if(OP_LIKELY(!op_granpos_diff(&diff,gp,pcm_start))){
+ ogg_int64_t discard_count;
+ discard_count=_pcm_offset-diff;
+ /*We use a threshold of 90 ms instead of 80, since 80 ms is the
+ _minimum_ we would have discarded after a full seek.
+ Assuming 20 ms frames (the default), we'd discard 90 ms on average.*/
+ if(discard_count>=0&&OP_UNLIKELY(discard_count<90*48)){
+ _of->cur_discard_count=(opus_int32)discard_count;
+ return 0;
+ }
+ }
+ }
+ }
+#endif
+ ret=op_pcm_seek_page(_of,target_gp,li);
+ if(OP_UNLIKELY(ret<0))return ret;
+ /*Now skip samples until we actually get to our target.*/
+ /*Figure out where we should skip to.*/
+ if(_pcm_offset<=link->head.pre_skip)skip=0;
+ else skip=OP_MAX(_pcm_offset-80*48,0);
+ OP_ASSERT(_pcm_offset-skip>=0);
+ OP_ASSERT(_pcm_offset-skip<OP_INT32_MAX-120*48);
+ /*Skip packets until we find one with samples past our skip target.*/
+ for(;;){
+ op_count=_of->op_count;
+ prev_packet_gp=_of->prev_packet_gp;
+ for(op_pos=_of->op_pos;op_pos<op_count;op_pos++){
+ ogg_int64_t cur_packet_gp;
+ cur_packet_gp=_of->op[op_pos].granulepos;
+ if(OP_LIKELY(!op_granpos_diff(&diff,cur_packet_gp,pcm_start))
+ &&diff>skip){
+ break;
+ }
+ prev_packet_gp=cur_packet_gp;
+ }
+ _of->prev_packet_gp=prev_packet_gp;
+ _of->op_pos=op_pos;
+ if(op_pos<op_count)break;
+ /*We skipped all the packets on this page.
+ Fetch another.*/
+ ret=op_fetch_and_process_page(_of,NULL,-1,1,0,1);
+ if(OP_UNLIKELY(ret<=0))return OP_EBADLINK;
+ }
+ OP_ALWAYS_TRUE(!op_granpos_diff(&diff,prev_packet_gp,pcm_start));
+ /*We skipped too far.
+ Either the timestamps were illegal or there was a hole in the data.*/
+ if(diff>skip)return OP_EBADLINK;
+ OP_ASSERT(_pcm_offset-diff<OP_INT32_MAX);
+ /*TODO: If there are further holes/illegal timestamps, we still won't decode
+ to the correct sample.
+ However, at least op_pcm_tell() will report the correct value immediately
+ after returning.*/
+ _of->cur_discard_count=(opus_int32)(_pcm_offset-diff);
+ return 0;
+}
+
+/*Get the current raw position.
+  Return: The value of _of->offset, or OP_EINVAL if the file has not reached
+           OP_OPENED.*/
+opus_int64 op_raw_tell(const OggOpusFile *_of){
+  if(OP_LIKELY(_of->ready_state>=OP_OPENED))return _of->offset;
+  return OP_EINVAL;
+}
+
+/*Convert a granule position from a given link to a PCM offset relative to the
+   start of the whole stream.
+  For unseekable sources, this gets reset to 0 at the beginning of each link.
+  _gp: The granule position to convert.
+  _li: The index of the link it belongs to (0<=_li<nlinks).
+  Return: The PCM offset, or OP_INT64_MAX if the distance from the link's
+           pcm_start cannot be represented.*/
+static ogg_int64_t op_get_pcm_offset(const OggOpusFile *_of,
+ ogg_int64_t _gp,int _li){
+  const OggOpusLink *links;
+  const OggOpusLink *cur;
+  ogg_int64_t        total;
+  ogg_int64_t        span;
+  int                li;
+  OP_ASSERT(_li<_of->nlinks);
+  OP_ASSERT(_li>=0);
+  links=_of->links;
+  /*Add up the playable length of every link before ours.*/
+  total=0;
+  for(li=0;li<_li;li++){
+    OP_ALWAYS_TRUE(!op_granpos_diff(&span,
+     links[li].pcm_end,links[li].pcm_start));
+    span-=links[li].head.pre_skip;
+    total+=span;
+  }
+  cur=links+_li;
+  /*A seekable link has a known end: don't report a position past it.*/
+  if(_of->seekable&&OP_UNLIKELY(op_granpos_cmp(_gp,cur->pcm_end)>0)){
+    _gp=cur->pcm_end;
+  }
+  /*At or before the start of the link, nothing more is added.*/
+  if(OP_UNLIKELY(op_granpos_cmp(_gp,cur->pcm_start)<=0))return total;
+  if(OP_UNLIKELY(op_granpos_diff(&span,_gp,cur->pcm_start)<0)){
+    /*This means an unseekable stream claimed to have a page from more than
+       2 billion days after we joined.*/
+    OP_ASSERT(!_of->seekable);
+    return OP_INT64_MAX;
+  }
+  /*Positions inside the pre-skip region all map to the start of the link.*/
+  if(span>=cur->head.pre_skip)span-=cur->head.pre_skip;
+  else span=0;
+  /*In the seekable case, _gp was limited by pcm_end.
+    In the unseekable case, total should be 0.*/
+  OP_ASSERT(total<=OP_INT64_MAX-span);
+  return total+span;
+}
+
+/*Get the PCM offset of the next sample that would be returned.
+  Return: The offset computed by op_get_pcm_offset(), 0 if prev_packet_gp is
+           -1, or OP_EINVAL if the file has not reached OP_OPENED.*/
+ogg_int64_t op_pcm_tell(const OggOpusFile *_of){
+  ogg_int64_t gp;
+  int         nbuffered;
+  int         li;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  gp=_of->prev_packet_gp;
+  if(gp==-1)return 0;
+  /*Step back over the decoded samples that have not been handed out yet.*/
+  nbuffered=_of->od_buffer_size-_of->od_buffer_pos;
+  if(nbuffered<0)nbuffered=0;
+  OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered));
+  if(_of->seekable)li=_of->cur_link;
+  else li=0;
+  /*Step forward over the samples that are still going to be discarded; if
+     that cannot be represented, use the end of the link.*/
+  if(op_granpos_add(&gp,gp,_of->cur_discard_count)<0){
+    gp=_of->links[li].pcm_end;
+  }
+  return op_get_pcm_offset(_of,gp,li);
+}
+
+/*Install the application's decode callback.
+  _decode_cb: The callback to store; op_decode() skips it when it is NULL.
+  _ctx:       The context pointer stored alongside it and handed back to the
+               callback when it is invoked.*/
+void op_set_decode_callback(OggOpusFile *_of,
+ op_decode_cb_func _decode_cb,void *_ctx){
+  _of->decode_cb_ctx=_ctx;
+  _of->decode_cb=_decode_cb;
+}
+
+/*Set the gain type and gain offset, then refresh the applied gain.
+  _gain_type:      One of OP_HEADER_GAIN, OP_TRACK_GAIN, or OP_ABSOLUTE_GAIN.
+  _gain_offset_q8: The offset; it is clamped to [-98302,98303] before being
+                    stored.
+  Return: 0 on success, or OP_EINVAL for any other _gain_type (in which case
+           nothing is modified).*/
+int op_set_gain_offset(OggOpusFile *_of,
+ int _gain_type,opus_int32 _gain_offset_q8){
+  int known_type;
+  known_type=_gain_type==OP_HEADER_GAIN||_gain_type==OP_TRACK_GAIN
+   ||_gain_type==OP_ABSOLUTE_GAIN;
+  if(!known_type)return OP_EINVAL;
+  _of->gain_type=_gain_type;
+  /*The sum of header gain and track gain lies in the range [-65536,65534].
+    These bounds allow the offset to set the final value to anywhere in the
+     range [-32768,32767], which is what we'll clamp it to before applying.*/
+  _of->gain_offset_q8=OP_CLAMP(-98302,_gain_offset_q8,98303);
+  op_update_gain(_of);
+  return 0;
+}
+
+/*Enable or disable dithering.
+  This does nothing when OPUS_FIXED_POINT is defined.
+  _enabled: Non-zero to enable dithering.
+            Zero disables it and also sets dither_mute to 65.*/
+void op_set_dither_enabled(OggOpusFile *_of,int _enabled){
+#if !defined(OPUS_FIXED_POINT)
+  if(_enabled)_of->dither_disabled=0;
+  else{
+    _of->dither_disabled=1;
+    _of->dither_mute=65;
+  }
+#endif
+}
+
+/*Allocate the decoder scratch buffer.
+  This is done lazily, since if the user provides large enough buffers, we'll
+   never need it.
+  The buffer holds 120*48 samples for the largest channel count that can
+   occur: the maximum over all links of a seekable source, or
+   OP_NCHANNELS_MAX when the source is not seekable.
+  Return: 0 on success, or OP_EFAULT if the allocation fails.*/
+static int op_init_buffer(OggOpusFile *_of){
+  int nchannels_max;
+  if(!_of->seekable)nchannels_max=OP_NCHANNELS_MAX;
+  else{
+    const OggOpusLink *links;
+    int                nlinks;
+    int                li;
+    links=_of->links;
+    nlinks=_of->nlinks;
+    nchannels_max=1;
+    for(li=0;li<nlinks;li++){
+      if(links[li].head.channel_count>nchannels_max){
+        nchannels_max=links[li].head.channel_count;
+      }
+    }
+  }
+  _of->od_buffer=(op_sample *)_ogg_malloc(
+   sizeof(*_of->od_buffer)*nchannels_max*120*48);
+  return _of->od_buffer==NULL?OP_EFAULT:0;
+}
+
+/*Decode a single packet into the target buffer.
+  The application-provided decode callback, if any, is tried first; the
+   built-in multistream decoder runs when there is no callback or when the
+   callback returns OP_DEC_USE_DEFAULT.
+  Return: The value returned by the built-in decoder, 0 when the callback
+           handled the packet, or OP_EBADPACKET on any failure.*/
+static int op_decode(OggOpusFile *_of,op_sample *_pcm,
+ const ogg_packet *_op,int _nsamples,int _nchannels){
+  int ret;
+  /*With no callback installed, act as if it had deferred to us.*/
+  ret=OP_DEC_USE_DEFAULT;
+  if(_of->decode_cb!=NULL){
+#if defined(OPUS_FIXED_POINT)
+    ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op,
+     _nsamples,_nchannels,OP_DEC_FORMAT_SHORT,_of->cur_link);
+#else
+    ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op,
+     _nsamples,_nchannels,OP_DEC_FORMAT_FLOAT,_of->cur_link);
+#endif
+  }
+  if(ret!=OP_DEC_USE_DEFAULT){
+    /*The application handled the packet itself.
+      The only value it may return besides OP_DEC_USE_DEFAULT is 0; anything
+       else, positive or negative, is a failure.*/
+    return OP_UNLIKELY(ret!=0)?OP_EBADPACKET:0;
+  }
+  /*The application didn't want to handle decoding, so do it ourselves.*/
+#if defined(OPUS_FIXED_POINT)
+  ret=opus_multistream_decode(_of->od,
+   _op->packet,_op->bytes,_pcm,_nsamples,0);
+#else
+  ret=opus_multistream_decode_float(_of->od,
+   _op->packet,_op->bytes,_pcm,_nsamples,0);
+#endif
+  OP_ASSERT(ret<0||ret==_nsamples);
+  return OP_UNLIKELY(ret<0)?OP_EBADPACKET:ret;
+}
+
+/*Read more samples from the stream, using the same API as op_read() or
+   op_read_float().
+  Samples already sitting in the internal decode buffer are returned first.
+  Otherwise the next buffered packet is decoded, and only once no packets are
+   left is another page fetched.
+  _of:       The stream to read from; its ready_state must be at least
+              OP_OPENED.
+  _pcm:      The destination for the interleaved samples.
+             This may be NULL if _buf_size is 0, in which case decoded audio is
+              only left in the internal buffer.
+  _buf_size: The capacity of _pcm, in values (not samples per channel).
+  _li:       If non-NULL, this is set to _of->cur_link whenever a
+              non-negative value is returned.
+  Return: The number of samples per channel made available in _pcm, 0 if the
+           end of the stream was reached (or no complete sample fits in
+           _pcm), or a negative error code: OP_EINVAL if the stream is not
+           open, or whatever op_init_buffer(), op_decode(), or
+           op_fetch_and_process_page() reported.*/
+static int op_read_native(OggOpusFile *_of,
+ op_sample *_pcm,int _buf_size,int *_li){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  for(;;){
+    int ret;
+    if(OP_LIKELY(_of->ready_state>=OP_INITSET)){
+      int nchannels;
+      int od_buffer_pos;
+      int nsamples;
+      int op_pos;
+      nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count;
+      od_buffer_pos=_of->od_buffer_pos;
+      nsamples=_of->od_buffer_size-od_buffer_pos;
+      /*If we have buffered samples, return them.*/
+      if(nsamples>0){
+        if(nsamples*nchannels>_buf_size)nsamples=_buf_size/nchannels;
+        /*Check nsamples again so we never hand a NULL _pcm to memcpy() when
+           _buf_size is zero (which is how op_filter_read_native() calls us).
+          That is undefined behavior even when the number of bytes to copy is
+           zero.*/
+        if(nsamples>0){
+          memcpy(_pcm,_of->od_buffer+nchannels*od_buffer_pos,
+           sizeof(*_pcm)*nchannels*nsamples);
+          od_buffer_pos+=nsamples;
+          _of->od_buffer_pos=od_buffer_pos;
+        }
+        if(_li!=NULL)*_li=_of->cur_link;
+        return nsamples;
+      }
+      /*If we have buffered packets, decode one.*/
+      op_pos=_of->op_pos;
+      if(OP_LIKELY(op_pos<_of->op_count)){
+        const ogg_packet *pop;
+        ogg_int64_t       diff;
+        opus_int32        cur_discard_count;
+        int               duration;
+        int               trimmed_duration;
+        pop=_of->op+op_pos++;
+        _of->op_pos=op_pos;
+        cur_discard_count=_of->cur_discard_count;
+        duration=op_get_packet_duration(pop->packet,pop->bytes);
+        /*We don't buffer packets with an invalid TOC sequence.*/
+        OP_ASSERT(duration>0);
+        trimmed_duration=duration;
+        /*Perform end-trimming.*/
+        if(OP_UNLIKELY(pop->e_o_s)){
+          if(OP_UNLIKELY(op_granpos_cmp(pop->granulepos,
+           _of->prev_packet_gp)<=0)){
+            trimmed_duration=0;
+          }
+          else if(OP_LIKELY(!op_granpos_diff(&diff,
+           pop->granulepos,_of->prev_packet_gp))){
+            trimmed_duration=(int)OP_MIN(diff,trimmed_duration);
+          }
+        }
+        _of->prev_packet_gp=pop->granulepos;
+        if(OP_UNLIKELY(duration*nchannels>_buf_size)){
+          op_sample *buf;
+          /*If the user's buffer is too small, decode into a scratch buffer.*/
+          buf=_of->od_buffer;
+          if(OP_UNLIKELY(buf==NULL)){
+            ret=op_init_buffer(_of);
+            if(OP_UNLIKELY(ret<0))return ret;
+            buf=_of->od_buffer;
+          }
+          ret=op_decode(_of,buf,pop,duration,nchannels);
+          if(OP_UNLIKELY(ret<0))return ret;
+          /*Perform pre-skip/pre-roll.*/
+          od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count);
+          cur_discard_count-=od_buffer_pos;
+          _of->cur_discard_count=cur_discard_count;
+          _of->od_buffer_pos=od_buffer_pos;
+          _of->od_buffer_size=trimmed_duration;
+          /*Update bitrate tracking based on the actual samples we used from
+             what was decoded.*/
+          _of->bytes_tracked+=pop->bytes;
+          _of->samples_tracked+=trimmed_duration-od_buffer_pos;
+        }
+        else{
+          /*Otherwise decode directly into the user's buffer.*/
+          ret=op_decode(_of,_pcm,pop,duration,nchannels);
+          if(OP_UNLIKELY(ret<0))return ret;
+          if(OP_LIKELY(trimmed_duration>0)){
+            /*Perform pre-skip/pre-roll.*/
+            od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count);
+            cur_discard_count-=od_buffer_pos;
+            _of->cur_discard_count=cur_discard_count;
+            trimmed_duration-=od_buffer_pos;
+            if(OP_LIKELY(trimmed_duration>0)
+             &&OP_UNLIKELY(od_buffer_pos>0)){
+              /*Slide the samples we keep down over the discarded ones.*/
+              memmove(_pcm,_pcm+od_buffer_pos*nchannels,
+               sizeof(*_pcm)*trimmed_duration*nchannels);
+            }
+            /*Update bitrate tracking based on the actual samples we used from
+               what was decoded.*/
+            _of->bytes_tracked+=pop->bytes;
+            _of->samples_tracked+=trimmed_duration;
+            if(OP_LIKELY(trimmed_duration>0)){
+              if(_li!=NULL)*_li=_of->cur_link;
+              return trimmed_duration;
+            }
+          }
+        }
+        /*Don't grab another page yet.
+          This one might have more packets, or might have buffered data now.*/
+        continue;
+      }
+    }
+    /*Suck in another page.*/
+    ret=op_fetch_and_process_page(_of,NULL,-1,1,1,0);
+    if(OP_UNLIKELY(ret==OP_EOF)){
+      if(_li!=NULL)*_li=_of->cur_link;
+      return 0;
+    }
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+}
+
+/*A generic filter to apply to the decoded audio data.
+ _src is non-const because we will destructively modify the contents of the
+ source buffer that we consume in some cases.
+ _of: The stream the samples were decoded from.
+ _dst: The output buffer; its element type is chosen by the filter.
+ _dst_sz: The capacity of _dst, in values (not samples per channel).
+ _src: The interleaved decoded samples to convert.
+ _nsamples: The number of samples per channel available in _src.
+ _nchannels: The number of interleaved channels in _src.
+ Return: The number of samples per channel consumed from _src.
+ op_filter_read_native() asserts that this lies between 0 and _nsamples
+ and advances its buffer position by it.*/
+typedef int (*op_read_filter_func)(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels);
+
+/*Decode some samples into the internal buffer, then run a caller-supplied
+   filter over them.
+  This is how the decoded audio gets converted to other output formats.
+  _of:     The stream to read from.
+  _dst:    The output buffer handed to the filter.
+  _dst_sz: The capacity of _dst, in values.
+  _filter: The conversion routine to apply.
+  _li:     Passed through to op_read_native().
+  Return: The number of samples per channel the filter consumed, a
+           non-positive value if no decoded samples were available, or the
+           negative error code from op_read_native().*/
+static int op_filter_read_native(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_read_filter_func _filter,int *_li){
+  int status;
+  int buf_pos;
+  int navail;
+  int nchannels;
+  int nused;
+  /*With a zero-sized destination, op_read_native() copies nothing out, so
+     whatever it decodes stays in our own buffer.*/
+  status=op_read_native(_of,NULL,0,_li);
+  if(OP_UNLIKELY(status<0)||OP_UNLIKELY(_of->ready_state<OP_INITSET)){
+    return status;
+  }
+  buf_pos=_of->od_buffer_pos;
+  navail=_of->od_buffer_size-buf_pos;
+  if(OP_UNLIKELY(navail<=0))return navail;
+  nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count;
+  nused=(*_filter)(_of,_dst,_dst_sz,
+   _of->od_buffer+nchannels*buf_pos,navail,nchannels);
+  OP_ASSERT(nused>=0);
+  OP_ASSERT(nused<=_of->od_buffer_size-buf_pos);
+  /*Only mark as consumed what the filter actually used.*/
+  _of->od_buffer_pos=buf_pos+nused;
+  return nused;
+}
+
+#if !defined(OPUS_FIXED_POINT)||!defined(OP_DISABLE_FLOAT_API)
+
+/*Matrices for downmixing from the supported channel counts to stereo.
+  The matrices with 5 or more channels are normalized to a total volume of 2.0,
+   since most mixes sound too quiet if normalized to 1.0 (as there is generally
+   little volume in the side/rear channels).
+  The first index is the input channel count minus 3, the second is the input
+   channel, and the last selects the left (0) or right (1) output weight.
+  For 3 and 4 channels, each output's weights sum to 1.0.*/
+static const float OP_STEREO_DOWNMIX[OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={
+  /*3.0*/
+  {
+    {0.5858F,0.0F},{0.4142F,0.4142F},{0.0F,0.5858F}
+  },
+  /*quadrophonic*/
+  {
+    /*The last entry mirrors the third one: 0.4226+0.2114+0.366=1.0.*/
+    {0.4226F,0.0F},{0.0F,0.4226F},{0.366F,0.2114F},{0.2114F,0.366F}
+  },
+  /*5.0*/
+  {
+    {0.651F,0.0F},{0.46F,0.46F},{0.0F,0.651F},{0.5636F,0.3254F},
+    {0.3254F,0.5636F}
+  },
+  /*5.1*/
+  {
+    {0.529F,0.0F},{0.3741F,0.3741F},{0.0F,0.529F},{0.4582F,0.2645F},
+    {0.2645F,0.4582F},{0.3741F,0.3741F}
+  },
+  /*6.1*/
+  {
+    {0.4553F,0.0F},{0.322F,0.322F},{0.0F,0.4553F},{0.3943F,0.2277F},
+    {0.2277F,0.3943F},{0.2788F,0.2788F},{0.322F,0.322F}
+  },
+  /*7.1*/
+  {
+    {0.3886F,0.0F},{0.2748F,0.2748F},{0.0F,0.3886F},{0.3366F,0.1943F},
+    {0.1943F,0.3366F},{0.3366F,0.1943F},{0.1943F,0.3366F},{0.2748F,0.2748F}
+  }
+};
+
+#endif
+
+#if defined(OPUS_FIXED_POINT)
+
+/*Matrices for downmixing from the supported channel counts to stereo.
+ The matrices with 5 or more channels are normalized to a total volume of 2.0,
+ since most mixes sound too quiet if normalized to 1.0 (as there is generally
+ little volume in the side/rear channels).
+ Hence we keep the coefficients in Q14, so the downmix values won't overflow a
+ 32-bit number.
+ Each entry is the matching floating-point downmix weight scaled by 16384
+ (to within rounding), e.g., 9598 ~= 0.5858*16384.
+ The first index is the input channel count minus 3, the second is the input
+ channel, and the last selects the left (0) or right (1) output weight.*/
+static const opus_int16 OP_STEREO_DOWNMIX_Q14
+ [OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={
+ /*3.0*/
+ {
+ {9598,0},{6786,6786},{0,9598}
+ },
+ /*quadrophonic*/
+ {
+ {6924,0},{0,6924},{5996,3464},{3464,5996}
+ },
+ /*5.0*/
+ {
+ {10666,0},{7537,7537},{0,10666},{9234,5331},{5331,9234}
+ },
+ /*5.1*/
+ {
+ {8668,0},{6129,6129},{0,8668},{7507,4335},{4335,7507},{6129,6129}
+ },
+ /*6.1*/
+ {
+ {7459,0},{5275,5275},{0,7459},{6460,3731},{3731,6460},{4568,4568},
+ {5275,5275}
+ },
+ /*7.1*/
+ {
+ {6368,0},{4502,4502},{0,6368},{5515,3183},{3183,5515},{5515,3183},
+ {3183,5515},{4502,4502}
+ }
+};
+
+/*Fixed-point build: the 16-bit request is forwarded to op_read_native()
+ unchanged, with no conversion filter in between.*/
+int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){
+ return op_read_native(_of,_pcm,_buf_size,_li);
+}
+
+/*Filter that turns interleaved 16-bit input with any supported channel count
+   into interleaved 16-bit stereo.
+  Stereo input is copied, mono input is duplicated into both outputs, and
+   anything wider is downmixed with the Q14 matrices.
+  In the downmix case, every channel of an input frame is read before its
+   output frame is written.
+  Return: The number of stereo frames written, limited by both _nsamples and
+           the _dst_sz>>1 frames that fit in _dst.*/
+static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels){
+  opus_int16 *out;
+  int         nframes;
+  int         fi;
+  (void)_of;
+  /*Each output frame takes two values.*/
+  nframes=OP_MIN(_nsamples,_dst_sz>>1);
+  if(_nchannels==2){
+    memcpy(_dst,_src,nframes*2*sizeof(*_src));
+    return nframes;
+  }
+  out=(opus_int16 *)_dst;
+  if(_nchannels==1){
+    for(fi=0;fi<nframes;fi++){
+      out[2*fi+1]=_src[fi];
+      out[2*fi+0]=out[2*fi+1];
+    }
+    return nframes;
+  }
+  for(fi=0;fi<nframes;fi++){
+    const op_sample *frame;
+    opus_int32       left;
+    opus_int32       right;
+    int              ci;
+    frame=_src+_nchannels*fi;
+    left=0;
+    right=0;
+    for(ci=0;ci<_nchannels;ci++){
+      opus_int32 s;
+      s=frame[ci];
+      left+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][0]*s;
+      right+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][1]*s;
+    }
+    /*TODO: For 5 or more channels, we should do soft clipping here.*/
+    /*Round the Q14 sums back to integers before saturating.*/
+    out[2*fi+0]=(opus_int16)OP_CLAMP(-32768,(left+8192)>>14,32767);
+    out[2*fi+1]=(opus_int16)OP_CLAMP(-32768,(right+8192)>>14,32767);
+  }
+  return nframes;
+}
+
+/*Fixed-point build: read 16-bit stereo by running op_stereo_filter() over the
+ decoded samples.
+ No link index is reported (NULL is passed for _li).*/
+int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){
+ return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL);
+}
+
+# if !defined(OP_DISABLE_FLOAT_API)
+
+/*Filter that converts interleaved integer samples to floats, scaling each
+   value by 1/32768.
+  The channel layout is left untouched.
+  Return: The number of samples per channel converted, reduced if _dst (which
+           holds _dst_sz values) cannot take all of them.*/
+static int op_short2float_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels){
+  float *out;
+  int    nvalues;
+  int    k;
+  (void)_of;
+  out=(float *)_dst;
+  if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels;
+  nvalues=_nsamples*_nchannels;
+  for(k=0;k<nvalues;k++){
+    out[k]=(1.0F/32768)*_src[k];
+  }
+  return _nsamples;
+}
+
+/*Fixed-point build with the float API enabled: read floats by converting the
+ decoded samples with op_short2float_filter().*/
+int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){
+ return op_filter_read_native(_of,_pcm,_buf_size,op_short2float_filter,_li);
+}
+
+/*Filter that converts interleaved integer samples with any supported channel
+ count into interleaved float stereo.
+ _dst holds _dst_sz floats, i.e., _dst_sz>>1 stereo frames.
+ For 3 or 4 channels, _src is overwritten by the in-place downmix.
+ Return: The number of samples per channel consumed from _src.*/
+static int op_short2float_stereo_filter(OggOpusFile *_of,
+ void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){
+ float *dst;
+ int i;
+ dst=(float *)_dst;
+ _nsamples=OP_MIN(_nsamples,_dst_sz>>1);
+ if(_nchannels==1){
+ /*Convert the mono samples into the front of dst, then spread them out in
+ place.
+ The loop runs backwards so no dst[i] is overwritten before it is read.*/
+ _nsamples=op_short2float_filter(_of,dst,_nsamples,_src,_nsamples,1);
+ for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i];
+ }
+ else if(_nchannels<5){
+ /*For 3 or 4 channels, we can downmix in fixed point without risk of
+ clipping.*/
+ if(_nchannels>2){
+ /*Downmix in place: _src serves as both input and output.*/
+ _nsamples=op_stereo_filter(_of,_src,_nsamples*2,
+ _src,_nsamples,_nchannels);
+ }
+ /*_src now holds stereo data (it already did for 2 channels).*/
+ return op_short2float_filter(_of,dst,_dst_sz,_src,_nsamples,2);
+ }
+ else{
+ /*For 5 or more channels, we convert to floats and then downmix (so that we
+ don't risk clipping).*/
+ for(i=0;i<_nsamples;i++){
+ float l;
+ float r;
+ int ci;
+ l=r=0;
+ for(ci=0;ci<_nchannels;ci++){
+ float s;
+ s=(1.0F/32768)*_src[_nchannels*i+ci];
+ l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*s;
+ r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*s;
+ }
+ dst[2*i+0]=l;
+ dst[2*i+1]=r;
+ }
+ }
+ return _nsamples;
+}
+
+/*Fixed-point build with the float API enabled: read float stereo by running
+ op_short2float_stereo_filter() over the decoded samples.
+ No link index is reported (NULL is passed for _li).*/
+int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){
+ return op_filter_read_native(_of,_pcm,_buf_size,
+ op_short2float_stereo_filter,NULL);
+}
+
+# endif
+
+#else
+
+/*Round a float to the nearest integer.
+ With OP_HAVE_LRINTF this is lrintf().
+ Otherwise the fallback adds 0.5 with the sign of the argument and truncates,
+ which rounds halfway cases away from zero.
+ The fallback evaluates its argument more than once, so the argument must not
+ have side effects.*/
+# if defined(OP_HAVE_LRINTF)
+# include <math.h>
+# define op_float2int(_x) (lrintf(_x))
+# else
+# define op_float2int(_x) ((int)((_x)+((_x)<0?-0.5F:0.5F)))
+# endif
+
+/*The dithering code here is adapted from opusdec, part of opus-tools.
+ It was originally written by Greg Maxwell.*/
+
+/*Advance the dither pseudo-random state by one step.
+  This is a linear congruential update, masked to 32 bits.*/
+static opus_uint32 op_rand(opus_uint32 _seed){
+  opus_uint32 next;
+  next=_seed*96314165+907633515;
+  return next&0xFFFFFFFFU;
+}
+
+/*This implements 16-bit quantization with full triangular dither and IIR noise
+ shaping.
+ The noise shaping filters were designed by Sebastian Gesemann, and are based
+ on the LAME ATH curves with flattening to limit their peak gain to 20 dB.
+ Everyone else's noise shaping filters are mildly crazy.
+ The 48 kHz version of this filter is just a warped version of the 44.1 kHz
+ filter and probably could be improved by shifting the HF shelf up in
+ frequency a little bit, since 48 kHz has a bit more room and being more
+ conservative against bat-ears is probably more important than more noise
+ suppression.
+ This process can increase the peak level of the signal (in theory by the peak
+ error of 1.5 +20 dB, though that is unobservably rare).
+ To avoid clipping, the signal is attenuated by a couple thousandths of a dB.
+ Initially, the approach taken here was to only attenuate by the 99.9th
+ percentile, making clipping rare but not impossible (like SoX), but the
+ limited gain of the filter means that the worst case was only two
+ thousandths of a dB more, so this just uses the worst case.
+ The attenuation is probably also helpful to prevent clipping in the DAC
+ reconstruction filters or downstream resampling, in any case.*/
+
+/*The scale applied to a float sample before dithering.
+ This is slightly below 32768, which provides the small attenuation.*/
+# define OP_GAIN (32753.0F)
+
+/*Scales a 32-bit random value to the range [0,1].
+ The dither is the difference of two such values.*/
+# define OP_PRNG_GAIN (1.0F/0xFFFFFFFF)
+
+/*48 kHz noise shaping filter, sd=2.34.*/
+
+/*Weights applied to the last four quantization errors (dither_b).*/
+static const float OP_FCOEF_B[4]={
+ 2.2374F,-0.7339F,-0.1251F,-0.6033F
+};
+
+/*Weights applied to the last four filter outputs (dither_a).*/
+static const float OP_FCOEF_A[4]={
+ 0.9030F,0.0116F,-0.5853F,-0.2571F
+};
+
+/*Filter that converts interleaved float samples to 16-bit integers.
+ If dithering is disabled, samples are scaled by 32768, clamped, and rounded.
+ Otherwise they are scaled by OP_GAIN and quantized with dither and noise
+ shaping, using per-channel state kept in _of.
+ With OP_SOFT_CLIP defined, _src is first soft-clipped in place.
+ _dst holds _dst_sz values.
+ Return: The number of samples per channel converted.*/
+static int op_float2short_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ float *_src,int _nsamples,int _nchannels){
+ opus_int16 *dst;
+ int ci;
+ int i;
+ dst=(opus_int16 *)_dst;
+ if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels;
+# if defined(OP_SOFT_CLIP)
+ /*The saved clipping state is only reused if it was left by a call with the
+ same channel count.*/
+ if(_of->state_channel_count!=_nchannels){
+ for(ci=0;ci<_nchannels;ci++)_of->clip_state[ci]=0;
+ }
+ opus_pcm_soft_clip(_src,_nsamples,_nchannels,_of->clip_state);
+# endif
+ if(_of->dither_disabled){
+ for(i=0;i<_nchannels*_nsamples;i++){
+ dst[i]=op_float2int(OP_CLAMP(-32768,32768.0F*_src[i],32767));
+ }
+ }
+ else{
+ opus_uint32 seed;
+ int mute;
+ seed=_of->dither_seed;
+ /*mute counts consecutive all-zero frames.*/
+ mute=_of->dither_mute;
+ /*A channel count change invalidates the saved filter state, so force the
+ reset below.*/
+ if(_of->state_channel_count!=_nchannels)mute=65;
+ /*In order to avoid replacing digital silence with quiet dither noise, we
+ mute if the output has been silent for a while.*/
+ if(mute>64)memset(_of->dither_a,0,sizeof(*_of->dither_a)*4*_nchannels);
+ for(i=0;i<_nsamples;i++){
+ int silent;
+ silent=1;
+ for(ci=0;ci<_nchannels;ci++){
+ float r;
+ float s;
+ float err;
+ int si;
+ int j;
+ s=_src[_nchannels*i+ci];
+ silent&=s==0;
+ s*=OP_GAIN;
+ /*Run the noise shaping filter over this channel's history.*/
+ err=0;
+ for(j=0;j<4;j++){
+ err+=OP_FCOEF_B[j]*_of->dither_b[ci*4+j]
+ -OP_FCOEF_A[j]*_of->dither_a[ci*4+j];
+ }
+ /*Shift both histories by one entry to make room at index 0.*/
+ for(j=3;j-->0;)_of->dither_a[ci*4+j+1]=_of->dither_a[ci*4+j];
+ for(j=3;j-->0;)_of->dither_b[ci*4+j+1]=_of->dither_b[ci*4+j];
+ _of->dither_a[ci*4]=err;
+ s-=err;
+ if(mute>16)r=0;
+ else{
+ /*The difference of two uniform random values gives triangular
+ dither.*/
+ seed=op_rand(seed);
+ r=seed*OP_PRNG_GAIN;
+ seed=op_rand(seed);
+ r-=seed*OP_PRNG_GAIN;
+ }
+ /*Clamp in float out of paranoia that the input will be > 96 dBFS and
+ wrap if the integer is clamped.*/
+ si=op_float2int(OP_CLAMP(-32768,s+r,32767));
+ dst[_nchannels*i+ci]=(opus_int16)si;
+ /*Including clipping in the noise shaping is generally disastrous: the
+ futile effort to restore the clipped energy results in more clipping.
+ However, small amounts---at the level which could normally be created
+ by dither and rounding---are harmless and can even reduce clipping
+ somewhat due to the clipping sometimes reducing the dither + rounding
+ error.*/
+ _of->dither_b[ci*4]=mute>16?0:OP_CLAMP(-1.5F,si-s,1.5F);
+ }
+ /*Any non-zero sample in this frame restarts the silence count.*/
+ mute++;
+ if(!silent)mute=0;
+ }
+ /*Cap the saved counter at 65.*/
+ _of->dither_mute=OP_MIN(mute,65);
+ _of->dither_seed=seed;
+ }
+ /*Record which channel count the saved state belongs to.*/
+ _of->state_channel_count=_nchannels;
+ return _nsamples;
+}
+
+/*Float build: read 16-bit samples by converting the decoded floats with
+ op_float2short_filter().*/
+int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){
+ return op_filter_read_native(_of,_pcm,_buf_size,op_float2short_filter,_li);
+}
+
+/*Float build: read floats straight from op_read_native(), with no filter.
+ state_channel_count is cleared first, so the next op_float2short_filter()
+ call sees a channel count mismatch and resets its saved state.*/
+int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){
+ _of->state_channel_count=0;
+ return op_read_native(_of,_pcm,_buf_size,_li);
+}
+
+/*Filter that turns interleaved float input with any supported channel count
+   into interleaved float stereo.
+  Stereo input is copied, mono input is duplicated into both outputs, and
+   anything wider is downmixed with OP_STEREO_DOWNMIX.
+  In the downmix case, every channel of an input frame is read before its
+   output frame is written.
+  Return: The number of stereo frames written, limited by both _nsamples and
+           the _dst_sz>>1 frames that fit in _dst.*/
+static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels){
+  float *out;
+  int    nframes;
+  int    fi;
+  (void)_of;
+  /*Each output frame takes two values.*/
+  nframes=OP_MIN(_nsamples,_dst_sz>>1);
+  if(_nchannels==2){
+    memcpy(_dst,_src,nframes*2*sizeof(*_src));
+    return nframes;
+  }
+  out=(float *)_dst;
+  if(_nchannels==1){
+    for(fi=0;fi<nframes;fi++){
+      out[2*fi+1]=_src[fi];
+      out[2*fi+0]=out[2*fi+1];
+    }
+    return nframes;
+  }
+  for(fi=0;fi<nframes;fi++){
+    const op_sample *frame;
+    float            left;
+    float            right;
+    int              ci;
+    frame=_src+_nchannels*fi;
+    left=0;
+    right=0;
+    for(ci=0;ci<_nchannels;ci++){
+      left+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*frame[ci];
+      right+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*frame[ci];
+    }
+    out[2*fi+0]=left;
+    out[2*fi+1]=right;
+  }
+  return nframes;
+}
+
+/*Filter that converts interleaved float samples with any supported channel
+   count into interleaved 16-bit stereo.
+  Mono is quantized first and then duplicated in place.
+  More than two channels are downmixed to stereo inside _src before being
+   quantized, so _src is overwritten in that case.
+  Return: The number of samples per channel consumed from _src.*/
+static int op_float2short_stereo_filter(OggOpusFile *_of,
+ void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){
+  opus_int16 *out;
+  out=(opus_int16 *)_dst;
+  if(_nchannels==1){
+    int nmono;
+    int k;
+    /*Only half of the destination may be filled with mono samples, since
+       each one is about to become a stereo frame.*/
+    nmono=op_float2short_filter(_of,out,_dst_sz>>1,_src,_nsamples,1);
+    /*Expand from the back so no sample is overwritten before it is read.*/
+    for(k=nmono-1;k>=0;k--){
+      out[2*k+1]=out[k];
+      out[2*k+0]=out[2*k+1];
+    }
+    return nmono;
+  }
+  if(_nchannels>2){
+    int nframes;
+    nframes=OP_MIN(_nsamples,_dst_sz>>1);
+    /*Downmix in place: _src is both the input and the output.*/
+    _nsamples=op_stereo_filter(_of,_src,nframes*2,
+     _src,nframes,_nchannels);
+  }
+  return op_float2short_filter(_of,out,_dst_sz,_src,_nsamples,2);
+}
+
+/*Float build: read 16-bit stereo by running op_float2short_stereo_filter()
+ over the decoded floats.
+ No link index is reported (NULL is passed for _li).*/
+int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){
+ return op_filter_read_native(_of,_pcm,_buf_size,
+ op_float2short_stereo_filter,NULL);
+}
+
+/*Float build: read float stereo by running op_stereo_filter() over the
+ decoded floats.
+ state_channel_count is cleared first, so the next op_float2short_filter()
+ call sees a channel count mismatch and resets its saved state.
+ No link index is reported (NULL is passed for _li).*/
+int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){
+ _of->state_channel_count=0;
+ return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL);
+}
+
+#endif
diff --git a/drivers/opus/opusfile.h b/drivers/opus/opusfile.h
new file mode 100644
index 0000000000..91d06aa9ba
--- /dev/null
+++ b/drivers/opus/opusfile.h
@@ -0,0 +1,2102 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: stdio-based convenience library for opening/seeking/decoding
+ last mod: $Id: vorbisfile.h 17182 2010-04-29 03:48:32Z xiphmont $
+
+ ********************************************************************/
+#if !defined(_opusfile_h)
+# define _opusfile_h (1)
+
+/**\mainpage
+ \section Introduction
+
+ This is the documentation for the <tt>libopusfile</tt> C API.
+
+ The <tt>libopusfile</tt> package provides a convenient high-level API for
+ decoding and basic manipulation of all Ogg Opus audio streams.
+ <tt>libopusfile</tt> is implemented as a layer on top of Xiph.Org's
+ reference
+ <tt><a href="https://www.xiph.org/ogg/doc/libogg/reference.html">libogg</a></tt>
+ and
+ <tt><a href="https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/index.html">libopus</a></tt>
+ libraries.
+
+ <tt>libopusfile</tt> provides several sets of built-in routines for
+ file/stream access, and may also use custom stream I/O routines provided by
+ the embedded environment.
+ There are built-in I/O routines provided for ANSI-compliant
+ <code>stdio</code> (<code>FILE *</code>), memory buffers, and URLs
+ (including <file:> URLs, plus optionally <http:> and <https:> URLs).
+
+ \section Organization
+
+ The main API is divided into several sections:
+ - \ref stream_open_close
+ - \ref stream_info
+ - \ref stream_decoding
+ - \ref stream_seeking
+
+ Several additional sections are not tied to the main API.
+ - \ref stream_callbacks
+ - \ref header_info
+ - \ref error_codes
+
+ \section Overview
+
+ The <tt>libopusfile</tt> API always decodes files to 48&nbsp;kHz.
+ The original sample rate is not preserved by the lossy compression, though
+ it is stored in the header to allow you to resample to it after decoding
+ (the <tt>libopusfile</tt> API does not currently provide a resampler,
+ but the
+ <a href="http://www.speex.org/docs/manual/speex-manual/node7.html#SECTION00760000000000000000">Speex
+ resampler</a> is a good choice if you need one).
+ In general, if you are playing back the audio, you should leave it at
+ 48&nbsp;kHz, provided your audio hardware supports it.
+ When decoding to a file, it may be worth resampling back to the original
+ sample rate, so as not to surprise users who might not expect the sample
+ rate to change after encoding to Opus and decoding.
+
+ Opus files can contain anywhere from 1 to 255 channels of audio.
+ The channel mappings for up to 8 channels are the same as the
+ <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+ mappings</a>.
+ A special stereo API can convert everything to 2 channels, making it simple
+ to support multichannel files in an application which only has stereo
+ output.
+ Although the <tt>libopusfile</tt> ABI provides support for the theoretical
+ maximum number of channels, the current implementation does not support
+ files with more than 8 channels, as they do not have well-defined channel
+ mappings.
+
+ Like all Ogg files, Opus files may be "chained".
+ That is, multiple Opus files may be combined into a single, longer file just
+ by concatenating the original files.
+ This is commonly done in internet radio streaming, as it allows the title
+ and artist to be updated each time the song changes, since each link in the
+ chain includes its own set of metadata.
+
+ <tt>libopusfile</tt> fully supports chained files.
+ It will decode the first Opus stream found in each link of a chained file
+ (ignoring any other streams that might be concurrently multiplexed with it,
+ such as a video stream).
+
+ The channel count can also change between links.
+ If your application is not prepared to deal with this, it can use the stereo
+ API to ensure the audio from all links will always get decoded into a
+ common format.
+ Since <tt>libopusfile</tt> always decodes to 48&nbsp;kHz, you do not have to
+ worry about the sample rate changing between links (as was possible with
+ Vorbis).
+ This makes application support for chained files with <tt>libopusfile</tt>
+ very easy.*/
+
+# if defined(__cplusplus)
+extern "C" {
+# endif
+
+# include <stdarg.h>
+# include <stdio.h>
+# include <ogg/ogg.h>
+# include <opus/opus_multistream.h>
+
+/**@cond PRIVATE*/
+
+/*Enable special features for gcc and gcc-compatible compilers.*/
+# if !defined(OP_GNUC_PREREQ)
+# if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+# define OP_GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+# else
+# define OP_GNUC_PREREQ(_maj,_min) 0
+# endif
+# endif
+
+# if OP_GNUC_PREREQ(4,0)
+# pragma GCC visibility push(default)
+# endif
+
+typedef struct OpusHead OpusHead;
+typedef struct OpusTags OpusTags;
+typedef struct OpusPictureTag OpusPictureTag;
+typedef struct OpusServerInfo OpusServerInfo;
+typedef struct OpusFileCallbacks OpusFileCallbacks;
+typedef struct OggOpusFile OggOpusFile;
+
+/*Warning attributes for libopusfile functions.*/
+# if OP_GNUC_PREREQ(3,4)
+# define OP_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
+# else
+# define OP_WARN_UNUSED_RESULT
+# endif
+# if OP_GNUC_PREREQ(3,4)
+# define OP_ARG_NONNULL(_x) __attribute__((__nonnull__(_x)))
+# else
+# define OP_ARG_NONNULL(_x)
+# endif
+
+/**@endcond*/
+
+/**\defgroup error_codes Error Codes*/
+/*@{*/
+/**\name List of possible error codes
+ Many of the functions in this library return a negative error code when a
+ function fails.
+ This list provides a brief explanation of the common errors.
+ See each individual function for more details on what a specific error code
+ means in that context.*/
+/*@{*/
+
+/**A request did not succeed.*/
+#define OP_FALSE (-1)
+/*Currently not used externally.*/
+#define OP_EOF (-2)
+/**There was a hole in the page sequence numbers (e.g., a page was corrupt or
+ missing).*/
+#define OP_HOLE (-3)
+/**An underlying read, seek, or tell operation failed when it should have
+ succeeded.*/
+#define OP_EREAD (-128)
+/**A <code>NULL</code> pointer was passed where one was unexpected, or an
+ internal memory allocation failed, or an internal library error was
+ encountered.*/
+#define OP_EFAULT (-129)
+/**The stream used a feature that is not implemented, such as an unsupported
+ channel family.*/
+#define OP_EIMPL (-130)
+/**One or more parameters to a function were invalid.*/
+#define OP_EINVAL (-131)
+/**A purported Ogg Opus stream did not begin with an Ogg page, a purported
+ header packet did not start with one of the required strings, "OpusHead" or
+ "OpusTags", or a link in a chained file was encountered that did not
+ contain any logical Opus streams.*/
+#define OP_ENOTFORMAT (-132)
+/**A required header packet was not properly formatted, contained illegal
+ values, or was missing altogether.*/
+#define OP_EBADHEADER (-133)
+/**The ID header contained an unrecognized version number.*/
+#define OP_EVERSION (-134)
+/*Currently not used at all.*/
+#define OP_ENOTAUDIO (-135)
+/**An audio packet failed to decode properly.
+ This is usually caused by a multistream Ogg packet where the durations of
+ the individual Opus packets contained in it are not all the same.*/
+#define OP_EBADPACKET (-136)
+/**We failed to find data we had seen before, or the bitstream structure was
+ sufficiently malformed that seeking to the target destination was
+ impossible.*/
+#define OP_EBADLINK (-137)
+/**An operation that requires seeking was requested on an unseekable stream.*/
+#define OP_ENOSEEK (-138)
+/**The first or last granule position of a link failed basic validity checks.*/
+#define OP_EBADTIMESTAMP (-139)
+
+/*@}*/
+/*@}*/
+
+/**\defgroup header_info Header Information*/
+/*@{*/
+
+/**The maximum number of channels in an Ogg Opus stream.*/
+#define OPUS_CHANNEL_COUNT_MAX (255)
+
+/**Ogg Opus bitstream information.
+ This contains the basic playback parameters for a stream, and corresponds to
+ the initial ID header packet of an Ogg Opus stream.*/
+struct OpusHead{
+ /**The Ogg Opus format version, in the range 0...255.
+ The top 4 bits represent a "major" version, and the bottom four bits
+ represent backwards-compatible "minor" revisions.
+ The current specification describes version 1.
+ This library will recognize versions up through 15 as backwards compatible
+ with the current specification.
+ An earlier draft of the specification described a version 0, but the only
+ difference between version 1 and version 0 is that version 0 did
+ not specify the semantics for handling the version field.*/
+ int version;
+ /**The number of channels, in the range 1...255.*/
+ int channel_count;
+ /**The number of samples that should be discarded from the beginning of the
+ stream.*/
+ unsigned pre_skip;
+ /**The sampling rate of the original input.
+ All Opus audio is coded at 48 kHz, and should also be decoded at 48 kHz
+ for playback (unless the target hardware does not support this sampling
+ rate).
+ However, this field may be used to resample the audio back to the original
+ sampling rate, for example, when saving the output to a file.*/
+ opus_uint32 input_sample_rate;
+ /**The gain to apply to the decoded output, in dB, as a Q8 value in the range
+ -32768...32767.
+ The <tt>libopusfile</tt> API will automatically apply this gain to the
+ decoded output before returning it, scaling it by
+ <code>pow(10,output_gain/(20.0*256))</code>.*/
+ int output_gain;
+ /**The channel mapping family, in the range 0...255.
+ Channel mapping family 0 covers mono or stereo in a single stream.
+ Channel mapping family 1 covers 1 to 8 channels in one or more streams,
+ using the Vorbis speaker assignments.
+ Channel mapping family 255 covers 1 to 255 channels in one or more
+ streams, but without any defined speaker assignment.*/
+ int mapping_family;
+ /**The number of Opus streams in each Ogg packet, in the range 1...255.*/
+ int stream_count;
+ /**The number of coupled Opus streams in each Ogg packet, in the range
+ 0...127.
+ This must satisfy <code>0 <= coupled_count <= stream_count</code> and
+ <code>coupled_count + stream_count <= 255</code>.
+ The coupled streams appear first, before all uncoupled streams, in an Ogg
+ Opus packet.*/
+ int coupled_count;
+ /**The mapping from coded stream channels to output channels.
+ Let <code>index=mapping[k]</code> be the value for channel <code>k</code>.
+ If <code>index<2*coupled_count</code>, then it refers to the left channel
+ from stream <code>(index/2)</code> if even, and the right channel from
+ stream <code>(index/2)</code> if odd.
+ Otherwise, it refers to the output of the uncoupled stream
+ <code>(index-coupled_count)</code>.*/
+ unsigned char mapping[OPUS_CHANNEL_COUNT_MAX];
+};
+
+/**The metadata from an Ogg Opus stream.
+
+ This structure holds the in-stream metadata corresponding to the 'comment'
+ header packet of an Ogg Opus stream.
+ The comment header is meant to be used much like someone jotting a quick
+ note on the label of a CD.
+ It should be a short, to the point text note that can be more than a couple
+ words, but not more than a short paragraph.
+
+ The metadata is stored as a series of (tag, value) pairs, in length-encoded
+ string vectors, using the same format as Vorbis (without the final "framing
+ bit"), Theora, and Speex, except for the packet header.
+ The first occurrence of the '=' character delimits the tag and value.
+ A particular tag may occur more than once, and order is significant.
+ The character set encoding for the strings is always UTF-8, but the tag
+ names are limited to ASCII, and treated as case-insensitive.
+ See <a href="http://www.xiph.org/vorbis/doc/v-comment.html">the Vorbis
+ comment header specification</a> for details.
+
+ In filling in this structure, <tt>libopusfile</tt> will null-terminate the
+ #user_comments strings for safety.
+ However, the bitstream format itself treats them as 8-bit clean vectors,
+ possibly containing NUL characters, so the #comment_lengths array should be
+ treated as their authoritative length.
+
+ This structure is binary and source-compatible with a
+ <code>vorbis_comment</code>, and pointers to it may be freely cast to
+ <code>vorbis_comment</code> pointers, and vice versa.
+ It is provided as a separate type to avoid introducing a compile-time
+ dependency on the libvorbis headers.*/
+struct OpusTags{
+ /**The array of comment string vectors.*/
+ char **user_comments;
+ /**An array of the corresponding length of each vector, in bytes.*/
+ int *comment_lengths;
+ /**The total number of comment strings.*/
+ int comments;
+ /**The null-terminated vendor string.
+ This identifies the software used to encode the stream.*/
+ char *vendor;
+};
+
+/**\name Picture tag image formats*/
+/*@{*/
+
+/**The MIME type was not recognized, or the image data did not match the
+ declared MIME type.*/
+#define OP_PIC_FORMAT_UNKNOWN (-1)
+/**The MIME type indicates the image data is really a URL.*/
+#define OP_PIC_FORMAT_URL (0)
+/**The image is a JPEG.*/
+#define OP_PIC_FORMAT_JPEG (1)
+/**The image is a PNG.*/
+#define OP_PIC_FORMAT_PNG (2)
+/**The image is a GIF.*/
+#define OP_PIC_FORMAT_GIF (3)
+
+/*@}*/
+
+/**The contents of a METADATA_BLOCK_PICTURE tag.*/
+struct OpusPictureTag{
+ /**The picture type according to the ID3v2 APIC frame:
+ <ol start="0">
+ <li>Other</li>
+ <li>32x32 pixels 'file icon' (PNG only)</li>
+ <li>Other file icon</li>
+ <li>Cover (front)</li>
+ <li>Cover (back)</li>
+ <li>Leaflet page</li>
+ <li>Media (e.g. label side of CD)</li>
+ <li>Lead artist/lead performer/soloist</li>
+ <li>Artist/performer</li>
+ <li>Conductor</li>
+ <li>Band/Orchestra</li>
+ <li>Composer</li>
+ <li>Lyricist/text writer</li>
+ <li>Recording Location</li>
+ <li>During recording</li>
+ <li>During performance</li>
+ <li>Movie/video screen capture</li>
+ <li>A bright colored fish</li>
+ <li>Illustration</li>
+ <li>Band/artist logotype</li>
+ <li>Publisher/Studio logotype</li>
+ </ol>
+ Others are reserved and should not be used.
+ There may only be one each of picture type 1 and 2 in a file.*/
+ opus_int32 type;
+ /**The MIME type of the picture, in printable ASCII characters 0x20-0x7E.
+ The MIME type may also be <code>"-->"</code> to signify that the data part
+ is a URL pointing to the picture instead of the picture data itself.
+ In this case, a terminating NUL is appended to the URL string in #data,
+ but #data_length is set to the length of the string excluding that
+ terminating NUL.*/
+ char *mime_type;
+ /**The description of the picture, in UTF-8.*/
+ char *description;
+ /**The width of the picture in pixels.*/
+ opus_uint32 width;
+ /**The height of the picture in pixels.*/
+ opus_uint32 height;
+ /**The color depth of the picture in bits-per-pixel (<em>not</em>
+ bits-per-channel).*/
+ opus_uint32 depth;
+ /**For indexed-color pictures (e.g., GIF), the number of colors used, or 0
+ for non-indexed pictures.*/
+ opus_uint32 colors;
+ /**The length of the picture data in bytes.*/
+ opus_uint32 data_length;
+ /**The binary picture data.*/
+ unsigned char *data;
+ /**The format of the picture data, if known.
+ One of
+ <ul>
+ <li>#OP_PIC_FORMAT_UNKNOWN,</li>
+ <li>#OP_PIC_FORMAT_URL,</li>
+ <li>#OP_PIC_FORMAT_JPEG,</li>
+ <li>#OP_PIC_FORMAT_PNG, or</li>
+ <li>#OP_PIC_FORMAT_GIF.</li>
+ </ul>*/
+ int format;
+};
+
+/**\name Functions for manipulating header data
+
+ These functions manipulate the #OpusHead and #OpusTags structures,
+ which describe the audio parameters and tag-value metadata, respectively.
+ These can be used to query the headers returned by <tt>libopusfile</tt>, or
+ to parse Opus headers from sources other than an Ogg Opus stream, provided
+ they use the same format.*/
+/*@{*/
+
+/**Parses the contents of the ID header packet of an Ogg Opus stream.
+ \param[out] _head Returns the contents of the parsed packet.
+ The contents of this structure are untouched on error.
+ This may be <code>NULL</code> to merely test the header
+ for validity.
+ \param[in] _data The contents of the ID header packet.
+ \param _len The number of bytes of data in the ID header packet.
+ \return 0 on success or a negative value on error.
+ \retval #OP_ENOTFORMAT If the data does not start with the "OpusHead"
+ string.
+ \retval #OP_EVERSION If the version field signaled a version this library
+ does not know how to parse.
+ \retval #OP_EIMPL If the channel mapping family was 255, which general
+ purpose players should not attempt to play.
+ \retval #OP_EBADHEADER If the contents of the packet otherwise violate the
+ Ogg Opus specification:
+ <ul>
+ <li>Insufficient data,</li>
+ <li>Too much data for the known minor versions,</li>
+ <li>An unrecognized channel mapping family,</li>
+ <li>Zero channels or too many channels,</li>
+ <li>Zero coded streams,</li>
+ <li>Too many coupled streams, or</li>
+ <li>An invalid channel mapping index.</li>
+ </ul>*/
+OP_WARN_UNUSED_RESULT int opus_head_parse(OpusHead *_head,
+ const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2);
+
+/**Converts a granule position to a sample offset for a given Ogg Opus stream.
+ The sample offset is simply <code>_gp-_head->pre_skip</code>.
+ Granule position values smaller than OpusHead#pre_skip correspond to audio
+ that should never be played, and thus have no associated sample offset.
+ This function returns -1 for such values.
+ This function also correctly handles extremely large granule positions,
+ which may have wrapped around to a negative number when stored in a signed
+ ogg_int64_t value.
+ \param _head The #OpusHead information from the ID header of the stream.
+ \param _gp The granule position to convert.
+ \return The sample offset associated with the given granule position
+ (counting at a 48 kHz sampling rate), or the special value -1 on
+ error (i.e., the granule position was smaller than the pre-skip
+ amount).*/
+ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp)
+ OP_ARG_NONNULL(1);
+
+/**Parses the contents of the 'comment' header packet of an Ogg Opus stream.
+ \param[out] _tags An uninitialized #OpusTags structure.
+ This returns the contents of the parsed packet.
+ The contents of this structure are untouched on error.
+ This may be <code>NULL</code> to merely test the header
+ for validity.
+ \param[in] _data The contents of the 'comment' header packet.
+ \param _len The number of bytes of data in the 'comment' header packet.
+ \retval 0 Success.
+ \retval #OP_ENOTFORMAT If the data does not start with the "OpusTags"
+ string.
+ \retval #OP_EBADHEADER If the contents of the packet otherwise violate the
+ Ogg Opus specification.
+ \retval #OP_EFAULT If there wasn't enough memory to store the tags.*/
+OP_WARN_UNUSED_RESULT int opus_tags_parse(OpusTags *_tags,
+ const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2);
+
+/**Performs a deep copy of an #OpusTags structure.
+ \param _dst The #OpusTags structure to copy into.
+ If this function fails, the contents of this structure remain
+ untouched.
+ \param _src The #OpusTags structure to copy from.
+ \retval 0 Success.
+ \retval #OP_EFAULT If there wasn't enough memory to copy the tags.*/
+int opus_tags_copy(OpusTags *_dst,const OpusTags *_src) OP_ARG_NONNULL(1);
+
+/**Initializes an #OpusTags structure.
+ This should be called on a freshly allocated #OpusTags structure before
+ attempting to use it.
+ \param _tags The #OpusTags structure to initialize.*/
+void opus_tags_init(OpusTags *_tags) OP_ARG_NONNULL(1);
+
+/**Add a (tag, value) pair to an initialized #OpusTags structure.
+ \note Neither opus_tags_add() nor opus_tags_add_comment() support values
+ containing embedded NULs, although the bitstream format does support them.
+ To add such tags, you will need to manipulate the #OpusTags structure
+ directly.
+ \param _tags The #OpusTags structure to add the (tag, value) pair to.
+ \param _tag A NUL-terminated, case-insensitive, ASCII string containing
+ the tag to add (without an '=' character).
+ \param _value A NUL-terminated UTF-8 string containing the corresponding value.
+ \return 0 on success, or a negative value on failure.
+ \retval #OP_EFAULT An internal memory allocation failed.*/
+int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2) OP_ARG_NONNULL(3);
+
+/**Add a comment to an initialized #OpusTags structure.
+ \note Neither opus_tags_add_comment() nor opus_tags_add() support comments
+ containing embedded NULs, although the bitstream format does support them.
+ To add such tags, you will need to manipulate the #OpusTags structure
+ directly.
+ \param _tags The #OpusTags structure to add the comment to.
+ \param _comment A NUL-terminated UTF-8 string containing the comment in
+ "TAG=value" form.
+ \return 0 on success, or a negative value on failure.
+ \retval #OP_EFAULT An internal memory allocation failed.*/
+int opus_tags_add_comment(OpusTags *_tags,const char *_comment)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Look up a comment value by its tag.
+ \param _tags An initialized #OpusTags structure.
+ \param _tag The tag to look up.
+ \param _count The instance of the tag.
+ The same tag can appear multiple times, each with a distinct
+ value, so an index is required to retrieve them all.
+ The order in which these values appear is significant and
+ should be preserved.
+ Use opus_tags_query_count() to get the legal range for the
+ \a _count parameter.
+ \return A pointer to the queried tag's value.
+ This points directly to data in the #OpusTags structure.
+ It should not be modified or freed by the application, and
+ modifications to the structure may invalidate the pointer.
+ \retval NULL If no matching tag is found.*/
+const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Look up the number of instances of a tag.
+ Call this first when querying for a specific tag and then iterate over the
+ number of instances with separate calls to opus_tags_query() to retrieve
+ all the values for that tag in order.
+ \param _tags An initialized #OpusTags structure.
+ \param _tag The tag to look up.
+ \return The number of instances of this particular tag.*/
+int opus_tags_query_count(const OpusTags *_tags,const char *_tag)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Get the track gain from an R128_TRACK_GAIN tag, if one was specified.
+ This searches for the first R128_TRACK_GAIN tag with a valid signed,
+ 16-bit decimal integer value and returns the value.
+ This routine is exposed merely for convenience for applications which wish
+ to do something special with the track gain (i.e., display it).
+ If you simply wish to apply the track gain instead of the header gain, you
+ can use op_set_gain_offset() with an #OP_TRACK_GAIN type and no offset.
+ \param _tags An initialized #OpusTags structure.
+ \param[out] _gain_q8 The track gain, in 1/256ths of a dB.
+ This will lie in the range [-32768,32767], and should
+ be applied in <em>addition</em> to the header gain.
+ On error, no value is returned, and the previous
+ contents remain unchanged.
+ \return 0 on success, or a negative value on error.
+ \retval #OP_FALSE There was no track gain available in the given tags.*/
+int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Clears the #OpusTags structure.
+ This should be called on an #OpusTags structure after it is no longer
+ needed.
+ It will free all memory used by the structure members.
+ \param _tags The #OpusTags structure to clear.*/
+void opus_tags_clear(OpusTags *_tags) OP_ARG_NONNULL(1);
+
+/**Check if \a _comment is an instance of a \a _tag_name tag.
+ \see opus_tagncompare
+ \param _tag_name A NUL-terminated, case-insensitive, ASCII string containing
+ the name of the tag to check for (without the terminating
+ '=' character).
+ \param _comment The comment string to check.
+ \return An integer less than, equal to, or greater than zero if \a _comment
+ is found, respectively, to be less than, to match, or to be greater
+ than a "tag=value" string whose tag matches \a _tag_name.*/
+int opus_tagcompare(const char *_tag_name,const char *_comment);
+
+/**Check if \a _comment is an instance of a \a _tag_name tag.
+ This version is slightly more efficient than opus_tagcompare() if the length
+ of the tag name is already known (e.g., because it is a constant).
+ \see opus_tagcompare
+ \param _tag_name A case-insensitive ASCII string containing the name of the
+ tag to check for (without the terminating '=' character).
+ \param _tag_len The number of characters in the tag name.
+ This must be non-negative.
+ \param _comment The comment string to check.
+ \return An integer less than, equal to, or greater than zero if \a _comment
+ is found, respectively, to be less than, to match, or to be greater
+ than a "tag=value" string whose tag matches the first \a _tag_len
+ characters of \a _tag_name.*/
+int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment);
+
+/**Parse a single METADATA_BLOCK_PICTURE tag.
+ This decodes the BASE64-encoded content of the tag and returns a structure
+ with the MIME type, description, image parameters (if known), and the
+ compressed image data.
+ If the MIME type indicates the presence of an image format we recognize
+ (JPEG, PNG, or GIF) and the actual image data contains the magic signature
+ associated with that format, then the OpusPictureTag::format field will be
+ set to the corresponding format.
+ This is provided as a convenience to avoid requiring applications to parse
+ the MIME type and/or do their own format detection for the commonly used
+ formats.
+ In this case, we also attempt to extract the image parameters directly from
+ the image data (overriding any that were present in the tag, which the
+ specification says applications are not meant to rely on).
+ The application must still provide its own support for actually decoding the
+ image data and, if applicable, retrieving that data from URLs.
+ \param[out] _pic Returns the parsed picture data.
+ No sanitization is done on the type, MIME type, or
+ description fields, so these might return invalid values.
+ The contents of this structure are left unmodified on
+ failure.
+ \param _tag The METADATA_BLOCK_PICTURE tag contents.
+ The leading "METADATA_BLOCK_PICTURE=" portion is optional,
+ to allow the function to be used either directly on the
+ values in OpusTags::user_comments or on the return value
+ of opus_tags_query().
+ \return 0 on success or a negative value on error.
+ \retval #OP_ENOTFORMAT The METADATA_BLOCK_PICTURE contents were not valid.
+ \retval #OP_EFAULT There was not enough memory to store the picture tag
+ contents.*/
+OP_WARN_UNUSED_RESULT int opus_picture_tag_parse(OpusPictureTag *_pic,
+ const char *_tag) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Initializes an #OpusPictureTag structure.
+ This should be called on a freshly allocated #OpusPictureTag structure
+ before attempting to use it.
+ \param _pic The #OpusPictureTag structure to initialize.*/
+void opus_picture_tag_init(OpusPictureTag *_pic) OP_ARG_NONNULL(1);
+
+/**Clears the #OpusPictureTag structure.
+ This should be called on an #OpusPictureTag structure after it is no longer
+ needed.
+ It will free all memory used by the structure members.
+ \param _pic The #OpusPictureTag structure to clear.*/
+void opus_picture_tag_clear(OpusPictureTag *_pic) OP_ARG_NONNULL(1);
+
+/*@}*/
+
+/*@}*/
+
+/**\defgroup url_options URL Reading Options*/
+/*@{*/
+/**\name URL reading options
+ Options for op_url_stream_create() and associated functions.
+ These allow you to provide proxy configuration parameters, skip SSL
+ certificate checks, etc.
+ Options are processed in order, and if the same option is passed multiple
+ times, only the value specified by the last occurrence has an effect
+ (unless otherwise specified).
+ They may be expanded in the future.*/
+/*@{*/
+
+/**@cond PRIVATE*/
+
+/*These are the raw numbers used to define the request codes.
+ They should not be used directly.
+ OP_URL_OPT() turns one of these numbers into a <code>char *</code> by adding
+ it to a null <code>char *</code>, presumably so that every option tag can be
+ passed through the variable-length argument list as a pointer.
+ NOTE(review): arithmetic on a null pointer is not defined by the C or C++
+ standards, and some compilers warn about it; confirm that this is
+ acceptable for all supported toolchains.*/
+#define OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST (6464)
+#define OP_HTTP_PROXY_HOST_REQUEST (6528)
+#define OP_HTTP_PROXY_PORT_REQUEST (6592)
+#define OP_HTTP_PROXY_USER_REQUEST (6656)
+#define OP_HTTP_PROXY_PASS_REQUEST (6720)
+#define OP_GET_SERVER_INFO_REQUEST (6784)
+
+#define OP_URL_OPT(_request) ((_request)+(char *)0)
+
+/*These macros trigger compilation errors or warnings if the wrong types are
+ provided to one of the URL options.
+ Each one evaluates to its argument.
+ OP_CHECK_INT() first compares the argument against an <code>opus_int32</code>
+ zero (discarding the result) and then yields the argument cast to
+ <code>opus_int32</code>.
+ The pointer checks add to the argument the difference between the argument
+ and the same argument cast to the expected pointer type.*/
+#define OP_CHECK_INT(_x) ((void)((_x)==(opus_int32)0),(opus_int32)(_x))
+#define OP_CHECK_CONST_CHAR_PTR(_x) ((_x)+((_x)-(const char *)(_x)))
+#define OP_CHECK_SERVER_INFO_PTR(_x) ((_x)+((_x)-(OpusServerInfo *)(_x)))
+
+/**@endcond*/
+
+/**HTTP/Shoutcast/Icecast server information associated with a URL.
+ Initialize this structure with opus_server_info_init() and release it with
+ opus_server_info_clear() once it is no longer needed.*/
+struct OpusServerInfo{
+ /**The name of the server (icy-name/ice-name).
+ This is <code>NULL</code> if there was no <code>icy-name</code> or
+ <code>ice-name</code> header.*/
+ char *name;
+ /**A short description of the server (icy-description/ice-description).
+ This is <code>NULL</code> if there was no <code>icy-description</code> or
+ <code>ice-description</code> header.*/
+ char *description;
+ /**The genre the server falls under (icy-genre/ice-genre).
+ This is <code>NULL</code> if there was no <code>icy-genre</code> or
+ <code>ice-genre</code> header.*/
+ char *genre;
+ /**The homepage for the server (icy-url/ice-url).
+ This is <code>NULL</code> if there was no <code>icy-url</code> or
+ <code>ice-url</code> header.*/
+ char *url;
+ /**The software used by the origin server (Server).
+ This is <code>NULL</code> if there was no <code>Server</code> header.*/
+ char *server;
+ /**The media type of the entity sent to the recipient (Content-Type).
+ This is <code>NULL</code> if there was no <code>Content-Type</code>
+ header.*/
+ char *content_type;
+ /**The nominal stream bitrate in kbps (icy-br/ice-bitrate).
+ This is <code>-1</code> if there was no <code>icy-br</code> or
+ <code>ice-bitrate</code> header.*/
+ opus_int32 bitrate_kbps;
+ /**Flag indicating whether the server is public (<code>1</code>) or not
+ (<code>0</code>) (icy-pub/ice-public).
+ This is <code>-1</code> if there was no <code>icy-pub</code> or
+ <code>ice-public</code> header.*/
+ int is_public;
+ /**Flag indicating whether the server is using HTTPS instead of HTTP.
+ This is <code>0</code> unless HTTPS is being used.
+ This may not match the protocol used in the original URL if there were
+ redirections.*/
+ int is_ssl;
+};
+
+/**Initializes an #OpusServerInfo structure.
+ All fields are set as if the corresponding header was not available.
+ \param _info The #OpusServerInfo structure to initialize.
+ \note If you use this function, you must link against <tt>libopusurl</tt>.*/
+void opus_server_info_init(OpusServerInfo *_info) OP_ARG_NONNULL(1);
+
+/**Clears the #OpusServerInfo structure.
+ This should be called on an #OpusServerInfo structure after it is no longer
+ needed.
+ It will free all memory used by the structure members.
+ \param _info The #OpusServerInfo structure to clear.
+ \note If you use this function, you must link against <tt>libopusurl</tt>.*/
+void opus_server_info_clear(OpusServerInfo *_info) OP_ARG_NONNULL(1);
+
+/**Skip the certificate check when connecting via TLS/SSL (https).
+ \param _b <code>opus_int32</code>: Whether or not to skip the certificate
+ check.
+ The check will be skipped if \a _b is non-zero, and will not be
+ skipped if \a _b is zero.
+ \hideinitializer*/
+#define OP_SSL_SKIP_CERTIFICATE_CHECK(_b) \
+ OP_URL_OPT(OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST),OP_CHECK_INT(_b)
+
+/**Proxy connections through the given host.
+ If no port is specified via #OP_HTTP_PROXY_PORT, the port number defaults
+ to 8080 (http-alt).
+ All proxy parameters are ignored for non-http and non-https URLs.
+ \param _host <code>const char *</code>: The proxy server hostname.
+ This may be <code>NULL</code> to disable the use of a proxy
+ server.
+ \hideinitializer*/
+#define OP_HTTP_PROXY_HOST(_host) \
+ OP_URL_OPT(OP_HTTP_PROXY_HOST_REQUEST),OP_CHECK_CONST_CHAR_PTR(_host)
+
+/**Use the given port when proxying connections.
+ This option only has an effect if #OP_HTTP_PROXY_HOST is specified with a
+ non-<code>NULL</code> \a _host.
+ If this option is not provided, the proxy port number defaults to 8080
+ (http-alt).
+ All proxy parameters are ignored for non-http and non-https URLs.
+ \param _port <code>opus_int32</code>: The proxy server port.
+ This must be in the range 0...65535 (inclusive), or the
+ URL function this is passed to will fail.
+ \hideinitializer*/
+#define OP_HTTP_PROXY_PORT(_port) \
+ OP_URL_OPT(OP_HTTP_PROXY_PORT_REQUEST),OP_CHECK_INT(_port)
+
+/**Use the given user name for authentication when proxying connections.
+ All proxy parameters are ignored for non-http and non-https URLs.
+ \param _user <code>const char *</code>: The proxy server user name.
+ This may be <code>NULL</code> to disable proxy
+ authentication.
+ A non-<code>NULL</code> value only has an effect
+ if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_PASS
+ are also specified with non-<code>NULL</code>
+ arguments.
+ \hideinitializer*/
+#define OP_HTTP_PROXY_USER(_user) \
+ OP_URL_OPT(OP_HTTP_PROXY_USER_REQUEST),OP_CHECK_CONST_CHAR_PTR(_user)
+
+/**Use the given password for authentication when proxying connections.
+ All proxy parameters are ignored for non-http and non-https URLs.
+ \param _pass <code>const char *</code>: The proxy server password.
+ This may be <code>NULL</code> to disable proxy
+ authentication.
+ A non-<code>NULL</code> value only has an effect
+ if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_USER
+ are also specified with non-<code>NULL</code>
+ arguments.
+ \hideinitializer*/
+#define OP_HTTP_PROXY_PASS(_pass) \
+ OP_URL_OPT(OP_HTTP_PROXY_PASS_REQUEST),OP_CHECK_CONST_CHAR_PTR(_pass)
+
+/**Parse information about the streaming server (if any) and return it.
+ Very little validation is done.
+ In particular, OpusServerInfo::url may not be a valid URL,
+ OpusServerInfo::bitrate_kbps may not really be in kbps, and
+ OpusServerInfo::content_type may not be a valid MIME type.
+ The character set of the string fields is not specified anywhere, and should
+ not be assumed to be valid UTF-8.
+ \param _info <code>OpusServerInfo *</code>: Returns information about the
+ server.
+ If there is any error opening the stream, the
+ contents of this structure remain
+ unmodified.
+ On success, fills in the structure with the
+ server information that was available, if
+ any.
+ After a successful return, the contents of
+ this structure should be freed by calling
+ opus_server_info_clear().
+ \hideinitializer*/
+#define OP_GET_SERVER_INFO(_info) \
+ OP_URL_OPT(OP_GET_SERVER_INFO_REQUEST),OP_CHECK_SERVER_INFO_PTR(_info)
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_callbacks Abstract Stream Reading Interface*/
+/*@{*/
+/**\name Functions for reading from streams
+ These functions define the interface used to read from and seek in a stream
+ of data.
+ A stream does not need to implement seeking, but the decoder will not be
+ able to seek if it does not do so.
+ These functions also include some convenience routines for working with
+ standard <code>FILE</code> pointers, complete streams stored in a single
+ block of memory, or URLs.*/
+/*@{*/
+
+/**Reads up to \a _nbytes bytes of data from \a _stream.
+ \param _stream The stream to read from.
+ \param[out] _ptr The buffer to store the data in.
+ \param _nbytes The maximum number of bytes to read.
+ This function may return fewer, though it will not
+ return zero unless it reaches end-of-file.
+ \return The number of bytes successfully read, or a negative value on
+ error.*/
+typedef int (*op_read_func)(void *_stream,unsigned char *_ptr,int _nbytes);
+
+/**Sets the position indicator for \a _stream.
+ The new position, measured in bytes, is obtained by adding \a _offset
+ bytes to the position specified by \a _whence.
+ If \a _whence is set to <code>SEEK_SET</code>, <code>SEEK_CUR</code>, or
+ <code>SEEK_END</code>, the offset is relative to the start of the stream,
+ the current position indicator, or end-of-file, respectively.
+ \retval 0 Success.
+ \retval -1 Seeking is not supported or an error occurred.
+ <code>errno</code> need not be set.*/
+typedef int (*op_seek_func)(void *_stream,opus_int64 _offset,int _whence);
+
+/**Obtains the current value of the position indicator for \a _stream.
+ \return The current position indicator.*/
+typedef opus_int64 (*op_tell_func)(void *_stream);
+
+/**Closes the underlying stream.
+ \retval 0 Success.
+ \retval EOF An error occurred.
+ <code>errno</code> need not be set.*/
+typedef int (*op_close_func)(void *_stream);
+
+/**The callbacks used to access non-<code>FILE</code> stream resources.
+ The function prototypes are basically the same as for the stdio functions
+ <code>fread()</code>, <code>fseek()</code>, <code>ftell()</code>, and
+ <code>fclose()</code>.
+ The differences are that the <code>FILE *</code> arguments have been
+ replaced with a <code>void *</code>, which is to be used as a pointer to
+ whatever internal data these functions might need, that #seek and #tell
+ take and return 64-bit offsets, and that #seek <em>must</em> return -1 if
+ the stream is unseekable.*/
+struct OpusFileCallbacks{
+ /**Used to read data from the stream.
+ This must not be <code>NULL</code>.*/
+ op_read_func read;
+ /**Used to seek in the stream.
+ This may be <code>NULL</code> if seeking is not implemented.*/
+ op_seek_func seek;
+ /**Used to return the current read position in the stream.
+ This may be <code>NULL</code> if seeking is not implemented.
+ If #seek is implemented and succeeds on a particular source, then this
+ must be implemented and succeed as well.*/
+ op_tell_func tell;
+ /**Used to close the stream when the decoder is freed.
+ This may be <code>NULL</code> to leave the stream open.
+ It is not called if op_open_callbacks() fails with an error.*/
+ op_close_func close;
+};
+
+/**Opens a stream with <code>fopen()</code> and fills in a set of callbacks
+ that can be used to access it.
+ This is useful to avoid writing your own portable 64-bit seeking wrappers,
+ and also avoids cross-module linking issues on Windows, where a
+ <code>FILE *</code> must be accessed by routines defined in the same module
+ that opened it.
+ \param[out] _cb The callbacks to use for this file.
+ If there is an error opening the file, nothing will be
+ filled in here.
+ \param _path The path to the file to open.
+ On Windows, this string must be UTF-8 (to allow access to
+ files whose names cannot be represented in the current
+ MBCS code page).
+ All other systems use the native character encoding.
+ \param _mode The mode to open the file in.
+ \return A stream handle to use with the callbacks, or <code>NULL</code> on
+ error.*/
+OP_WARN_UNUSED_RESULT void *op_fopen(OpusFileCallbacks *_cb,
+ const char *_path,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2)
+ OP_ARG_NONNULL(3);
+
+/**Opens a stream with <code>fdopen()</code> and fills in a set of callbacks
+ that can be used to access it.
+ This is useful to avoid writing your own portable 64-bit seeking wrappers,
+ and also avoids cross-module linking issues on Windows, where a
+ <code>FILE *</code> must be accessed by routines defined in the same module
+ that opened it.
+ \param[out] _cb The callbacks to use for this file.
+ If there is an error opening the file, nothing will be
+ filled in here.
+ \param _fd The file descriptor to open.
+ \param _mode The mode to open the file in.
+ \return A stream handle to use with the callbacks, or <code>NULL</code> on
+ error.*/
+OP_WARN_UNUSED_RESULT void *op_fdopen(OpusFileCallbacks *_cb,
+ int _fd,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(3);
+
+/**Opens a stream with <code>freopen()</code> and fills in a set of callbacks
+ that can be used to access it.
+ This is useful to avoid writing your own portable 64-bit seeking wrappers,
+ and also avoids cross-module linking issues on Windows, where a
+ <code>FILE *</code> must be accessed by routines defined in the same module
+ that opened it.
+ \param[out] _cb The callbacks to use for this file.
+ If there is an error opening the file, nothing will be
+ filled in here.
+ \param _path The path to the file to open.
+ On Windows, this string must be UTF-8 (to allow access
+ to files whose names cannot be represented in the
+ current MBCS code page).
+ All other systems use the native character encoding.
+ \param _mode The mode to open the file in.
+ \param _stream A stream previously returned by op_fopen(), op_fdopen(),
+ or op_freopen().
+ \return A stream handle to use with the callbacks, or <code>NULL</code> on
+ error.*/
+OP_WARN_UNUSED_RESULT void *op_freopen(OpusFileCallbacks *_cb,
+ const char *_path,const char *_mode,void *_stream) OP_ARG_NONNULL(1)
+ OP_ARG_NONNULL(2) OP_ARG_NONNULL(3) OP_ARG_NONNULL(4);
+
+/**Creates a stream that reads from the given block of memory.
+ This block of memory must contain the complete stream to decode.
+ This is useful for caching small streams (e.g., sound effects) in RAM.
+ \param[out] _cb The callbacks to use for this stream.
+ If there is an error creating the stream, nothing will be
+ filled in here.
+ \param _data The block of memory to read from.
+ \param _size The size of the block of memory.
+ \return A stream handle to use with the callbacks, or <code>NULL</code> on
+ error.*/
+OP_WARN_UNUSED_RESULT void *op_mem_stream_create(OpusFileCallbacks *_cb,
+ const unsigned char *_data,size_t _size) OP_ARG_NONNULL(1);
+
+/**Creates a stream that reads from the given URL.
+ This function behaves identically to op_url_stream_create(), except that it
+ takes a va_list instead of a variable number of arguments.
+ It does not call the <code>va_end</code> macro, and because it invokes the
+ <code>va_arg</code> macro, the value of \a _ap is undefined after the call.
+ \note If you use this function, you must link against <tt>libopusurl</tt>.
+ \param[out] _cb The callbacks to use for this stream.
+ If there is an error creating the stream, nothing will
+ be filled in here.
+ \param _url The URL to read from.
+ Currently only the <file:>, <http:>, and <https:>
+ schemes are supported.
+ Both <http:> and <https:> may be disabled at compile
+ time, in which case opening such URLs will always fail.
+ Currently this only supports URIs.
+ IRIs should be converted to UTF-8 and URL-escaped, with
+ internationalized domain names encoded in punycode,
+ before passing them to this function.
+ \param[in,out] _ap A list of the \ref url_options "optional flags" to use.
+ This is a variable-length list of options terminated
+ with <code>NULL</code>.
+ \return A stream handle to use with the callbacks, or <code>NULL</code> on
+ error.*/
+OP_WARN_UNUSED_RESULT void *op_url_stream_vcreate(OpusFileCallbacks *_cb,
+ const char *_url,va_list _ap) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Creates a stream that reads from the given URL.
+ \note If you use this function, you must link against <tt>libopusurl</tt>.
+ \param[out] _cb The callbacks to use for this stream.
+ If there is an error creating the stream, nothing will be
+ filled in here.
+ \param _url The URL to read from.
+ Currently only the <file:>, <http:>, and <https:> schemes
+ are supported.
+ Both <http:> and <https:> may be disabled at compile time,
+ in which case opening such URLs will always fail.
+ Currently this only supports URIs.
+ IRIs should be converted to UTF-8 and URL-escaped, with
+ internationalized domain names encoded in punycode, before
+ passing them to this function.
+ \param ... The \ref url_options "optional flags" to use.
+ This is a variable-length list of options terminated with
+ <code>NULL</code>.
+ \return A stream handle to use with the callbacks, or <code>NULL</code> on
+ error.*/
+OP_WARN_UNUSED_RESULT void *op_url_stream_create(OpusFileCallbacks *_cb,
+ const char *_url,...) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_open_close Opening and Closing*/
+/*@{*/
+/**\name Functions for opening and closing streams
+
+ These functions allow you to test a stream to see if it is Opus, open it,
+ and close it.
+ Several flavors are provided for each of the built-in stream types, plus a
+ more general version which takes a set of application-provided callbacks.*/
+/*@{*/
+
+/**Test to see if this is an Opus stream.
+ For good results, you will need at least 57 bytes (for a pure Opus-only
+ stream).
+ Something like 512 bytes will give more reliable results for multiplexed
+ streams.
+ This function is meant to be a quick-rejection filter.
+ Its purpose is not to guarantee that a stream is a valid Opus stream, but to
+ ensure that it looks enough like Opus that it isn't going to be recognized
+ as some other format (except possibly an Opus stream that is also
+ multiplexed with other codecs, such as video).
+ \param[out] _head The parsed ID header contents.
+ You may pass <code>NULL</code> if you do not need
+ this information.
+ If the function fails, the contents of this structure
+ remain untouched.
+ \param _initial_data An initial buffer of data from the start of the
+ stream.
+ \param _initial_bytes The number of bytes in \a _initial_data.
+ \return 0 if the data appears to be Opus, or a negative value on error.
+ \retval #OP_FALSE There was not enough data to tell if this was an Opus
+ stream or not.
+ \retval #OP_EFAULT An internal memory allocation failed.
+ \retval #OP_EIMPL The stream used a feature that is not implemented,
+ such as an unsupported channel family.
+ \retval #OP_ENOTFORMAT If the data did not contain a recognizable ID
+ header for an Opus stream.
+ \retval #OP_EVERSION If the version field signaled a version this library
+ does not know how to parse.
+ \retval #OP_EBADHEADER The ID header was not properly formatted or contained
+ illegal values.*/
+int op_test(OpusHead *_head,
+ const unsigned char *_initial_data,size_t _initial_bytes);
+
+/**Open a stream from the given file path.
+ \param _path The path to the file to open.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want the
+ failure code.
+ The failure code will be #OP_EFAULT if the file could not
+ be opened, or one of the other failure codes from
+ op_open_callbacks() otherwise.
+ \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_file(const char *_path,int *_error)
+ OP_ARG_NONNULL(1);
+
+/**Open a stream from a memory buffer.
+ \param _data The memory buffer to open.
+ \param _size The number of bytes in the buffer.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want the
+ failure code.
+ See op_open_callbacks() for a full list of failure codes.
+ \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_memory(const unsigned char *_data,
+ size_t _size,int *_error);
+
+/**Open a stream from a URL.
+ This function behaves identically to op_open_url(), except that it
+ takes a va_list instead of a variable number of arguments.
+ It does not call the <code>va_end</code> macro, and because it invokes the
+ <code>va_arg</code> macro, the value of \a _ap is undefined after the call.
+ \note If you use this function, you must link against <tt>libopusurl</tt>.
+ \param _url The URL to open.
+ Currently only the <file:>, <http:>, and <https:>
+ schemes are supported.
+ Both <http:> and <https:> may be disabled at compile
+ time, in which case opening such URLs will always
+ fail.
+ Currently this only supports URIs.
+ IRIs should be converted to UTF-8 and URL-escaped,
+ with internationalized domain names encoded in
+ punycode, before passing them to this function.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want
+ the failure code.
+ See op_open_callbacks() for a full list of failure
+ codes.
+ \param[in,out] _ap A list of the \ref url_options "optional flags" to
+ use.
+ This is a variable-length list of options terminated
+ with <code>NULL</code>.
+ \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_vopen_url(const char *_url,
+ int *_error,va_list _ap) OP_ARG_NONNULL(1);
+
+/**Open a stream from a URL.
+ \note If you use this function, you must link against <tt>libopusurl</tt>.
+ \param _url The URL to open.
+ Currently only the <file:>, <http:>, and <https:> schemes
+ are supported.
+ Both <http:> and <https:> may be disabled at compile
+ time, in which case opening such URLs will always fail.
+ Currently this only supports URIs.
+ IRIs should be converted to UTF-8 and URL-escaped, with
+ internationalized domain names encoded in punycode,
+ before passing them to this function.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want the
+ failure code.
+ See op_open_callbacks() for a full list of failure codes.
+ \param ... The \ref url_options "optional flags" to use.
+ This is a variable-length list of options terminated with
+ <code>NULL</code>.
+ \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_url(const char *_url,
+ int *_error,...) OP_ARG_NONNULL(1);
+
+/**Open a stream using the given set of callbacks to access it.
+ \param _source The stream to read from (e.g., a <code>FILE *</code>).
+ \param _cb The callbacks with which to access the stream.
+ <code><a href="#op_read_func">read()</a></code> must
+ be implemented.
+ <code><a href="#op_seek_func">seek()</a></code> and
+ <code><a href="#op_tell_func">tell()</a></code> may
+ be <code>NULL</code>, or may always return -1 to
+ indicate a source is unseekable, but if
+ <code><a href="#op_seek_func">seek()</a></code> is
+ implemented and succeeds on a particular source, then
+ <code><a href="#op_tell_func">tell()</a></code> must
+ also.
+ <code><a href="#op_close_func">close()</a></code> may
+ be <code>NULL</code>, but if it is not, it will be
+ called when the \c OggOpusFile is destroyed by
+ op_free().
+ It will not be called if op_open_callbacks() fails
+ with an error.
+ \param _initial_data An initial buffer of data from the start of the
+ stream.
+ Applications can read some number of bytes from the
+ start of the stream to help identify this as an Opus
+ stream, and then provide them here to allow the
+ stream to be opened, even if it is unseekable.
+ \param _initial_bytes The number of bytes in \a _initial_data.
+ If the stream is seekable, its current position (as
+ reported by
+ <code><a href="#op_tell_func">tell()</a></code>
+ at the start of this function) must be equal to
+ \a _initial_bytes.
+ Otherwise, seeking to absolute positions will
+ generate inconsistent results.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want
+ the failure code.
+ The failure code will be one of
+ <dl>
+ <dt>#OP_EREAD</dt>
+ <dd>An underlying read, seek, or tell operation
+ failed when it should have succeeded, or we failed
+ to find data in the stream we had seen before.</dd>
+ <dt>#OP_EFAULT</dt>
+ <dd>There was a memory allocation failure, or an
+ internal library error.</dd>
+ <dt>#OP_EIMPL</dt>
+ <dd>The stream used a feature that is not
+ implemented, such as an unsupported channel
+ family.</dd>
+ <dt>#OP_EINVAL</dt>
+ <dd><code><a href="#op_seek_func">seek()</a></code>
+ was implemented and succeeded on this source, but
+ <code><a href="#op_tell_func">tell()</a></code>
+ did not, or the starting position indicator was
+ not equal to \a _initial_bytes.</dd>
+ <dt>#OP_ENOTFORMAT</dt>
+ <dd>The stream contained a link that did not have
+ any logical Opus streams in it.</dd>
+ <dt>#OP_EBADHEADER</dt>
+ <dd>A required header packet was not properly
+ formatted, contained illegal values, or was missing
+ altogether.</dd>
+ <dt>#OP_EVERSION</dt>
+ <dd>An ID header contained an unrecognized version
+ number.</dd>
+ <dt>#OP_EBADLINK</dt>
+ <dd>We failed to find data we had seen before after
+ seeking.</dd>
+ <dt>#OP_EBADTIMESTAMP</dt>
+ <dd>The first or last timestamp in a link failed
+ basic validity checks.</dd>
+ </dl>
+ \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.
+ <tt>libopusfile</tt> does <em>not</em> take ownership of the source
+ if the call fails.
+ The calling application is responsible for closing the source if
+ this call returns an error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_callbacks(void *_source,
+ const OpusFileCallbacks *_cb,const unsigned char *_initial_data,
+ size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2);
+
+/**Partially open a stream from the given file path.
+ \see op_test_callbacks
+ \param _path The path to the file to open.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want the
+ failure code.
+ The failure code will be #OP_EFAULT if the file could not
+ be opened, or one of the other failure codes from
+ op_open_callbacks() otherwise.
+ \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_file(const char *_path,int *_error)
+ OP_ARG_NONNULL(1);
+
+/**Partially open a stream from a memory buffer.
+ \see op_test_callbacks
+ \param _data The memory buffer to open.
+ \param _size The number of bytes in the buffer.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want the
+ failure code.
+ See op_open_callbacks() for a full list of failure codes.
+ \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_memory(const unsigned char *_data,
+ size_t _size,int *_error);
+
+/**Partially open a stream from a URL.
+ This function behaves identically to op_test_url(), except that it
+ takes a va_list instead of a variable number of arguments.
+ It does not call the <code>va_end</code> macro, and because it invokes the
+ <code>va_arg</code> macro, the value of \a _ap is undefined after the call.
+ \note If you use this function, you must link against <tt>libopusurl</tt>.
+ \see op_test_url
+ \see op_test_callbacks
+ \param _url The URL to open.
+ Currently only the <file:>, <http:>, and <https:>
+ schemes are supported.
+ Both <http:> and <https:> may be disabled at compile
+ time, in which case opening such URLs will always
+ fail.
+ Currently this only supports URIs.
+ IRIs should be converted to UTF-8 and URL-escaped,
+ with internationalized domain names encoded in
+ punycode, before passing them to this function.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want
+ the failure code.
+ See op_open_callbacks() for a full list of failure
+ codes.
+ \param[in,out] _ap A list of the \ref url_options "optional flags" to
+ use.
+ This is a variable-length list of options terminated
+ with <code>NULL</code>.
+ \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_vtest_url(const char *_url,
+ int *_error,va_list _ap) OP_ARG_NONNULL(1);
+
+/**Partially open a stream from a URL.
+ \note If you use this function, you must link against <tt>libopusurl</tt>.
+ \see op_test_callbacks
+ \param _url The URL to open.
+ Currently only the <file:>, <http:>, and <https:>
+ schemes are supported.
+ Both <http:> and <https:> may be disabled at compile
+ time, in which case opening such URLs will always fail.
+ Currently this only supports URIs.
+ IRIs should be converted to UTF-8 and URL-escaped, with
+ internationalized domain names encoded in punycode,
+ before passing them to this function.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want the
+ failure code.
+ See op_open_callbacks() for a full list of failure
+ codes.
+ \param ... The \ref url_options "optional flags" to use.
+ This is a variable-length list of options terminated
+ with <code>NULL</code>.
+ \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_url(const char *_url,
+ int *_error,...) OP_ARG_NONNULL(1);
+
+/**Partially open a stream using the given set of callbacks to access it.
+ This tests for Opusness and loads the headers for the first link.
+ It does not seek (although it tests for seekability).
+ You can query a partially open stream for the few pieces of basic
+ information returned by op_serialno(), op_channel_count(), op_head(), and
+ op_tags() (but only for the first link).
+ You may also determine if it is seekable via a call to op_seekable().
+ You cannot read audio from the stream, seek, get the size or duration,
+ get information from links other than the first one, or even get the total
+ number of links until you finish opening the stream with op_test_open().
+ If you do not need to do any of these things, you can dispose of it with
+ op_free() instead.
+
+ This function is provided mostly to simplify porting existing code that used
+ <tt>libvorbisfile</tt>.
+ For new code, you are likely better off using op_test() instead, which
+ is less resource-intensive, requires less data to succeed, and imposes a
+ hard limit on the amount of data it examines (important for unseekable
+ sources, where all such data must be buffered until you are sure of the
+ stream type).
+ \param _source The stream to read from (e.g., a <code>FILE *</code>).
+ \param _cb The callbacks with which to access the stream.
+ <code><a href="#op_read_func">read()</a></code> must
+ be implemented.
+ <code><a href="#op_seek_func">seek()</a></code> and
+ <code><a href="#op_tell_func">tell()</a></code> may
+ be <code>NULL</code>, or may always return -1 to
+ indicate a source is unseekable, but if
+ <code><a href="#op_seek_func">seek()</a></code> is
+ implemented and succeeds on a particular source, then
+ <code><a href="#op_tell_func">tell()</a></code> must
+ also.
+ <code><a href="#op_close_func">close()</a></code> may
+ be <code>NULL</code>, but if it is not, it will be
+ called when the \c OggOpusFile is destroyed by
+ op_free().
+ It will not be called if op_test_callbacks() fails
+ with an error.
+ \param _initial_data An initial buffer of data from the start of the
+ stream.
+ Applications can read some number of bytes from the
+ start of the stream to help identify this as an Opus
+ stream, and then provide them here to allow the
+ stream to be tested more thoroughly, even if it is
+ unseekable.
+ \param _initial_bytes The number of bytes in \a _initial_data.
+ If the stream is seekable, its current position (as
+ reported by
+ <code><a href="#op_tell_func">tell()</a></code>
+ at the start of this function) must be equal to
+ \a _initial_bytes.
+ Otherwise, seeking to absolute positions will
+ generate inconsistent results.
+ \param[out] _error Returns 0 on success, or a failure code on error.
+ You may pass in <code>NULL</code> if you don't want
+ the failure code.
+ See op_open_callbacks() for a full list of failure
+ codes.
+ \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.
+ <tt>libopusfile</tt> does <em>not</em> take ownership of the source
+ if the call fails.
+ The calling application is responsible for closing the source if
+ this call returns an error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_callbacks(void *_source,
+ const OpusFileCallbacks *_cb,const unsigned char *_initial_data,
+ size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2);
+
+/**Finish opening a stream partially opened with op_test_callbacks() or one of
+ the associated convenience functions.
+ If this function fails, you are still responsible for freeing the
+ \c OggOpusFile with op_free().
+ \param _of The \c OggOpusFile to finish opening.
+ \return 0 on success, or a negative value on error.
+ \retval #OP_EREAD An underlying read, seek, or tell operation failed
+ when it should have succeeded.
+ \retval #OP_EFAULT There was a memory allocation failure, or an
+ internal library error.
+ \retval #OP_EIMPL The stream used a feature that is not implemented,
+ such as an unsupported channel family.
+ \retval #OP_EINVAL The stream was not partially opened with
+ op_test_callbacks() or one of the associated
+ convenience functions.
+ \retval #OP_ENOTFORMAT The stream contained a link that did not have any
+ logical Opus streams in it.
+ \retval #OP_EBADHEADER A required header packet was not properly
+ formatted, contained illegal values, or was
+ missing altogether.
+ \retval #OP_EVERSION An ID header contained an unrecognized version
+ number.
+ \retval #OP_EBADLINK We failed to find data we had seen before after
+ seeking.
+ \retval #OP_EBADTIMESTAMP The first or last timestamp in a link failed basic
+ validity checks.*/
+int op_test_open(OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Release all memory used by an \c OggOpusFile.
+ This is also how you dispose of a partially-open stream returned by
+ op_test_callbacks() (or one of the associated convenience functions),
+ including one for which op_test_open() has failed.
+ If a <code><a href="#op_close_func">close()</a></code> callback was
+ provided when the stream was opened, it is called when the
+ \c OggOpusFile is destroyed here.
+ \param _of The \c OggOpusFile to free.*/
+void op_free(OggOpusFile *_of);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_info Stream Information*/
+/*@{*/
+/**\name Functions for obtaining information about streams
+
+ These functions allow you to get basic information about a stream, including
+ seekability, the number of links (for chained streams), plus the size,
+ duration, bitrate, header parameters, and meta information for each link
+ (or, where available, the stream as a whole).
+ Some of these (size, duration) are only available for seekable streams.
+ You can also query the current stream position, link, and playback time,
+ and instantaneous bitrate during playback.
+
+ Some of these functions may be used successfully on the partially open
+ streams returned by op_test_callbacks() or one of the associated
+ convenience functions.
+ Their documentation will indicate so explicitly.*/
+/*@{*/
+
+/**Returns whether or not the data source being read is seekable.
+ This is true if
+ <ol>
+ <li>The <code><a href="#op_seek_func">seek()</a></code> and
+ <code><a href="#op_tell_func">tell()</a></code> callbacks are both
+ non-<code>NULL</code>,</li>
+ <li>The <code><a href="#op_seek_func">seek()</a></code> callback was
+ successfully executed at least once, and</li>
+ <li>The <code><a href="#op_tell_func">tell()</a></code> callback was
+ successfully able to report the position indicator afterwards.</li>
+ </ol>
+ This function may be called on partially-opened streams.
+ \param _of The \c OggOpusFile whose seekable status is to be returned.
+ \return A non-zero value if seekable, and 0 if unseekable.*/
+int op_seekable(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Returns the number of links in this chained stream.
+ This function may be called on partially-opened streams, but it will always
+ return 1.
+ The actual number of links is not known until the stream is fully opened.
+ \param _of The \c OggOpusFile from which to retrieve the link count.
+ \return For fully-open seekable sources, this returns the total number of
+ links in the whole stream, which will be at least 1.
+ For partially-open or unseekable sources, this always returns 1.*/
+int op_link_count(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Get the serial number of the given link in a (possibly-chained) Ogg Opus
+ stream.
+ This function may be called on partially-opened streams, but it will always
+ return the serial number of the Opus stream in the first link.
+ \param _of The \c OggOpusFile from which to retrieve the serial number.
+ \param _li The index of the link whose serial number should be retrieved.
+ Use a negative number to get the serial number of the current
+ link.
+ \return The serial number of the given link.
+ If \a _li is greater than the total number of links, this returns
+ the serial number of the last link.
+ If the source is not seekable, this always returns the serial number
+ of the current link.*/
+opus_uint32 op_serialno(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the channel count of the given link in a (possibly-chained) Ogg Opus
+ stream.
+ This is equivalent to <code>op_head(_of,_li)->channel_count</code>, but
+ is provided for convenience.
+ This function may be called on partially-opened streams, but it will always
+ return the channel count of the Opus stream in the first link.
+ \param _of The \c OggOpusFile from which to retrieve the channel count.
+ \param _li The index of the link whose channel count should be retrieved.
+ Use a negative number to get the channel count of the current
+ link.
+ \return The channel count of the given link.
+ If \a _li is greater than the total number of links, this returns
+ the channel count of the last link.
+ If the source is not seekable, this always returns the channel count
+ of the current link.*/
+int op_channel_count(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the total (compressed) size of the stream, or of an individual link in
+ a (possibly-chained) Ogg Opus stream, including all headers and Ogg muxing
+ overhead.
+ \warning If the Opus stream (or link) is concurrently multiplexed with other
+ logical streams (e.g., video), this returns the size of the entire stream
+ (or link), not just the number of bytes in the first logical Opus stream.
+ Returning the latter would require scanning the entire file.
+ \param _of The \c OggOpusFile from which to retrieve the compressed size.
+ \param _li The index of the link whose compressed size should be computed.
+ Use a negative number to get the compressed size of the entire
+ stream.
+ \return The compressed size of the entire stream if \a _li is negative, the
+ compressed size of link \a _li if it is non-negative, or a negative
+ value on error.
+ The compressed size of the entire stream may be smaller than that
+ of the underlying source if trailing garbage was detected in the
+ file.
+ \retval #OP_EINVAL The source is not seekable (so we can't know the length),
+ \a _li wasn't less than the total number of links in
+ the stream, or the stream was only partially open.*/
+opus_int64 op_raw_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the total PCM length (number of samples at 48 kHz) of the stream, or of
+ an individual link in a (possibly-chained) Ogg Opus stream.
+ Users looking for <code>op_time_total()</code> should use op_pcm_total()
+ instead.
+ Because timestamps in Opus are fixed at 48 kHz, there is no need for a
+ separate function to convert this to seconds (and leaving it out avoids
+ introducing floating point to the API, for those that wish to avoid it).
+ \param _of The \c OggOpusFile from which to retrieve the PCM length.
+ \param _li The index of the link whose PCM length should be computed.
+ Use a negative number to get the PCM length of the entire stream.
+ \return The PCM length of the entire stream if \a _li is negative, the PCM
+ length of link \a _li if it is non-negative, or a negative value on
+ error.
+ \retval #OP_EINVAL The source is not seekable (so we can't know the length),
+ \a _li wasn't less than the total number of links in
+ the stream, or the stream was only partially open.*/
+ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the ID header information for the given link in a (possibly chained) Ogg
+ Opus stream.
+ This function may be called on partially-opened streams, but it will always
+ return the ID header information of the Opus stream in the first link.
+ \param _of The \c OggOpusFile from which to retrieve the ID header
+ information.
+ \param _li The index of the link whose ID header information should be
+ retrieved.
+ Use a negative number to get the ID header information of the
+ current link.
+ For an unseekable stream, \a _li is ignored, and the ID header
+ information for the current link is always returned, if
+ available.
+ \return The contents of the ID header for the given link.*/
+const OpusHead *op_head(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the comment header information for the given link in a (possibly
+ chained) Ogg Opus stream.
+ This function may be called on partially-opened streams, but it will always
+ return the tags from the Opus stream in the first link.
+ \param _of The \c OggOpusFile from which to retrieve the comment header
+ information.
+ \param _li The index of the link whose comment header information should be
+ retrieved.
+ Use a negative number to get the comment header information of
+ the current link.
+ For an unseekable stream, \a _li is ignored, and the comment
+ header information for the current link is always returned, if
+ available.
+ \return The contents of the comment header for the given link, or
+ <code>NULL</code> if this is an unseekable stream that encountered
+ an invalid link.*/
+const OpusTags *op_tags(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Retrieve the index of the current link.
+ This is the link that produced the data most recently read by
+ op_read_float() or its associated functions, or, after a seek, the link
+ that the seek target landed in.
+ Reading more data may advance the link index (even on the first read after a
+ seek).
+ \param _of The \c OggOpusFile from which to retrieve the current link index.
+ \return The index of the current link on success, or a negative value on
+ failure.
+ For seekable streams, this is a number between 0 and the value
+ returned by op_link_count().
+ For unseekable streams, this value starts at 0 and increments by one
+ each time a new link is encountered (even though op_link_count()
+ always returns 1).
+ \retval #OP_EINVAL The stream was only partially open.*/
+int op_current_link(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Computes the bitrate of the stream, or of an individual link in a
+ (possibly-chained) Ogg Opus stream.
+ The stream must be seekable to compute the bitrate.
+ For unseekable streams, use op_bitrate_instant() to get periodic estimates.
+ \warning If the Opus stream (or link) is concurrently multiplexed with other
+ logical streams (e.g., video), this uses the size of the entire stream (or
+ link) to compute the bitrate, not just the number of bytes in the first
+ logical Opus stream.
+ Returning the latter requires scanning the entire file, but this may be done
+ by decoding the whole file and calling op_bitrate_instant() once at the
+ end.
+ Install a trivial decoding callback with op_set_decode_callback() if you
+ wish to skip actual decoding during this process.
+ \param _of The \c OggOpusFile from which to retrieve the bitrate.
+ \param _li The index of the link whose bitrate should be computed.
+ Use a negative number to get the bitrate of the whole stream.
+ \return The bitrate on success, or a negative value on error.
+ \retval #OP_EINVAL The stream was only partially open, the stream was not
+ seekable, or \a _li was larger than the number of
+ links.*/
+opus_int32 op_bitrate(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Compute the instantaneous bitrate, measured as the ratio of bits to playable
+ samples decoded since a) the last call to op_bitrate_instant(), b) the last
+ seek, or c) the start of playback, whichever was most recent.
+ This will spike somewhat after a seek or at the start/end of a chain
+ boundary, as pre-skip, pre-roll, and end-trimming cause samples to be
+ decoded but not played.
+ \param _of The \c OggOpusFile from which to retrieve the bitrate.
+ \return The bitrate, in bits per second, or a negative value on error.
+ \retval #OP_FALSE No data has been decoded since any of the events
+ described above.
+ \retval #OP_EINVAL The stream was only partially open.*/
+opus_int32 op_bitrate_instant(OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Obtain the current value of the position indicator for \a _of.
+ \param _of The \c OggOpusFile from which to retrieve the position indicator.
+ \return The byte position that is currently being read from, or a negative
+ value on error.
+ \retval #OP_EINVAL The stream was only partially open.*/
+opus_int64 op_raw_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Obtain the PCM offset of the next sample to be read.
+ If the stream is not properly timestamped, this might not increment by the
+ proper amount between reads, or even return monotonically increasing
+ values.
+ \param _of The \c OggOpusFile from which to retrieve the PCM offset.
+ \return The PCM offset of the next sample to be read, or a negative value on
+ error.
+ \retval #OP_EINVAL The stream was only partially open.*/
+ogg_int64_t op_pcm_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_seeking Seeking*/
+/*@{*/
+/**\name Functions for seeking in Opus streams
+
+ These functions let you seek in Opus streams, if the underlying source
+ supports it.
+ Seeking is implemented for all built-in stream I/O routines, though some
+ individual sources may not be seekable (pipes, live HTTP streams, or HTTP
+ streams from a server that does not support <code>Range</code> requests).
+
+ op_raw_seek() is the fastest: it is guaranteed to perform at most one
+ physical seek, but, since the target is a byte position, makes no guarantee
+ how close to a given time it will come.
+ op_pcm_seek() provides sample-accurate seeking.
+ The number of physical seeks it requires is still quite small (often 1 or
+ 2, even in highly variable bitrate streams).
+
+ Seeking in Opus requires decoding some pre-roll amount before playback to
+ allow the internal state to converge (as if recovering from packet loss).
+ This is handled internally by <tt>libopusfile</tt>, but means there is
+ little extra overhead for decoding up to the exact position requested
+ (since it must decode some amount of audio anyway).
+ It also means that decoding after seeking may not return exactly the same
+ values as would be obtained by decoding the stream straight through.
+ However, such differences are expected to be smaller than the loss
+ introduced by Opus's lossy compression.*/
+/*@{*/
+
+/**Seek to a byte offset relative to the <b>compressed</b> data.
+ This also scans packets to update the PCM cursor.
+ It will cross a logical bitstream boundary, but only if it can't get any
+ packets out of the tail of the link to which it seeks.
+ \param _of The \c OggOpusFile in which to seek.
+ \param _byte_offset The byte position to seek to.
+ \return 0 on success, or a negative error code on failure.
+ \retval #OP_EREAD The underlying seek operation failed.
+ \retval #OP_EINVAL The stream was only partially open, or the target was
+ outside the valid range for the stream.
+ \retval #OP_ENOSEEK This stream is not seekable.
+ \retval #OP_EBADLINK Failed to initialize a decoder for a stream for an
+ unknown reason.*/
+int op_raw_seek(OggOpusFile *_of,opus_int64 _byte_offset) OP_ARG_NONNULL(1);
+
+/**Seek to the specified PCM offset, such that decoding will begin at exactly
+ the requested position.
+ \param _of The \c OggOpusFile in which to seek.
+ \param _pcm_offset The PCM offset to seek to.
+ This is in samples at 48 kHz relative to the start of the
+ stream.
+ \return 0 on success, or a negative value on error.
+ \retval #OP_EREAD An underlying read or seek operation failed.
+ \retval #OP_EINVAL The stream was only partially open, or the target was
+ outside the valid range for the stream.
+ \retval #OP_ENOSEEK This stream is not seekable.
+ \retval #OP_EBADLINK We failed to find data we had seen before, or the
+ bitstream structure was sufficiently malformed that
+ seeking to the target destination was impossible.*/
+int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset) OP_ARG_NONNULL(1);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_decoding Decoding*/
+/*@{*/
+/**\name Functions for decoding audio data
+
+ These functions retrieve actual decoded audio data from the stream.
+ The general functions, op_read() and op_read_float() return 16-bit or
+ floating-point output, both using native endian ordering.
+ The number of channels returned can change from link to link in a chained
+ stream.
+ There are special functions, op_read_stereo() and op_read_float_stereo(),
+ which always output two channels, to simplify applications which do not
+ wish to handle multichannel audio.
+ These downmix multichannel files to two channels, so they can always return
+ samples in the same format for every link in a chained file.
+
+ If the rest of your audio processing chain can handle floating point, those
+ routines should be preferred, as floating point output avoids introducing
+ clipping and other issues which might be avoided entirely if, e.g., you
+ scale down the volume at some other stage.
+ However, if you intend to consume 16-bit samples directly, the conversion in
+ <tt>libopusfile</tt> provides noise-shaping dithering and, if compiled
+ against <tt>libopus</tt>&nbsp;1.1 or later, soft-clipping prevention.
+
+ <tt>libopusfile</tt> can also be configured at compile time to use the
+ fixed-point <tt>libopus</tt> API.
+ If so, <tt>libopusfile</tt>'s floating-point API may also be disabled.
+ In that configuration, nothing in <tt>libopusfile</tt> will use any
+ floating-point operations, to simplify support on devices without an
+ adequate FPU.
+
+ \warning HTTPS streams may be vulnerable to truncation attacks if you do
+ not check the error return code from op_read_float() or its associated
+ functions.
+ If the remote peer does not close the connection gracefully (with a TLS
+ "close notify" message), these functions will return #OP_EREAD instead of 0
+ when they reach the end of the file.
+ If you are reading from an <https:> URL (particularly if seeking is not
+ supported), you should make sure to check for this error and warn the user
+ appropriately.*/
+/*@{*/
+
+/**Indicates that the decoding callback should produce signed 16-bit
+ native-endian output samples.*/
+#define OP_DEC_FORMAT_SHORT (7008)
+/**Indicates that the decoding callback should produce 32-bit native-endian
+ float samples.*/
+#define OP_DEC_FORMAT_FLOAT (7040)
+
+/**Indicates that the decoding callback did not decode anything, and that
+ <tt>libopusfile</tt> should decode normally instead.*/
+#define OP_DEC_USE_DEFAULT (6720)
+
+/**Called to decode an Opus packet.
+ This should invoke the functional equivalent of opus_multistream_decode() or
+ opus_multistream_decode_float(), except that it returns 0 on success
+ instead of the number of decoded samples (which is known a priori).
+ \param _ctx The application-provided callback context.
+ \param _decoder The decoder to use to decode the packet.
+ \param[out] _pcm The buffer to decode into.
+ This will always have enough room for \a _nchannels of
+ \a _nsamples samples, which should be placed into this
+ buffer interleaved.
+ \param _op The packet to decode.
+ This will always have its granule position set to a valid
+ value.
+ \param _nsamples The number of samples expected from the packet.
+ \param _nchannels The number of channels expected from the packet.
+ \param _format The desired sample output format.
+ This is either #OP_DEC_FORMAT_SHORT or
+ #OP_DEC_FORMAT_FLOAT.
+ \param _li The index of the link from which this packet was decoded.
+ \return A non-negative value on success, or a negative value on error.
+ The error codes should be the same as those returned by
+ opus_multistream_decode() or opus_multistream_decode_float().
+ \retval 0 Decoding was successful.
+ The application has filled the buffer with
+ exactly <code>\a _nsamples*\a
+ _nchannels</code> samples in the requested
+ format.
+ \retval #OP_DEC_USE_DEFAULT No decoding was done.
+ <tt>libopusfile</tt> should decode normally
+ instead.*/
+typedef int (*op_decode_cb_func)(void *_ctx,OpusMSDecoder *_decoder,void *_pcm,
+ const ogg_packet *_op,int _nsamples,int _nchannels,int _format,int _li);
+
+/**Sets the packet decode callback function.
+ This is called once for each packet that needs to be decoded.
+ A call to this function is no guarantee that the audio will eventually be
+ delivered to the application.
+ Some or all of the data from the packet may be discarded (i.e., at the
+ beginning or end of a link, or after a seek), however the callback is
+ required to provide all of it.
+ \param _of The \c OggOpusFile on which to set the decode callback.
+ \param _decode_cb The callback function to call.
+ This may be <code>NULL</code> to disable calling the
+ callback.
+ \param _ctx The application-provided context pointer to pass to the
+ callback on each call.*/
+void op_set_decode_callback(OggOpusFile *_of,
+ op_decode_cb_func _decode_cb,void *_ctx) OP_ARG_NONNULL(1);
+
+/**Gain offset type that indicates that the provided offset is relative to the
+ header gain.
+ This is the default.*/
+#define OP_HEADER_GAIN (0)
+
+/**Gain offset type that indicates that the provided offset is relative to the
+ R128_TRACK_GAIN value (if any), in addition to the header gain.*/
+#define OP_TRACK_GAIN (3008)
+
+/**Gain offset type that indicates that the provided offset should be used as
+ the gain directly, without applying any of the header or track gains.*/
+#define OP_ABSOLUTE_GAIN (3009)
+
+/**Sets the gain to be used for decoded output.
+ By default, the gain in the header is applied with no additional offset.
+ The total gain (including header gain and/or track gain, if applicable, and
+ this offset), will be clamped to [-32768,32767]/256 dB.
+ This is more than enough to saturate or underflow 16-bit PCM.
+ \note The new gain will not be applied to any already buffered, decoded
+ output.
+ This means you cannot change it sample-by-sample, as at best it will be
+ updated packet-by-packet.
+ It is meant for setting a target volume level, rather than applying smooth
+ fades, etc.
+ \param _of The \c OggOpusFile on which to set the gain offset.
+ \param _gain_type One of #OP_HEADER_GAIN, #OP_TRACK_GAIN, or
+ #OP_ABSOLUTE_GAIN.
+ \param _gain_offset_q8 The gain offset to apply, in 1/256ths of a dB.
+ \return 0 on success or a negative value on error.
+ \retval #OP_EINVAL The \a _gain_type was unrecognized.*/
+int op_set_gain_offset(OggOpusFile *_of,
+ int _gain_type,opus_int32 _gain_offset_q8) OP_ARG_NONNULL(1);
+
+/**Sets whether or not dithering is enabled for 16-bit decoding.
+ By default, when <tt>libopusfile</tt> is compiled to use floating-point
+ internally, calling op_read() or op_read_stereo() will first decode to
+ float, and then convert to fixed-point using noise-shaping dithering.
+ This flag can be used to disable that dithering.
+ When the application uses op_read_float() or op_read_float_stereo(), or when
+ the library has been compiled to decode directly to fixed point, this flag
+ has no effect.
+ \param _of The \c OggOpusFile on which to enable or disable dithering.
+ \param _enabled A non-zero value to enable dithering, or 0 to disable it.*/
+void op_set_dither_enabled(OggOpusFile *_of,int _enabled) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream.
+ \note Although \a _buf_size must indicate the total number of values that
+ can be stored in \a _pcm, the return value is the number of samples
+ <em>per channel</em>.
+ This is done because
+ <ol>
+ <li>The channel count cannot be known a priori (reading more samples might
+ advance us into the next link, with a different channel count), so
+ \a _buf_size cannot also be in units of samples per channel,</li>
+ <li>Returning the samples per channel matches the <code>libopus</code> API
+ as closely as we're able,</li>
+ <li>Returning the total number of values instead of samples per channel
+ would mean the caller would need a division to compute the samples per
+ channel, and might worry about the possibility of getting back samples
+ for some channels and not others, and</li>
+ <li>This approach is relatively fool-proof: if an application passes too
+ small a value to \a _buf_size, they will simply get fewer samples back,
+ and if they assume the return value is the total number of values, then
+ they will simply read too few (rather than reading too many and going
+ off the end of the buffer).</li>
+ </ol>
+ \param _of The \c OggOpusFile from which to read.
+ \param[out] _pcm A buffer in which to store the output PCM samples, as
+ signed native-endian 16-bit values at 48&nbsp;kHz
+ with a nominal range of <code>[-32768,32767]</code>.
+ Multiple channels are interleaved using the
+ <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+ channel ordering</a>.
+ This must have room for at least \a _buf_size values.
+ \param _buf_size The number of values that can be stored in \a _pcm.
+ It is recommended that this be large enough for at
+ least 120 ms of data at 48 kHz per channel (5760
+ values per channel).
+ Smaller buffers will simply return less data, possibly
+ consuming more memory to buffer the data internally.
+ <tt>libopusfile</tt> may return less data than
+ requested.
+ If so, there is no guarantee that the remaining data
+ in \a _pcm will be unmodified.
+ \param[out] _li The index of the link this data was decoded from.
+ You may pass <code>NULL</code> if you do not need this
+ information.
+ If this function fails (returning a negative value),
+ this parameter is left unset.
+ \return The number of samples read per channel on success, or a negative
+ value on failure.
+ The channel count can be retrieved on success by calling
+ <code>op_head(_of,*_li)</code>.
+ The number of samples returned may be 0 if the buffer was too small
+ to store even a single sample for all channels, or if end-of-file
+ was reached.
+ The list of possible failure codes follows.
+ Most of them can only be returned by unseekable, chained streams
+ that encounter a new link.
+ \retval #OP_HOLE There was a hole in the data, and some samples
+ may have been skipped.
+ Call this function again to continue decoding
+ past the hole.
+ \retval #OP_EREAD An underlying read operation failed.
+ This may signal a truncation attack from an
+ <https:> source.
+ \retval #OP_EFAULT An internal memory allocation failed.
+ \retval #OP_EIMPL An unseekable stream encountered a new link that
+ used a feature that is not implemented, such as
+ an unsupported channel family.
+ \retval #OP_EINVAL The stream was only partially open.
+ \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that
+ did not have any logical Opus streams in it.
+ \retval #OP_EBADHEADER An unseekable stream encountered a new link with a
+ required header packet that was not properly
+ formatted, contained illegal values, or was
+ missing altogether.
+ \retval #OP_EVERSION An unseekable stream encountered a new link with
+ an ID header that contained an unrecognized
+ version number.
+ \retval #OP_EBADPACKET Failed to properly decode the next packet.
+ \retval #OP_EBADLINK We failed to find data we had seen before.
+ \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+ a starting timestamp that failed basic validity
+ checks.*/
+OP_WARN_UNUSED_RESULT int op_read(OggOpusFile *_of,
+ opus_int16 *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream.
+ \note Although \a _buf_size must indicate the total number of values that
+ can be stored in \a _pcm, the return value is the number of samples
+ <em>per channel</em>. This is done because
+ <ol>
+ <li>The channel count cannot be known a priori (reading more samples might
+ advance us into the next link, with a different channel count), so
+ \a _buf_size cannot also be in units of samples per channel,</li>
+ <li>Returning the samples per channel matches the <code>libopus</code> API
+ as closely as we're able,</li>
+ <li>Returning the total number of values instead of samples per channel
+ would mean the caller would need a division to compute the samples per
+ channel, and might worry about the possibility of getting back samples
+ for some channels and not others, and</li>
+ <li>This approach is relatively fool-proof: if an application passes too
+ small a value to \a _buf_size, they will simply get fewer samples back,
+ and if they assume the return value is the total number of values, then
+ they will simply read too few (rather than reading too many and going
+ off the end of the buffer).</li>
+ </ol>
+ \param _of The \c OggOpusFile from which to read.
+ \param[out] _pcm A buffer in which to store the output PCM samples as
+ signed floats at 48&nbsp;kHz with a nominal range of
+ <code>[-1.0,1.0]</code>.
+ Multiple channels are interleaved using the
+ <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+ channel ordering</a>.
+ This must have room for at least \a _buf_size floats.
+ \param _buf_size The number of floats that can be stored in \a _pcm.
+ It is recommended that this be large enough for at
+ least 120 ms of data at 48 kHz per channel (5760
+ samples per channel).
+ Smaller buffers will simply return less data, possibly
+ consuming more memory to buffer the data internally.
+ If less than \a _buf_size values are returned,
+ <tt>libopusfile</tt> makes no guarantee that the
+ remaining data in \a _pcm will be unmodified.
+ \param[out] _li The index of the link this data was decoded from.
+ You may pass <code>NULL</code> if you do not need this
+ information.
+ If this function fails (returning a negative value),
+ this parameter is left unset.
+ \return The number of samples read per channel on success, or a negative
+ value on failure.
+ The channel count can be retrieved on success by calling
+ <code>op_head(_of,*_li)</code>.
+ The number of samples returned may be 0 if the buffer was too small
+ to store even a single sample for all channels, or if end-of-file
+ was reached.
+ The list of possible failure codes follows.
+ Most of them can only be returned by unseekable, chained streams
+ that encounter a new link.
+ \retval #OP_HOLE There was a hole in the data, and some samples
+ may have been skipped.
+ Call this function again to continue decoding
+ past the hole.
+ \retval #OP_EREAD An underlying read operation failed.
+ This may signal a truncation attack from an
+ <https:> source.
+ \retval #OP_EFAULT An internal memory allocation failed.
+ \retval #OP_EIMPL An unseekable stream encountered a new link that
+ used a feature that is not implemented, such as
+ an unsupported channel family.
+ \retval #OP_EINVAL The stream was only partially open.
+ \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that
+ did not have any logical Opus streams in it.
+ \retval #OP_EBADHEADER An unseekable stream encountered a new link with a
+ required header packet that was not properly
+ formatted, contained illegal values, or was
+ missing altogether.
+ \retval #OP_EVERSION An unseekable stream encountered a new link with
+ an ID header that contained an unrecognized
+ version number.
+ \retval #OP_EBADPACKET Failed to properly decode the next packet.
+ \retval #OP_EBADLINK We failed to find data we had seen before.
+ \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+ a starting timestamp that failed basic validity
+ checks.*/
+OP_WARN_UNUSED_RESULT int op_read_float(OggOpusFile *_of,
+ float *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream and downmixes to stereo, if necessary.
+ This function is intended for simple players that want a uniform output
+ format, even if the channel count changes between links in a chained
+ stream.
+ \note \a _buf_size indicates the total number of values that can be stored
+ in \a _pcm, while the return value is the number of samples <em>per
+ channel</em>, even though the channel count is known, for consistency with
+ op_read().
+ \param _of The \c OggOpusFile from which to read.
+ \param[out] _pcm A buffer in which to store the output PCM samples, as
+ signed native-endian 16-bit values at 48&nbsp;kHz
+ with a nominal range of <code>[-32768,32767]</code>.
+ The left and right channels are interleaved in the
+ buffer.
+ This must have room for at least \a _buf_size values.
+ \param _buf_size The number of values that can be stored in \a _pcm.
+ It is recommended that this be large enough for at
+ least 120 ms of data at 48 kHz per channel (11520
+ values total).
+ Smaller buffers will simply return less data, possibly
+ consuming more memory to buffer the data internally.
+ If less than \a _buf_size values are returned,
+ <tt>libopusfile</tt> makes no guarantee that the
+ remaining data in \a _pcm will be unmodified.
+ \return The number of samples read per channel on success, or a negative
+ value on failure.
+ The number of samples returned may be 0 if the buffer was too small
+ to store even a single sample for both channels, or if end-of-file
+ was reached.
+ The list of possible failure codes follows.
+ Most of them can only be returned by unseekable, chained streams
+ that encounter a new link.
+ \retval #OP_HOLE There was a hole in the data, and some samples
+ may have been skipped.
+ Call this function again to continue decoding
+ past the hole.
+ \retval #OP_EREAD An underlying read operation failed.
+ This may signal a truncation attack from an
+ <https:> source.
+ \retval #OP_EFAULT An internal memory allocation failed.
+ \retval #OP_EIMPL An unseekable stream encountered a new link that
+ used a feature that is not implemented, such as
+ an unsupported channel family.
+ \retval #OP_EINVAL The stream was only partially open.
+ \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that
+ did not have any logical Opus streams in it.
+ \retval #OP_EBADHEADER An unseekable stream encountered a new link with a
+ required header packet that was not properly
+ formatted, contained illegal values, or was
+ missing altogether.
+ \retval #OP_EVERSION An unseekable stream encountered a new link with
+ an ID header that contained an unrecognized
+ version number.
+ \retval #OP_EBADPACKET Failed to properly decode the next packet.
+ \retval #OP_EBADLINK We failed to find data we had seen before.
+ \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+ a starting timestamp that failed basic validity
+ checks.*/
+OP_WARN_UNUSED_RESULT int op_read_stereo(OggOpusFile *_of,
+ opus_int16 *_pcm,int _buf_size) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream and downmixes to stereo, if necessary.
+ This function is intended for simple players that want a uniform output
+ format, even if the channel count changes between links in a chained
+ stream.
+ \note \a _buf_size indicates the total number of values that can be stored
+ in \a _pcm, while the return value is the number of samples <em>per
+ channel</em>, even though the channel count is known, for consistency with
+ op_read_float().
+ \param _of The \c OggOpusFile from which to read.
+ \param[out] _pcm A buffer in which to store the output PCM samples, as
+ signed floats at 48&nbsp;kHz with a nominal range of
+ <code>[-1.0,1.0]</code>.
+ The left and right channels are interleaved in the
+ buffer.
+ This must have room for at least \a _buf_size values.
+ \param _buf_size The number of values that can be stored in \a _pcm.
+ It is recommended that this be large enough for at
+ least 120 ms of data at 48 kHz per channel (11520
+ values total).
+ Smaller buffers will simply return less data, possibly
+ consuming more memory to buffer the data internally.
+ If less than \a _buf_size values are returned,
+ <tt>libopusfile</tt> makes no guarantee that the
+ remaining data in \a _pcm will be unmodified.
+ \return The number of samples read per channel on success, or a negative
+ value on failure.
+ The number of samples returned may be 0 if the buffer was too small
+ to store even a single sample for both channels, or if end-of-file
+ was reached.
+ The list of possible failure codes follows.
+ Most of them can only be returned by unseekable, chained streams
+ that encounter a new link.
+ \retval #OP_HOLE There was a hole in the data, and some samples
+ may have been skipped.
+ Call this function again to continue decoding
+ past the hole.
+ \retval #OP_EREAD An underlying read operation failed.
+ This may signal a truncation attack from an
+ <https:> source.
+ \retval #OP_EFAULT An internal memory allocation failed.
+ \retval #OP_EIMPL An unseekable stream encountered a new link that
+ used a feature that is not implemented, such as
+ an unsupported channel family.
+ \retval #OP_EINVAL The stream was only partially open.
+ \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that
+ did not have any logical Opus streams in it.
+ \retval #OP_EBADHEADER An unseekable stream encountered a new link with a
+ required header packet that was not properly
+ formatted, contained illegal values, or was
+ missing altogether.
+ \retval #OP_EVERSION An unseekable stream encountered a new link with
+ an ID header that contained an unrecognized
+ version number.
+ \retval #OP_EBADPACKET Failed to properly decode the next packet.
+ \retval #OP_EBADLINK We failed to find data we had seen before.
+ \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+ a starting timestamp that failed basic validity
+ checks.*/
+OP_WARN_UNUSED_RESULT int op_read_float_stereo(OggOpusFile *_of,
+ float *_pcm,int _buf_size) OP_ARG_NONNULL(1);
+
+/*@}*/
+/*@}*/
+
+# if OP_GNUC_PREREQ(4,0)
+# pragma GCC visibility pop
+# endif
+
+# if defined(__cplusplus)
+}
+# endif
+
+#endif
diff --git a/drivers/opus/repacketizer.c b/drivers/opus/repacketizer.c
new file mode 100644
index 0000000000..01406bb39b
--- /dev/null
+++ b/drivers/opus/repacketizer.c
@@ -0,0 +1,345 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus.h"
+#include "opus_private.h"
+#include "os_support.h"
+
+
+/* Reports the size in bytes of an OpusRepacketizer state object. */
+int opus_repacketizer_get_size(void) {
+   return sizeof(OpusRepacketizer);
+}
+
+/* Resets rp so that it holds no frames, then hands the same pointer back to the caller. */
+OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) {
+   rp->nb_frames = 0;
+   return rp;
+}
+
+/* Allocates a repacketizer state with opus_alloc() and initializes it; NULL if allocation fails. */
+OpusRepacketizer *opus_repacketizer_create(void)
+{
+   OpusRepacketizer *state;
+   state = (OpusRepacketizer *)opus_alloc(opus_repacketizer_get_size());
+   return state == NULL ? NULL : opus_repacketizer_init(state);
+}
+
+/* Releases a repacketizer state by passing it to opus_free(). */
+void opus_repacketizer_destroy(OpusRepacketizer *rp) {
+   opus_free(rp);
+}
+
+/* Appends the frames of one packet to rp.  The first packet sets rp->toc and rp->framesize;
+   each later one must match rp->toc in its upper six bits.  Returns OPUS_OK,
+   OPUS_INVALID_PACKET, or the result of opus_packet_parse_impl() when that is below 1. */
+static int opus_repacketizer_cat_impl(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len, int self_delimited)
+{
+   unsigned char parsed_toc;
+   int added, status;
+   /* Set or check ToC */
+   if (len<1)
+      return OPUS_INVALID_PACKET;
+   if (rp->nb_frames != 0)
+   {
+      if ((rp->toc&0xFC) != (data[0]&0xFC))
+         return OPUS_INVALID_PACKET;
+   } else {
+      rp->toc = data[0];
+      rp->framesize = opus_packet_get_samples_per_frame(data, 8000);
+   }
+   added = opus_packet_get_nb_frames(data, len);
+   if (added<1)
+      return OPUS_INVALID_PACKET;
+   /* Check the 120 ms maximum packet size */
+   if ((added+rp->nb_frames)*rp->framesize > 960)
+      return OPUS_INVALID_PACKET;
+   status = opus_packet_parse_impl(data, len, self_delimited, &parsed_toc, &rp->frames[rp->nb_frames], &rp->len[rp->nb_frames], NULL, NULL);
+   if (status<1)
+      return status;
+   rp->nb_frames += added;
+   return OPUS_OK;
+}
+
+/* Public wrapper: forwards to opus_repacketizer_cat_impl() with self_delimited set to 0. */
+int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) {
+   return opus_repacketizer_cat_impl(rp, data, len, 0);
+}
+
+/* Returns rp->nb_frames, the frame count that init resets and cat increases. */
+int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) {
+   return rp->nb_frames;
+}
+
+opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
+ unsigned char *data, opus_int32 maxlen, int self_delimited, int pad)
+{ /* Writes frames [begin,end) of rp into data (at most maxlen bytes); returns tot_size, OPUS_BAD_ARG or OPUS_BUFFER_TOO_SMALL. */
+ int i, count;
+ opus_int32 tot_size;
+ opus_int16 *len;
+ const unsigned char **frames;
+ unsigned char * ptr;
+
+ if (begin<0 || begin>=end || end>rp->nb_frames) /* rejects empty or out-of-bounds frame ranges */
+ {
+ /*fprintf(stderr, "%d %d %d\n", begin, end, rp->nb_frames);*/
+ return OPUS_BAD_ARG;
+ }
+ count = end-begin;
+
+ len = rp->len+begin; /* len[] and frames[] are now indexed relative to begin */
+ frames = rp->frames+begin;
+ if (self_delimited)
+ tot_size = 1 + (len[count-1]>=252); /* presumably the bytes encode_size() emits for the last frame's length below -- TODO confirm */
+ else
+ tot_size = 0;
+
+ ptr = data;
+ if (count==1)
+ {
+ /* Code 0 */
+ tot_size += len[0]+1;
+ if (tot_size > maxlen)
+ return OPUS_BUFFER_TOO_SMALL;
+ *ptr++ = rp->toc&0xFC; /* first byte: stored ToC with its two low bits cleared */
+ } else if (count==2)
+ {
+ if (len[1] == len[0])
+ {
+ /* Code 1 */
+ tot_size += 2*len[0]+1;
+ if (tot_size > maxlen)
+ return OPUS_BUFFER_TOO_SMALL;
+ *ptr++ = (rp->toc&0xFC) | 0x1;
+ } else {
+ /* Code 2 */
+ tot_size += len[0]+len[1]+2+(len[0]>=252);
+ if (tot_size > maxlen)
+ return OPUS_BUFFER_TOO_SMALL;
+ *ptr++ = (rp->toc&0xFC) | 0x2;
+ ptr += encode_size(len[0], ptr);
+ }
+ }
+ if (count > 2 || (pad && tot_size < maxlen)) /* also taken for 1 or 2 frames when padding is requested and room remains */
+ {
+ /* Code 3 */
+ int vbr;
+ int pad_amount=0;
+
+ /* Restart the process for the padding case */
+ ptr = data;
+ if (self_delimited)
+ tot_size = 1 + (len[count-1]>=252);
+ else
+ tot_size = 0;
+ vbr = 0;
+ for (i=1;i<count;i++) /* vbr becomes 1 as soon as any frame length differs from len[0] */
+ {
+ if (len[i] != len[0])
+ {
+ vbr=1;
+ break;
+ }
+ }
+ if (vbr)
+ {
+ tot_size += 2;
+ for (i=0;i<count-1;i++)
+ tot_size += 1 + (len[i]>=252) + len[i];
+ tot_size += len[count-1]; /* the last frame contributes only its payload here */
+
+ if (tot_size > maxlen)
+ return OPUS_BUFFER_TOO_SMALL;
+ *ptr++ = (rp->toc&0xFC) | 0x3;
+ *ptr++ = count | 0x80; /* second byte: frame count, with 0x80 set only on this vbr path */
+ } else {
+ tot_size += count*len[0]+2;
+ if (tot_size > maxlen)
+ return OPUS_BUFFER_TOO_SMALL;
+ *ptr++ = (rp->toc&0xFC) | 0x3;
+ *ptr++ = count;
+ }
+ pad_amount = pad ? (maxlen-tot_size) : 0; /* with pad set, the packet is grown to exactly maxlen bytes */
+ if (pad_amount != 0)
+ {
+ int nb_255s;
+ data[1] |= 0x40; /* 0x40 in the second byte is set only when padding is added */
+ nb_255s = (pad_amount-1)/255;
+ for (i=0;i<nb_255s;i++)
+ *ptr++ = 255;
+ *ptr++ = pad_amount-255*nb_255s-1;
+ tot_size += pad_amount;
+ }
+ if (vbr)
+ {
+ for (i=0;i<count-1;i++)
+ ptr += encode_size(len[i], ptr);
+ }
+ }
+ if (self_delimited) {
+ int sdlen = encode_size(len[count-1], ptr);
+ ptr += sdlen;
+ }
+ /* Copy the actual data */
+ for (i=0;i<count;i++)
+ {
+ /* Using OPUS_MOVE() instead of OPUS_COPY() in case we're doing in-place
+ padding from opus_packet_pad or opus_packet_unpad(). */
+ celt_assert(frames[i] + len[i] <= data || ptr <= frames[i]); /* source frame must lie wholly before data or at/after the write position */
+ OPUS_MOVE(ptr, frames[i], len[i]);
+ ptr += len[i];
+ }
+ if (pad)
+ {
+ for (i=ptr-data;i<maxlen;i++) /* zero every byte between the last frame and maxlen */
+ data[i] = 0;
+ }
+ return tot_size;
+}
+
+/* Emits frames [begin,end) of rp into data with self_delimited and pad both set to 0. */
+opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen) {
+   return opus_repacketizer_out_range_impl(rp, begin, end, data, maxlen, 0, 0);
+}
+
+/* Emits every frame held by rp (range 0..rp->nb_frames) with self_delimited and pad set to 0. */
+opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) {
+   return opus_repacketizer_out_range_impl(rp, 0, rp->nb_frames, data, maxlen, 0, 0);
+}
+
+/* Pads the packet in data, in place, from len to new_len bytes (the buffer must hold new_len).
+   Returns OPUS_OK, OPUS_BAD_ARG for len<1 or len>new_len, or the repacketizer's error code. */
+int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len)
+{
+   OpusRepacketizer rp;
+   opus_int32 ret;
+   if (len < 1 || len > new_len)
+      return OPUS_BAD_ARG;
+   if (len==new_len)
+      return OPUS_OK;
+   opus_repacketizer_init(&rp);
+   /* Moving payload to the end of the packet so we can do in-place padding */
+   OPUS_MOVE(data+new_len-len, data, len);
+   /* Report a packet that fails to parse as such, not as OPUS_BAD_ARG for an empty frame range. */
+   ret = opus_repacketizer_cat(&rp, data+new_len-len, len);
+   if (ret != OPUS_OK)
+      return ret;
+   ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, 1);
+   return ret > 0 ? OPUS_OK : ret;
+}
+
+/* Rewrites the packet in data in place with pad disabled and returns its new length, asserted to
+   lie in 1..len.  Returns OPUS_BAD_ARG when len<1, or the negative result of the cat step. */
+opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len)
+{
+   OpusRepacketizer state;
+   opus_int32 status;
+   if (len < 1) return OPUS_BAD_ARG;
+   opus_repacketizer_init(&state);
+   status = opus_repacketizer_cat(&state, data, len);
+   if (status < 0) return status;
+   status = opus_repacketizer_out_range_impl(&state, 0, state.nb_frames, data, len, 0, 0);
+   celt_assert(status > 0 && status <= len);
+   return status;
+}
+
+/* Grows a multistream packet from len to new_len bytes by padding its final stream.
+   The first nb_streams-1 streams are parsed as self-delimited packets and skipped; what
+   remains is handed to opus_packet_pad().  Returns OPUS_OK or an error code. */
+int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams)
+{
+   int stream;
+   int parsed;
+   unsigned char toc;
+   opus_int16 size[48];
+   opus_int32 packet_offset;
+   opus_int32 extra;
+
+   if (len < 1 || len > new_len)
+      return OPUS_BAD_ARG;
+   if (len == new_len)
+      return OPUS_OK;
+   extra = new_len - len;
+   /* Seek to last stream */
+   for (stream=0; stream<nb_streams-1; stream++)
+   {
+      if (len<=0)
+         return OPUS_INVALID_PACKET;
+      parsed = opus_packet_parse_impl(data, len, 1, &toc, NULL, size, NULL, &packet_offset);
+      if (parsed<0)
+         return parsed;
+      data += packet_offset;
+      len -= packet_offset;
+   }
+   return opus_packet_pad(data, len, len+extra);
+}
+
+/* Re-emits every stream of a multistream packet with pad disabled, packing the results back to
+   back from the start of data.  Returns the summed length of the rewritten streams, or the
+   first error code reported while parsing or re-emitting a stream. */
+opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams)
+{
+   int s;
+   unsigned char toc;
+   opus_int16 size[48];
+   opus_int32 packet_offset;
+   OpusRepacketizer rp;
+   unsigned char *out = data;
+   opus_int32 out_len = 0;
+
+   if (len < 1)
+      return OPUS_BAD_ARG;
+   /* Unpad all frames */
+   for (s=0;s<nb_streams;s++)
+   {
+      opus_int32 status;
+      /* Every stream except the last one is handled as self-delimited. */
+      int self_delimited = (s != nb_streams-1);
+      if (len<=0)
+         return OPUS_INVALID_PACKET;
+      opus_repacketizer_init(&rp);
+      status = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL, size, NULL, &packet_offset);
+      if (status<0)
+         return status;
+      status = opus_repacketizer_cat_impl(&rp, data, packet_offset, self_delimited);
+      if (status<0)
+         return status;
+      status = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, out, len, self_delimited, 0);
+      if (status<0)
+         return status;
+      out_len += status;
+      out += status;
+      data += packet_offset;
+      len -= packet_offset;
+   }
+   return out_len;
+}
+
diff --git a/drivers/opus/repacketizer_demo.c b/drivers/opus/repacketizer_demo.c
new file mode 100644
index 0000000000..1ca9cc3c96
--- /dev/null
+++ b/drivers/opus/repacketizer_demo.c
@@ -0,0 +1,217 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_PACKETOUT 32000
+
+/* Prints the one-line command synopsis to stderr, using argv0 as the program name. */
+void usage(char *argv0) {
+   fprintf(stderr, "usage: %s [options] input_file output_file\n", argv0);
+}
+
+/* Stores i into ch[0..3] one byte at a time, most significant byte first. */
+static void int_to_char(opus_uint32 i, unsigned char ch[4]) {
+   ch[3] = i&0xFF;
+   ch[2] = (i>>8)&0xFF;
+   ch[1] = (i>>16)&0xFF;
+   ch[0] = i>>24;
+}
+
+/* Combines ch[0..3], most significant byte first, into a single opus_uint32. */
+static opus_uint32 char_to_int(unsigned char ch[4]) {
+   opus_uint32 hi = ((opus_uint32)ch[0]<<8) | (opus_uint32)ch[1];
+   return (((hi<<8) | (opus_uint32)ch[2])<<8) | (opus_uint32)ch[3];
+}
+
+/* Repacketizer demo.  Reads records (4-byte length, 4-byte range value, payload) from the input
+   file, concatenates up to -merge packets at a time, and writes the merged packet, or with
+   -split one packet per frame, to the output file in the same record layout. */
+int main(int argc, char *argv[])
+{
+   int i, eof=0;
+   FILE *fin, *fout;
+   unsigned char packets[48][1500];
+   int len[48];
+   int rng[48];
+   OpusRepacketizer *rp;
+   unsigned char output_packet[MAX_PACKETOUT];
+   int merge = 1, split=0;
+
+   if (argc < 3)
+   {
+      usage(argv[0]);
+      return EXIT_FAILURE;
+   }
+   for (i=1;i<argc-2;i++)
+   {
+      if (strcmp(argv[i], "-merge")==0)
+      {
+         merge = atoi(argv[i+1]);
+         if(merge<1)
+         {
+            fprintf(stderr, "-merge parameter must be at least 1.\n");
+            return EXIT_FAILURE;
+         }
+         if(merge>48)
+         {
+            fprintf(stderr, "-merge parameter must be at most 48.\n");
+            return EXIT_FAILURE;
+         }
+         i++;
+      } else if (strcmp(argv[i], "-split")==0)
+         split = 1;
+      else
+      {
+         fprintf(stderr, "Unknown option: %s\n", argv[i]);
+         usage(argv[0]);
+         return EXIT_FAILURE;
+      }
+   }
+   /* Both files carry raw packet bytes, so open them in binary mode. */
+   fin = fopen(argv[argc-2], "rb");
+   if(fin==NULL)
+   {
+      fprintf(stderr, "Error opening input file: %s\n", argv[argc-2]);
+      return EXIT_FAILURE;
+   }
+   fout = fopen(argv[argc-1], "wb");
+   if(fout==NULL)
+   {
+      fprintf(stderr, "Error opening output file: %s\n", argv[argc-1]);
+      fclose(fin);
+      return EXIT_FAILURE;
+   }
+
+   rp = opus_repacketizer_create();
+   while (!eof)
+   {
+      int err;
+      int nb_packets=merge;
+      opus_repacketizer_init(rp);
+      for (i=0;i<nb_packets;i++)
+      {
+         unsigned char ch[4];
+         /* A short read leaves ch incomplete; -1 sends it to the check below, which tells EOF from an error. */
+         len[i] = fread(ch, 1, 4, fin)==4 ? (int)char_to_int(ch) : -1;
+         if (len[i]>1500 || len[i]<0)
+         {
+            if (feof(fin))
+            {
+               eof = 1;
+            } else {
+               fprintf(stderr, "Invalid payload length\n");
+               fclose(fin);
+               fclose(fout);
+               return EXIT_FAILURE;
+            }
+            break;
+         }
+         err = fread(ch, 1, 4, fin);
+         rng[i] = char_to_int(ch);
+         err = fread(packets[i], 1, len[i], fin);
+         if (feof(fin))
+         {
+            eof = 1;
+            break;
+         }
+         err = opus_repacketizer_cat(rp, packets[i], len[i]);
+         if (err!=OPUS_OK)
+         {
+            fprintf(stderr, "opus_repacketizer_cat() failed: %s\n", opus_strerror(err));
+            break;
+         }
+      }
+      nb_packets = i;
+
+      if (eof)
+         break;
+
+      if (!split)
+      {
+         err = opus_repacketizer_out(rp, output_packet, MAX_PACKETOUT);
+         if (err>0) {
+            unsigned char int_field[4];
+            int_to_char(err, int_field);
+            if(fwrite(int_field, 1, 4, fout)!=4){
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            int_to_char(rng[nb_packets-1], int_field);
+            if (fwrite(int_field, 1, 4, fout)!=4) {
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) {
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+         } else {
+            fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err));
+         }
+      } else {
+         int nb_frames = opus_repacketizer_get_nb_frames(rp);
+         for (i=0;i<nb_frames;i++)
+         {
+            err = opus_repacketizer_out_range(rp, i, i+1, output_packet, MAX_PACKETOUT);
+            if (err>0) {
+               unsigned char int_field[4];
+               int_to_char(err, int_field);
+               if (fwrite(int_field, 1, 4, fout)!=4) {
+                  fprintf(stderr, "Error writing.\n");
+                  return EXIT_FAILURE;
+               }
+               if (i==nb_frames-1)
+                  int_to_char(rng[nb_packets-1], int_field);
+               else
+                  int_to_char(0, int_field);
+               if (fwrite(int_field, 1, 4, fout)!=4) {
+                  fprintf(stderr, "Error writing.\n");
+                  return EXIT_FAILURE;
+               }
+               if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) {
+                  fprintf(stderr, "Error writing.\n");
+                  return EXIT_FAILURE;
+               }
+            } else {
+               fprintf(stderr, "opus_repacketizer_out_range() failed: %s\n", opus_strerror(err));
+            }
+         }
+      }
+   }
+   opus_repacketizer_destroy(rp);
+   fclose(fin);
+   fclose(fout);
+   return EXIT_SUCCESS;
+}
diff --git a/drivers/opus/silk/A2NLSF.c b/drivers/opus/silk/A2NLSF.c
new file mode 100644
index 0000000000..cec53a5cd8
--- /dev/null
+++ b/drivers/opus/silk/A2NLSF.c
@@ -0,0 +1,252 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/* Conversion between prediction filter coefficients and NLSFs */
+/* Requires the order to be an even number */
+/* A piecewise linear approximation maps LSF <-> cos(LSF) */
+/* Therefore the result is not accurate NLSFs, but the two */
+/* functions are accurate inverses of each other */
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "tables.h"
+
+/* Number of binary divisions (interval halving steps per root in silk_A2NLSF), when not in low complexity mode */
+#define BIN_DIV_STEPS_A2NLSF_FIX 3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */
+#define MAX_ITERATIONS_A2NLSF_FIX 30 /* Max number of bandwidth expansions silk_A2NLSF applies before it gives up and outputs a white spectrum */
+
+/* Helper function for silk_A2NLSF_init(): in-place change of polynomial representation */
+/* Transforms polynomials from cos(n*f) to cos(f)^n */
+static OPUS_INLINE void silk_A2NLSF_trans_poly(
+ opus_int32 *p, /* I/O Polynomial coefficients p[ 0 .. dd ], overwritten with the transformed coefficients */
+ const opus_int dd /* I Polynomial order (= filter order / 2 ) */
+)
+{
+ opus_int k, n; /* k: outer pass index (2 .. dd), n: coefficient index walked downwards from dd */
+
+ for( k = 2; k <= dd; k++ ) {
+ for( n = dd; n > k; n-- ) {
+ p[ n - 2 ] -= p[ n ]; /* coefficients above k are subtracted once from the entry two places below */
+ }
+ p[ k - 2 ] -= silk_LSHIFT( p[ k ], 1 ); /* coefficient k is subtracted after a left shift by one */
+ }
+}
+/* Helper function for silk_A2NLSF(): evaluates polynomial p of order dd at the point x */
+/* Polynomial evaluation, highest coefficient first, one multiply-accumulate per coefficient */
+static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16 */
+ opus_int32 *p, /* I Polynomial, Q16; entries p[ 0 .. dd ] are read, none are written */
+ const opus_int32 x, /* I Evaluation point, Q12 */
+ const opus_int dd /* I Order */
+)
+{
+ opus_int n;
+ opus_int32 x_Q16, y32;
+
+ y32 = p[ dd ]; /* Q16, start from the highest order coefficient */
+ x_Q16 = silk_LSHIFT( x, 4 ); /* Q12 -> Q16 */
+ for( n = dd - 1; n >= 0; n-- ) {
+ y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16; presumably p[ n ] + ( ( y32 * x_Q16 ) >> 16 ) - TODO confirm against the macro definition */
+ }
+ return y32;
+}
+
+static OPUS_INLINE void silk_A2NLSF_init( /* Helper function for silk_A2NLSF(): builds the polynomials P and Q from the filter coefficients */
+ const opus_int32 *a_Q16, /* I Filter coefficients in Q16; entries 0 .. 2*dd-1 are read */
+ opus_int32 *P, /* O Polynomial P; entries 0 .. dd are written */
+ opus_int32 *Q, /* O Polynomial Q; entries 0 .. dd are written */
+ const opus_int dd /* I Polynomial order (= filter order / 2 ) */
+)
+{
+ opus_int k;
+
+ /* Convert filter coefs to even and odd polynomials */
+ P[dd] = silk_LSHIFT( 1, 16 ); /* leading coefficient 1.0 in Q16 */
+ Q[dd] = silk_LSHIFT( 1, 16 ); /* leading coefficient 1.0 in Q16 */
+ for( k = 0; k < dd; k++ ) {
+ P[ k ] = -a_Q16[ dd - k - 1 ] - a_Q16[ dd + k ]; /* Q16, negated sum of the coefficient pair mirrored around the middle */
+ Q[ k ] = -a_Q16[ dd - k - 1 ] + a_Q16[ dd + k ]; /* Q16, same pair with the upper coefficient added instead of subtracted */
+ }
+
+ /* Divide out zeros as we have that for even filter orders, */
+ /* z = 1 is always a root in Q, and */
+ /* z = -1 is always a root in P */
+ for( k = dd; k > 0; k-- ) {
+ P[ k - 1 ] -= P[ k ];
+ Q[ k - 1 ] += Q[ k ];
+ }
+
+ /* Transform polynomials from cos(n*f) to cos(f)^n */
+ silk_A2NLSF_trans_poly( P, dd );
+ silk_A2NLSF_trans_poly( Q, dd );
+}
+
+/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */
+/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded in place and the search restarts; after MAX_ITERATIONS_A2NLSF_FIX expansions the NLSFs are set to a white spectrum instead. */
+void silk_A2NLSF(
+ opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
+ opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */
+ const opus_int d /* I Filter order (must be even) */
+)
+{
+ opus_int i, k, m, dd, root_ix, ffrac;
+ opus_int32 xlo, xhi, xmid; /* Evaluation points (Q12): ends and midpoint of the current search interval */
+ opus_int32 ylo, yhi, ymid, thr; /* Polynomial values at xlo, xhi, xmid; thr is the threshold used by the zero crossing test */
+ opus_int32 nom, den;
+ opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ];
+ opus_int32 Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
+ opus_int32 *PQ[ 2 ];
+ opus_int32 *p;
+
+ /* Store pointers to array */
+ PQ[ 0 ] = P;
+ PQ[ 1 ] = Q;
+
+ dd = silk_RSHIFT( d, 1 ); /* Order of P and Q (= filter order / 2) */
+
+ silk_A2NLSF_init( a_Q16, P, Q, dd );
+
+ /* Find roots, alternating between P and Q */
+ p = P; /* Pointer to polynomial */
+
+ xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/
+ ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+
+ if( ylo < 0 ) {
+ /* Set the first NLSF to zero and move on to the next */
+ NLSF[ 0 ] = 0;
+ p = Q; /* Pointer to polynomial */
+ ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+ root_ix = 1; /* Index of current root */
+ } else {
+ root_ix = 0; /* Index of current root */
+ }
+ k = 1; /* Loop counter: index into silk_LSFCosTab_FIX_Q12 of the upper end (xhi) of the current interval */
+ i = 0; /* Counter for bandwidth expansions applied */
+ thr = 0;
+ while( 1 ) {
+ /* Evaluate polynomial */
+ xhi = silk_LSFCosTab_FIX_Q12[ k ]; /* Q12 */
+ yhi = silk_A2NLSF_eval_poly( p, xhi, dd );
+
+ /* Detect zero crossing */
+ if( ( ylo <= 0 && yhi >= thr ) || ( ylo >= 0 && yhi <= -thr ) ) {
+ if( yhi == 0 ) {
+ /* If the root lies exactly at the end of the current */
+ /* interval, look for the next root in the next interval */
+ thr = 1;
+ } else {
+ thr = 0;
+ }
+ /* Binary division: halve the interval BIN_DIV_STEPS_A2NLSF_FIX times, keeping the half that contains the sign change */
+ ffrac = -256; /* Offset relative to k << 8, accumulated below and added to it when the NLSF is stored */
+ for( m = 0; m < BIN_DIV_STEPS_A2NLSF_FIX; m++ ) {
+ /* Evaluate polynomial */
+ xmid = silk_RSHIFT_ROUND( xlo + xhi, 1 );
+ ymid = silk_A2NLSF_eval_poly( p, xmid, dd );
+
+ /* Detect zero crossing */
+ if( ( ylo <= 0 && ymid >= 0 ) || ( ylo >= 0 && ymid <= 0 ) ) {
+ /* Reduce frequency */
+ xhi = xmid;
+ yhi = ymid;
+ } else {
+ /* Increase frequency */
+ xlo = xmid;
+ ylo = ymid;
+ ffrac = silk_ADD_RSHIFT( ffrac, 128, m );
+ }
+ }
+
+ /* Interpolate */
+ if( silk_abs( ylo ) < 65536 ) {
+ /* Avoid dividing by zero */
+ den = ylo - yhi;
+ nom = silk_LSHIFT( ylo, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) + silk_RSHIFT( den, 1 );
+ if( den != 0 ) {
+ ffrac += silk_DIV32( nom, den );
+ }
+ } else {
+ /* No risk of dividing by zero because abs(ylo - yhi) >= abs(ylo) >= 65536 */
+ ffrac += silk_DIV32( ylo, silk_RSHIFT( ylo - yhi, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) );
+ }
+ NLSF[ root_ix ] = (opus_int16)silk_min_32( silk_LSHIFT( (opus_int32)k, 8 ) + ffrac, silk_int16_MAX );
+
+ silk_assert( NLSF[ root_ix ] >= 0 );
+
+ root_ix++; /* Next root */
+ if( root_ix >= d ) {
+ /* Found all roots */
+ break;
+ }
+ /* Alternate pointer to polynomial */
+ p = PQ[ root_ix & 1 ];
+
+ /* Restart from the lower end of the current table interval; ylo is not evaluated but set to a constant whose sign depends on bit 1 of root_ix */
+ xlo = silk_LSFCosTab_FIX_Q12[ k - 1 ]; /* Q12*/
+ ylo = silk_LSHIFT( 1 - ( root_ix & 2 ), 12 );
+ } else {
+ /* Increment loop counter */
+ k++;
+ xlo = xhi;
+ ylo = yhi;
+ thr = 0;
+
+ if( k > LSF_COS_TAB_SZ_FIX ) {
+ /* Ran past the end of the cosine table without finding all d roots */
+ i++;
+ if( i > MAX_ITERATIONS_A2NLSF_FIX ) {
+ /* Set NLSFs to white spectrum (evenly spaced multiples of NLSF[ 0 ]) and exit */
+ NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 );
+ for( k = 1; k < d; k++ ) {
+ NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] );
+ }
+ return;
+ }
+
+ /* Error: Apply progressively more bandwidth expansion and run again */
+ silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/
+
+ silk_A2NLSF_init( a_Q16, P, Q, dd );
+ p = P; /* Pointer to polynomial */
+ xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/
+ ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+ if( ylo < 0 ) {
+ /* Set the first NLSF to zero and move on to the next */
+ NLSF[ 0 ] = 0;
+ p = Q; /* Pointer to polynomial */
+ ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+ root_ix = 1; /* Index of current root */
+ } else {
+ root_ix = 0; /* Index of current root */
+ }
+ k = 1; /* Reset loop counter */
+ }
+ }
+ }
+}
diff --git a/drivers/opus/silk/API.h b/drivers/opus/silk/API.h
new file mode 100644
index 0000000000..f0601bcf6b
--- /dev/null
+++ b/drivers/opus/silk/API.h
@@ -0,0 +1,133 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_API_H
+#define SILK_API_H
+
+#include "control.h"
+#include "typedef.h"
+#include "errors.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define SILK_MAX_FRAMES_PER_PACKET 3 /* Number of entries in the per-frame VADFlags[] array of silk_TOC_struct */
+
+/* Struct for TOC (Table of Contents) of a packet; output type of silk_get_TOC(), whose declaration below is disabled with #if 0 */
+typedef struct {
+ opus_int VADFlag; /* Voice activity for packet */
+ opus_int VADFlags[ SILK_MAX_FRAMES_PER_PACKET ]; /* Voice activity for each frame in packet */
+ opus_int inbandFECFlag; /* Flag indicating if packet contains in-band FEC */
+} silk_TOC_struct;
+
+/****************************************/
+/* Encoder functions */
+/****************************************/
+
+/***********************************************/
+/* Get size in bytes of the Silk encoder state */
+/***********************************************/
+opus_int silk_Get_Encoder_Size( /* O Returns error code */
+ opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */
+);
+
+/*************************/
+/* Init or reset encoder */
+/*************************/
+opus_int silk_InitEncoder( /* O Returns error code */
+ void *encState, /* I/O State (opaque pointer to the encoder state memory) */
+ int arch, /* I Run-time architecture */
+ silk_EncControlStruct *encStatus /* O Encoder Status */
+);
+
+/**************************/
+/* Encode frame with Silk */
+/**************************/
+/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */
+/* encControl->payloadSize_ms is set to */
+opus_int silk_Encode( /* O Returns error code */
+ void *encState, /* I/O State */
+ silk_EncControlStruct *encControl, /* I Control status */
+ const opus_int16 *samplesIn, /* I Speech sample input vector */
+ opus_int nSamplesIn, /* I Number of samples in input vector */
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
+ const opus_int prefillFlag /* I Flag to indicate prefilling buffers, no coding (see the note above on input length) */
+);
+
+/****************************************/
+/* Decoder functions */
+/****************************************/
+
+/***********************************************/
+/* Get size in bytes of the Silk decoder state */
+/***********************************************/
+opus_int silk_Get_Decoder_Size( /* O Returns error code */
+ opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */
+);
+
+/*************************/
+/* Init or Reset decoder */
+/*************************/
+opus_int silk_InitDecoder( /* O Returns error code */
+ void *decState /* I/O State (opaque pointer to the decoder state memory) */
+);
+
+/******************/
+/* Decode a frame */
+/******************/
+opus_int silk_Decode( /* O Returns error code */
+ void* decState, /* I/O State */
+ silk_DecControlStruct* decControl, /* I/O Control Structure */
+ opus_int lostFlag, /* I 0: no loss, 1: loss, 2: decode FEC */
+ opus_int newPacketFlag, /* I Indicates first decoder call for this packet */
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int16 *samplesOut, /* O Decoded output speech vector */
+ opus_int32 *nSamplesOut /* O Number of samples decoded */
+);
+
+#if 0
+/*********************************************************************/
+/* Get table of contents for a packet (declaration disabled: #if 0) */
+/*********************************************************************/
+opus_int silk_get_TOC(
+ const opus_uint8 *payload, /* I Payload data */
+ const opus_int nBytesIn, /* I Number of input bytes */
+ const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */
+ silk_TOC_struct *Silk_TOC /* O Type of content */
+);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/CNG.c b/drivers/opus/silk/CNG.c
new file mode 100644
index 0000000000..8b8dbf882c
--- /dev/null
+++ b/drivers/opus/silk/CNG.c
@@ -0,0 +1,172 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/* Generates excitation for CNG LPC synthesis by picking gain-scaled samples at pseudo-random positions of the excitation buffer */
+static OPUS_INLINE void silk_CNG_exc(
+ opus_int32 residual_Q10[], /* O CNG residual signal Q10; entries 0 .. length-1 are written */
+ opus_int32 exc_buf_Q14[], /* I Random samples buffer; NOTE(review): name says Q14, this comment used to say Q10 - confirm */
+ opus_int32 Gain_Q16, /* I Gain to apply */
+ opus_int length, /* I Length */
+ opus_int32 *rand_seed /* I/O Seed to random index generator; updated with the final seed on return */
+)
+{
+ opus_int32 seed;
+ opus_int i, idx, exc_mask;
+
+ exc_mask = CNG_BUF_MASK_MAX; /* Start from the largest index mask and shift it right until it no longer exceeds length */
+ while( exc_mask > length ) {
+ exc_mask = silk_RSHIFT( exc_mask, 1 );
+ }
+
+ seed = *rand_seed; /* Work on a local copy, written back after the loop */
+ for( i = 0; i < length; i++ ) {
+ seed = silk_RAND( seed );
+ idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); /* Buffer index taken from the seed bits above bit 23, limited by the mask */
+ silk_assert( idx >= 0 );
+ silk_assert( idx <= CNG_BUF_MASK_MAX );
+ residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); /* Scale by the gain, then limit to 16 bits via silk_SAT16 */
+ }
+ *rand_seed = seed;
+}
+
+void silk_CNG_Reset( /* Resets the CNG state in psDec->sCNG: smoothed NLSFs, smoothed gain and random seed */
+ silk_decoder_state *psDec /* I/O Decoder state; LPC_order is read, sCNG is written */
+)
+{
+ opus_int i, NLSF_step_Q15, NLSF_acc_Q15;
+
+ NLSF_step_Q15 = silk_DIV32_16( silk_int16_MAX, psDec->LPC_order + 1 ); /* Spacing between consecutive initial NLSFs */
+ NLSF_acc_Q15 = 0;
+ for( i = 0; i < psDec->LPC_order; i++ ) {
+ NLSF_acc_Q15 += NLSF_step_Q15;
+ psDec->sCNG.CNG_smth_NLSF_Q15[ i ] = NLSF_acc_Q15; /* Evenly spaced: ( i + 1 ) * NLSF_step_Q15 */
+ }
+ psDec->sCNG.CNG_smth_Gain_Q16 = 0;
+ psDec->sCNG.rand_seed = 3176576; /* Fixed starting seed for the random index generator */
+}
+
+/* Updates CNG estimate, and applies the CNG when packet was lost (the generated noise is added into frame[]) */
+void silk_CNG(
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I/O Decoder control */
+ opus_int16 frame[], /* I/O Signal */
+ opus_int length /* I Length of residual */
+)
+{
+ opus_int i, subfr;
+ opus_int32 sum_Q6, max_Gain_Q16;
+ opus_int16 A_Q12[ MAX_LPC_ORDER ];
+ silk_CNG_struct *psCNG = &psDec->sCNG;
+ SAVE_STACK;
+
+ if( psDec->fs_kHz != psCNG->fs_kHz ) {
+ /* Reset state (sampling rate differs from the one the CNG state was last set up for) */
+ silk_CNG_Reset( psDec );
+
+ psCNG->fs_kHz = psDec->fs_kHz;
+ }
+ if( psDec->lossCnt == 0 && psDec->prevSignalType == TYPE_NO_VOICE_ACTIVITY ) {
+ /* Update CNG parameters (only for frames without loss whose previous signal type is no voice activity) */
+
+ /* Smoothing of LSF's */
+ for( i = 0; i < psDec->LPC_order; i++ ) {
+ psCNG->CNG_smth_NLSF_Q15[ i ] += silk_SMULWB( (opus_int32)psDec->prevNLSF_Q15[ i ] - (opus_int32)psCNG->CNG_smth_NLSF_Q15[ i ], CNG_NLSF_SMTH_Q16 );
+ }
+ /* Find the subframe with the highest gain */
+ max_Gain_Q16 = 0;
+ subfr = 0;
+ for( i = 0; i < psDec->nb_subfr; i++ ) {
+ if( psDecCtrl->Gains_Q16[ i ] > max_Gain_Q16 ) {
+ max_Gain_Q16 = psDecCtrl->Gains_Q16[ i ];
+ subfr = i;
+ }
+ }
+ /* Update CNG excitation buffer: shift the content up by one subframe, then copy the excitation of the highest-gain subframe to the front */
+ silk_memmove( &psCNG->CNG_exc_buf_Q14[ psDec->subfr_length ], psCNG->CNG_exc_buf_Q14, ( psDec->nb_subfr - 1 ) * psDec->subfr_length * sizeof( opus_int32 ) );
+ silk_memcpy( psCNG->CNG_exc_buf_Q14, &psDec->exc_Q14[ subfr * psDec->subfr_length ], psDec->subfr_length * sizeof( opus_int32 ) );
+
+ /* Smooth gains (one smoothing step per subframe gain) */
+ for( i = 0; i < psDec->nb_subfr; i++ ) {
+ psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 );
+ }
+ }
+
+ /* Add CNG when packet is lost or during DTX (the code tests for a nonzero lossCnt) */
+ if( psDec->lossCnt ) {
+ VARDECL( opus_int32, CNG_sig_Q10 );
+
+ ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 ); /* First MAX_LPC_ORDER entries hold the filter state, followed by length samples */
+
+ /* Generate CNG excitation */
+ silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
+
+ /* Convert CNG NLSF to filter representation */
+ silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
+
+ /* Generate CNG signal, by synthesis filtering */
+ silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+ for( i = 0; i < length; i++ ) {
+ silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+ if( psDec->LPC_order == 16 ) {
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
+ sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
+ }
+
+ /* Update states: add the prediction (Q6, shifted up by 4) to the excitation sample; the sum feeds the following iterations */
+ CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
+
+ frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) ); /* Add the rounded Q0 value of sum_Q6 to the signal, using the saturating 16-bit add macro */
+ }
+ silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); /* Keep the last MAX_LPC_ORDER samples as state for the next call */
+ } else {
+ silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) ); /* No loss: zero the first LPC_order entries of the synthesis state */
+ }
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/HP_variable_cutoff.c b/drivers/opus/silk/HP_variable_cutoff.c
new file mode 100644
index 0000000000..379752bb19
--- /dev/null
+++ b/drivers/opus/silk/HP_variable_cutoff.c
@@ -0,0 +1,77 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#ifdef OPUS_FIXED_POINT
+#include "main_FIX.h"
+#else
+#include "main_FLP.h"
+#endif
+#include "tuning_parameters.h"
+
+/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics; this function only updates the smoothed log cutoff (variable_HP_smth1_Q15) of the first channel state */
+void silk_HP_variable_cutoff(
+ silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states; only state_Fxx[ 0 ].sCmn is accessed here */
+)
+{
+ opus_int quality_Q15;
+ opus_int32 pitch_freq_Hz_Q16, pitch_freq_log_Q7, delta_freq_Q7;
+ silk_encoder_state *psEncC1 = &state_Fxx[ 0 ].sCmn;
+
+ /* Adaptive cutoff frequency: estimate low end of pitch frequency range (nothing is updated unless the previous frame was voiced) */
+ if( psEncC1->prevSignalType == TYPE_VOICED ) {
+ /* difference, in log domain */
+ pitch_freq_Hz_Q16 = silk_DIV32_16( silk_LSHIFT( silk_MUL( psEncC1->fs_kHz, 1000 ), 16 ), psEncC1->prevLag ); /* sampling rate in Hz (Q16) divided by the previous pitch lag */
+ pitch_freq_log_Q7 = silk_lin2log( pitch_freq_Hz_Q16 ) - ( 16 << 7 ); /* subtracting 16 << 7 removes the Q16 scaling in the log domain */
+
+ /* adjustment based on quality */
+ quality_Q15 = psEncC1->input_quality_bands_Q15[ 0 ];
+ pitch_freq_log_Q7 = silk_SMLAWB( pitch_freq_log_Q7, silk_SMULWB( silk_LSHIFT( -quality_Q15, 2 ), quality_Q15 ),
+ pitch_freq_log_Q7 - ( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ) ) );
+
+ /* delta_freq = pitch_freq_log - psEnc->variable_HP_smth1; */
+ delta_freq_Q7 = pitch_freq_log_Q7 - silk_RSHIFT( psEncC1->variable_HP_smth1_Q15, 8 ); /* smoother state converted from Q15 to Q7 */
+ if( delta_freq_Q7 < 0 ) {
+ /* less smoothing for decreasing pitch frequency, to track something close to the minimum */
+ delta_freq_Q7 = silk_MUL( delta_freq_Q7, 3 );
+ }
+
+ /* limit delta, to reduce impact of outliers in pitch estimation */
+ delta_freq_Q7 = silk_LIMIT_32( delta_freq_Q7, -SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ), SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ) );
+
+ /* update smoother; the step is weighted by the speech activity level */
+ psEncC1->variable_HP_smth1_Q15 = silk_SMLAWB( psEncC1->variable_HP_smth1_Q15,
+ silk_SMULBB( psEncC1->speech_activity_Q8, delta_freq_Q7 ), SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF1, 16 ) );
+
+ /* limit frequency range to the log of [ VARIABLE_HP_MIN_CUTOFF_HZ, VARIABLE_HP_MAX_CUTOFF_HZ ], shifted up by 8 to match the Q15 state */
+ psEncC1->variable_HP_smth1_Q15 = silk_LIMIT_32( psEncC1->variable_HP_smth1_Q15,
+ silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ),
+ silk_LSHIFT( silk_lin2log( VARIABLE_HP_MAX_CUTOFF_HZ ), 8 ) );
+ }
+}
diff --git a/drivers/opus/silk/Inlines.h b/drivers/opus/silk/Inlines.h
new file mode 100644
index 0000000000..ec986cdfdd
--- /dev/null
+++ b/drivers/opus/silk/Inlines.h
@@ -0,0 +1,188 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/*! \file Inlines.h
+ * \brief Inlines.h defines OPUS_INLINE signal processing functions.
+ */
+
+#ifndef SILK_FIX_INLINES_H
+#define SILK_FIX_INLINES_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* count leading zeros of opus_int64, using silk_CLZ32() on whichever 32-bit half holds the leading one */
+static OPUS_INLINE opus_int32 silk_CLZ64( opus_int64 in )
+{
+ opus_int32 in_upper; /* upper 32 bits of the input */
+
+ in_upper = (opus_int32)silk_RSHIFT64(in, 32);
+ if (in_upper == 0) {
+ /* Search in the lower 32 bits; the 32 zero bits of the upper half are added to the count */
+ return 32 + silk_CLZ32( (opus_int32) in );
+ } else {
+ /* Search in the upper 32 bits */
+ return silk_CLZ32( in_upper );
+ }
+}
+
+/* get number of leading zeros and fractional part (the bits right after the leading one) */
+static OPUS_INLINE void silk_CLZ_FRAC(
+ opus_int32 in, /* I input */
+ opus_int32 *lz, /* O number of leading zeros */
+ opus_int32 *frac_Q7 /* O the 7 bits right after the leading one */
+)
+{
+ opus_int32 lzeros = silk_CLZ32(in);
+
+ * lz = lzeros;
+ * frac_Q7 = silk_ROR32(in, 24 - lzeros) & 0x7f; /* rotate so that the 7 bits below the leading one end up in the lowest 7 bits, then mask them out */
+}
+
+/* Approximation of square root; returns 0 for inputs <= 0 */
+/* Accuracy: < +/- 10% for output values > 15 */
+/* < +/- 2.5% for output values > 120 */
+static OPUS_INLINE opus_int32 silk_SQRT_APPROX( opus_int32 x )
+{
+ opus_int32 y, lz, frac_Q7;
+
+ if( x <= 0 ) {
+ return 0;
+ }
+
+ silk_CLZ_FRAC(x, &lz, &frac_Q7);
+
+ if( lz & 1 ) {
+ y = 32768; /* odd number of leading zeros: start value 2^15 */
+ } else {
+ y = 46214; /* 46214 ~= sqrt(2) * 32768 (the exact product is ~46341) */
+ }
+
+ /* get scaling right: shift down by half the number of leading zeros */
+ y >>= silk_RSHIFT(lz, 1);
+
+ /* increment using fractional part of input (213 * frac_Q7 acts as the correction factor applied to y) */
+ y = silk_SMLAWB(y, y, silk_SMULBB(213, frac_Q7));
+
+ return y;
+}
+
+/* Divide two int32 values and return result as int32 in a given Q-domain; uses a 16-bit reciprocal estimate plus one refinement step */
+static OPUS_INLINE opus_int32 silk_DIV32_varQ( /* O returns a good approximation of "(a32 << Qres) / b32" */
+ const opus_int32 a32, /* I numerator (Q0) */
+ const opus_int32 b32, /* I denominator (Q0), must be nonzero (asserted) */
+ const opus_int Qres /* I Q-domain of result (>= 0) */
+)
+{
+ opus_int a_headrm, b_headrm, lshift;
+ opus_int32 b32_inv, a32_nrm, b32_nrm, result;
+
+ silk_assert( b32 != 0 );
+ silk_assert( Qres >= 0 );
+
+ /* Compute number of bits head room and normalize inputs */
+ a_headrm = silk_CLZ32( silk_abs(a32) ) - 1;
+ a32_nrm = silk_LSHIFT(a32, a_headrm); /* Q: a_headrm */
+ b_headrm = silk_CLZ32( silk_abs(b32) ) - 1;
+ b32_nrm = silk_LSHIFT(b32, b_headrm); /* Q: b_headrm */
+
+ /* Inverse of b32, with 14 bits of precision */
+ b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) ); /* Q: 29 + 16 - b_headrm */
+
+ /* First approximation */
+ result = silk_SMULWB(a32_nrm, b32_inv); /* Q: 29 + a_headrm - b_headrm */
+
+ /* Compute residual by subtracting product of denominator and first approximation */
+ /* It's OK to overflow because the final value of a32_nrm should always be small */
+ a32_nrm = silk_SUB32_ovflw(a32_nrm, silk_LSHIFT_ovflw( silk_SMMUL(b32_nrm, result), 3 )); /* Q: a_headrm */
+
+ /* Refinement */
+ result = silk_SMLAWB(result, a32_nrm, b32_inv); /* Q: 29 + a_headrm - b_headrm */
+
+ /* Convert to Qres domain */
+ lshift = 29 + a_headrm - b_headrm - Qres; /* number of bits the result sits above the requested Q-domain */
+ if( lshift < 0 ) {
+ return silk_LSHIFT_SAT32(result, -lshift); /* result is below the requested Q-domain: shift up via the saturating shift macro */
+ } else {
+ if( lshift < 32){
+ return silk_RSHIFT(result, lshift);
+ } else {
+ /* Avoid undefined result (shift count of 32 or more) */
+ return 0;
+ }
+ }
+}
+
+/* Invert int32 value and return result as int32 in a given Q-domain; uses a 16-bit reciprocal estimate plus one refinement step */
+static OPUS_INLINE opus_int32 silk_INVERSE32_varQ( /* O returns a good approximation of "(1 << Qres) / b32" */
+ const opus_int32 b32, /* I denominator (Q0), must be nonzero (asserted) */
+ const opus_int Qres /* I Q-domain of result (> 0) */
+)
+{
+ opus_int b_headrm, lshift;
+ opus_int32 b32_inv, b32_nrm, err_Q32, result;
+
+ silk_assert( b32 != 0 );
+ silk_assert( Qres > 0 );
+
+ /* Compute number of bits head room and normalize input */
+ b_headrm = silk_CLZ32( silk_abs(b32) ) - 1;
+ b32_nrm = silk_LSHIFT(b32, b_headrm); /* Q: b_headrm */
+
+ /* Inverse of b32, with 14 bits of precision */
+ b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) ); /* Q: 29 + 16 - b_headrm */
+
+ /* First approximation */
+ result = silk_LSHIFT(b32_inv, 16); /* Q: 61 - b_headrm */
+
+ /* Compute residual by subtracting product of denominator and first approximation from one */
+ err_Q32 = silk_LSHIFT( ((opus_int32)1<<29) - silk_SMULWB(b32_nrm, b32_inv), 3 ); /* Q32 */
+
+ /* Refinement */
+ result = silk_SMLAWW(result, err_Q32, b32_inv); /* Q: 61 - b_headrm */
+
+ /* Convert to Qres domain */
+ lshift = 61 - b_headrm - Qres; /* number of bits the result sits above the requested Q-domain */
+ if( lshift <= 0 ) {
+ return silk_LSHIFT_SAT32(result, -lshift); /* result is at or below the requested Q-domain: shift up via the saturating shift macro */
+ } else {
+ if( lshift < 32){
+ return silk_RSHIFT(result, lshift);
+ }else{
+ /* Avoid undefined result (shift count of 32 or more) */
+ return 0;
+ }
+ }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_FIX_INLINES_H */
diff --git a/drivers/opus/silk/LPC_analysis_filter.c b/drivers/opus/silk/LPC_analysis_filter.c
new file mode 100644
index 0000000000..98ef509e4e
--- /dev/null
+++ b/drivers/opus/silk/LPC_analysis_filter.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "celt_lpc.h"
+
+/*
+ * LPC analysis filter.
+ * The filter starts from zero state on every call; the first d input
+ * samples only prime it, and the first d output samples are set to zero.
+ */
+void silk_LPC_analysis_filter(
+    opus_int16                  *out,               /* O    Output signal                                               */
+    const opus_int16            *in,                /* I    Input signal                                                */
+    const opus_int16            *B,                 /* I    MA prediction coefficients, Q12 [order]                     */
+    const opus_int32            len,                /* I    Signal length                                               */
+    const opus_int32            d                   /* I    Filter order                                                */
+)
+{
+    opus_int   j;
+#ifdef OPUS_FIXED_POINT
+    opus_int16 mem[SILK_MAX_ORDER_LPC];
+    opus_int16 num[SILK_MAX_ORDER_LPC];
+#else
+    int n;
+    opus_int32 acc_Q12, rounded;
+    const opus_int16 *hist;
+#endif
+
+    silk_assert( d >= 6 );
+    silk_assert( (d & 1) == 0 );
+    silk_assert( d <= len );
+
+#ifdef OPUS_FIXED_POINT
+    silk_assert( d <= SILK_MAX_ORDER_LPC );
+    /* Negated coefficients and the reversed first d input samples feed celt_fir */
+    for( j = 0; j < d; j++ ) {
+        num[ j ] = -B[ j ];
+        mem[ j ] = in[ d - j - 1 ];
+    }
+    celt_fir( in + d, num, out + d, len - d, d, mem );
+    /* Zero the first d outputs only after filtering has read the input */
+    j = 0;
+    while( j < d ) {
+        out[ j ] = 0;
+        j++;
+    }
+#else
+    for( n = d; n < len; n++ ) {
+        /* hist[ 0 ] is the sample just before the one being predicted */
+        hist = &in[ n - 1 ];
+
+        acc_Q12 = silk_SMULBB( hist[ 0 ], B[ 0 ] );
+        /* Wrap-around is allowed here so that two wraps can cancel each other; the rare
+           cases where the result wraps can only be triggered by invalid streams */
+        acc_Q12 = silk_SMLABB_ovflw( acc_Q12, hist[ -1 ], B[ 1 ] );
+        acc_Q12 = silk_SMLABB_ovflw( acc_Q12, hist[ -2 ], B[ 2 ] );
+        acc_Q12 = silk_SMLABB_ovflw( acc_Q12, hist[ -3 ], B[ 3 ] );
+        acc_Q12 = silk_SMLABB_ovflw( acc_Q12, hist[ -4 ], B[ 4 ] );
+        acc_Q12 = silk_SMLABB_ovflw( acc_Q12, hist[ -5 ], B[ 5 ] );
+        for( j = 6; j < d; j += 2 ) {
+            acc_Q12 = silk_SMLABB_ovflw( acc_Q12, hist[ -j     ], B[ j     ] );
+            acc_Q12 = silk_SMLABB_ovflw( acc_Q12, hist[ -j - 1 ], B[ j + 1 ] );
+        }
+
+        /* Residual = current sample (in Q12) minus the prediction */
+        acc_Q12 = silk_SUB32_ovflw( silk_LSHIFT( (opus_int32)hist[ 1 ], 12 ), acc_Q12 );
+
+        /* Back to Q0 with rounding */
+        rounded = silk_RSHIFT_ROUND( acc_Q12, 12 );
+
+        /* Clamp to the 16-bit range and store */
+        out[ n ] = (opus_int16)silk_SAT16( rounded );
+    }
+
+    /* The first d output samples are zero */
+    silk_memset( out, 0, d * sizeof( opus_int16 ) );
+#endif
+}
diff --git a/drivers/opus/silk/LPC_inv_pred_gain.c b/drivers/opus/silk/LPC_inv_pred_gain.c
new file mode 100644
index 0000000000..6dc9a49861
--- /dev/null
+++ b/drivers/opus/silk/LPC_inv_pred_gain.c
@@ -0,0 +1,154 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Q-domain used for the working copy of the coefficients in this file */
+#define QA 24
+/* Stability threshold: 0.99975 expressed in the QA domain */
+#define A_LIMIT SILK_FIX_CONST( 0.99975, QA )
+
+/* 64-bit product of a32 and b32, right-shifted by Q with rounding, then narrowed to opus_int32 */
+#define MUL32_FRAC_Q(a32, b32, Q) ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q)))
+
+/* Compute inverse of LPC prediction gain, and */
+/* test if LPC coefficients are stable (all poles within unit circle) */
+/* Returns 0 as soon as a tested coefficient falls outside [-A_LIMIT, A_LIMIT]. */
+/* The two rows of A_QA are used alternately as source and destination, so the */
+/* caller's array is overwritten. The input is read from row ( order & 1 ).    */
+/* NOTE(review): presumably a step-down (reverse Levinson) recursion - confirm. */
+static opus_int32 LPC_inverse_pred_gain_QA( /* O Returns inverse prediction gain in energy domain, Q30 */
+ opus_int32 A_QA[ 2 ][ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
+ const opus_int order /* I Prediction order */
+)
+{
+ opus_int k, n, mult2Q;
+ opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp_QA;
+ opus_int32 *Aold_QA, *Anew_QA;
+
+ /* Start from the row the callers filled in */
+ Anew_QA = A_QA[ order & 1 ];
+
+ /* Inverse gain starts at 1.0 in Q30 and is scaled down on every iteration */
+ invGain_Q30 = (opus_int32)1 << 30;
+ for( k = order - 1; k > 0; k-- ) {
+ /* Check for stability */
+ if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) {
+ return 0;
+ }
+
+ /* Set RC equal to negated AR coef */
+ rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA );
+
+ /* rc_mult1_Q30 range: [ 1 : 2^30 ] */
+ /* This is 1 - rc^2 in Q30 */
+ rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+ silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */
+ silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
+
+ /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
+ /* mult2Q is the bit length of rc_mult1_Q30; rc_mult2 is its reciprocal in Q( mult2Q + 30 ) */
+ mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
+ rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
+
+ /* Update inverse gain */
+ /* invGain_Q30 range: [ 0 : 2^30 ] */
+ invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
+ silk_assert( invGain_Q30 >= 0 );
+ silk_assert( invGain_Q30 <= ( 1 << 30 ) );
+
+ /* Swap pointers */
+ /* The row just read becomes the source; the other row ( k & 1 ) receives the reduced-order set */
+ Aold_QA = Anew_QA;
+ Anew_QA = A_QA[ k & 1 ];
+
+ /* Update AR coefficient */
+ for( n = 0; n < k; n++ ) {
+ tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 );
+ Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q );
+ }
+ }
+
+ /* The remaining coefficient at index 0 is handled here, outside the loop; */
+ /* no further coefficient update is needed after it.                       */
+ /* Check for stability */
+ if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) {
+ return 0;
+ }
+
+ /* Set RC equal to negated AR coef */
+ rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA );
+
+ /* Range: [ 1 : 2^30 ] */
+ rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+
+ /* Update inverse gain */
+ /* Range: [ 0 : 2^30 ] */
+ invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
+ silk_assert( invGain_Q30 >= 0 );
+ silk_assert( invGain_Q30 <= 1<<30 );
+
+ return invGain_Q30;
+}
+
+/* Inverse prediction gain for coefficients given in the Q12 domain */
+opus_int32 silk_LPC_inverse_pred_gain(              /* O   Returns inverse prediction gain in energy domain, Q30        */
+    const opus_int16            *A_Q12,             /* I   Prediction coefficients, Q12 [order]                         */
+    const opus_int              order               /* I   Prediction order                                             */
+)
+{
+    opus_int   i;
+    opus_int32 work_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
+    opus_int32 *row_QA = work_QA[ order & 1 ];
+    opus_int32 coef_sum = 0;
+
+    /* Widen each coefficient to the QA domain while accumulating their sum */
+    for( i = 0; i < order; i++ ) {
+        const opus_int32 coef = (opus_int32)A_Q12[ i ];
+        coef_sum += coef;
+        row_QA[ i ] = silk_LSHIFT32( coef, QA - 12 );
+    }
+
+    /* A coefficient sum of 1.0 (4096 in Q12) or more means the DC response is unstable: skip the full computation */
+    if( coef_sum >= 4096 ) {
+        return 0;
+    }
+    return LPC_inverse_pred_gain_QA( work_QA, order );
+}
+
+#ifdef OPUS_FIXED_POINT
+
+/* Inverse prediction gain for coefficients given in the Q24 domain */
+opus_int32 silk_LPC_inverse_pred_gain_Q24(          /* O   Returns inverse prediction gain in energy domain, Q30        */
+    const opus_int32            *A_Q24,             /* I   Prediction coefficients [order]                              */
+    const opus_int              order               /* I   Prediction order                                             */
+)
+{
+    opus_int   i;
+    opus_int32 work_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
+    opus_int32 *row_QA = work_QA[ order & 1 ];
+
+    /* Copy the coefficients into the working row, converting from Q24 to QA */
+    i = 0;
+    while( i < order ) {
+        row_QA[ i ] = silk_RSHIFT32( A_Q24[ i ], 24 - QA );
+        i++;
+    }
+
+    return LPC_inverse_pred_gain_QA( work_QA, order );
+}
+#endif
diff --git a/drivers/opus/silk/LP_variable_cutoff.c b/drivers/opus/silk/LP_variable_cutoff.c
new file mode 100644
index 0000000000..098c19d34f
--- /dev/null
+++ b/drivers/opus/silk/LP_variable_cutoff.c
@@ -0,0 +1,135 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/*
+ Elliptic/Cauer filters designed with 0.1 dB passband ripple,
+ 80 dB minimum stopband attenuation, and
+ [0.95 : 0.15 : 0.35] normalized cut off frequencies.
+*/
+
+#include "silk_main.h"
+
+/* Helper: fills B_Q28 / A_Q28 with taps copied from, or linearly interpolated between, rows of the transition tables */
+static OPUS_INLINE void silk_LP_interpolate_filter_taps(
+    opus_int32           B_Q28[ TRANSITION_NB ],
+    opus_int32           A_Q28[ TRANSITION_NA ],
+    const opus_int       ind,
+    const opus_int32     fac_Q16
+)
+{
+    opus_int i;
+
+    /* At or past the last table row: copy that row unchanged */
+    if( ind >= TRANSITION_INT_NUM - 1 ) {
+        silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NB * sizeof( opus_int32 ) );
+        silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NA * sizeof( opus_int32 ) );
+        return;
+    }
+
+    /* No positive fraction: row ind is used as is */
+    if( fac_Q16 <= 0 ) {
+        silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ ind ], TRANSITION_NB * sizeof( opus_int32 ) );
+        silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ ind ], TRANSITION_NA * sizeof( opus_int32 ) );
+        return;
+    }
+
+    if( fac_Q16 < 32768 ) {
+        /* fac_Q16 fits a 16-bit int: interpolate forward from row ind */
+        for( i = 0; i < TRANSITION_NB; i++ ) {
+            B_Q28[ i ] = silk_SMLAWB(
+                silk_Transition_LP_B_Q28[ ind     ][ i ],
+                silk_Transition_LP_B_Q28[ ind + 1 ][ i ] -
+                silk_Transition_LP_B_Q28[ ind     ][ i ],
+                fac_Q16 );
+        }
+        for( i = 0; i < TRANSITION_NA; i++ ) {
+            A_Q28[ i ] = silk_SMLAWB(
+                silk_Transition_LP_A_Q28[ ind     ][ i ],
+                silk_Transition_LP_A_Q28[ ind + 1 ][ i ] -
+                silk_Transition_LP_A_Q28[ ind     ][ i ],
+                fac_Q16 );
+        }
+        return;
+    }
+
+    /* Otherwise ( fac_Q16 - ( 1 << 16 ) ) fits a 16-bit int: interpolate backward from row ind + 1 */
+    silk_assert( fac_Q16 - ( 1 << 16 ) == silk_SAT16( fac_Q16 - ( 1 << 16 ) ) );
+    for( i = 0; i < TRANSITION_NB; i++ ) {
+        B_Q28[ i ] = silk_SMLAWB(
+            silk_Transition_LP_B_Q28[ ind + 1 ][ i ],
+            silk_Transition_LP_B_Q28[ ind + 1 ][ i ] -
+            silk_Transition_LP_B_Q28[ ind     ][ i ],
+            fac_Q16 - ( (opus_int32)1 << 16 ) );
+    }
+    for( i = 0; i < TRANSITION_NA; i++ ) {
+        A_Q28[ i ] = silk_SMLAWB(
+            silk_Transition_LP_A_Q28[ ind + 1 ][ i ],
+            silk_Transition_LP_A_Q28[ ind + 1 ][ i ] -
+            silk_Transition_LP_A_Q28[ ind     ][ i ],
+            fac_Q16 - ( (opus_int32)1 << 16 ) );
+    }
+}
+
+/* Low-pass filter with variable cutoff frequency, using filter taps */
+/* interpolated piece-wise linearly between the transition tables.   */
+/* The filter runs only while psLP->mode is non-zero; with mode == 0 */
+/* the frame is left untouched.                                      */
+void silk_LP_variable_cutoff(
+    silk_LP_state               *psLP,                          /* I/O  LP filter state                             */
+    opus_int16                  *frame,                         /* I/O  Low-pass filtered output signal             */
+    const opus_int              frame_length                    /* I    Frame length                                */
+)
+{
+    opus_int32 B_Q28[ TRANSITION_NB ], A_Q28[ TRANSITION_NA ];
+    opus_int32 frac_Q16;
+    opus_int   row;
+
+    silk_assert( psLP->transition_frame_no >= 0 && psLP->transition_frame_no <= TRANSITION_FRAMES );
+
+    /* Nothing to do while the filter is switched off */
+    if( psLP->mode == 0 ) {
+        return;
+    }
+
+    /* Position within the transition, as a Q16 value: integer part selects the table row */
+#if( TRANSITION_INT_STEPS == 64 )
+    frac_Q16 = silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 - 6 );
+#else
+    frac_Q16 = silk_DIV32_16( silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 ), TRANSITION_FRAMES );
+#endif
+    row       = silk_RSHIFT( frac_Q16, 16 );
+    frac_Q16 -= silk_LSHIFT( row, 16 );
+
+    silk_assert( row >= 0 );
+    silk_assert( row < TRANSITION_INT_NUM );
+
+    /* Build the filter coefficients for this frame */
+    silk_LP_interpolate_filter_taps( B_Q28, A_Q28, row, frac_Q16 );
+
+    /* Advance the transition counter by mode, kept within [ 0, TRANSITION_FRAMES ] */
+    psLP->transition_frame_no = silk_LIMIT( psLP->transition_frame_no + psLP->mode, 0, TRANSITION_FRAMES );
+
+    /* Filter the frame in place with the biquad */
+    silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 );
+    silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1);
+}
diff --git a/drivers/opus/silk/MacroCount.h b/drivers/opus/silk/MacroCount.h
new file mode 100644
index 0000000000..834817d058
--- /dev/null
+++ b/drivers/opus/silk/MacroCount.h
@@ -0,0 +1,718 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SIGPROCFIX_API_MACROCOUNT_H
+#define SIGPROCFIX_API_MACROCOUNT_H
+#include <stdio.h>
+
+#ifdef silk_MACRO_COUNT
+/* Expands to the definition of the global operation counter */
+#define varDefine opus_int64 ops_count = 0;
+
+/* Running total of weighted operations, incremented by the counting functions below */
+extern opus_int64 ops_count;
+
+/* Returns the current value of the operation counter without changing it */
+static OPUS_INLINE opus_int64 silk_SaveCount(){
+    opus_int64 current;
+
+    current = ops_count;
+    return current;
+}
+
+/* Returns the current value of the operation counter and resets the counter to zero */
+static OPUS_INLINE opus_int64 silk_SaveResetCount(){
+    const opus_int64 snapshot = ops_count;
+
+    ops_count = 0;
+    return snapshot;
+}
+
+/* Prints the operation counter to stdout.                                   */
+/* Declared void: the old definition had no return type (implicit int, which */
+/* C99 and C++ reject) and never returned a value.                           */
+/* The counter is narrowed to opus_int32 to match the %d conversion.         */
+static OPUS_INLINE void silk_PrintCount(void){
+    printf("ops_count = %d \n ", (opus_int32)ops_count);
+}
+
+#undef silk_MUL
+/* Counted 32-bit signed multiply (4 ops) */
+static OPUS_INLINE opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){
+    ops_count += 4;
+    return a32 * b32;
+}
+
+#undef silk_MUL_uint
+/* Counted 32-bit unsigned multiply (4 ops) */
+static OPUS_INLINE opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){
+    ops_count += 4;
+    return a32 * b32;
+}
+#undef silk_MLA
+/* Counted multiply-accumulate: a32 + b32 * c32 (4 ops) */
+static OPUS_INLINE opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    ops_count += 4;
+    return a32 + b32 * c32;
+}
+
+#undef silk_MLA_uint
+/* Counted unsigned multiply-accumulate: a32 + b32 * c32 (4 ops); the unsigned result is returned as opus_int32 */
+static OPUS_INLINE opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){
+    opus_uint32 acc;
+
+    ops_count += 4;
+    acc = a32 + b32 * c32;
+    return acc;
+}
+
+#undef silk_SMULWB
+/* Counted (a32 * low 16 bits of b32) >> 16, computed in two halves (5 ops) */
+static OPUS_INLINE opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){
+    const opus_int32 b_lo = (opus_int32)((opus_int16)b32);
+
+    ops_count += 5;
+    return (a32 >> 16) * b_lo + (((a32 & 0x0000FFFF) * b_lo) >> 16);
+}
+#undef silk_SMLAWB
+/* Counted a32 + ((b32 * low 16 bits of c32) >> 16), computed in two halves (5 ops) */
+static OPUS_INLINE opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    const opus_int32 c_lo = (opus_int32)((opus_int16)(c32));
+
+    ops_count += 5;
+    return ((a32) + ((((b32) >> 16) * c_lo) + ((((b32) & 0x0000FFFF) * c_lo) >> 16)));
+}
+
+#undef silk_SMULWT
+/* Counted (a32 * top 16 bits of b32) >> 16, computed in two halves (4 ops) */
+static OPUS_INLINE opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){
+    const opus_int32 b_hi = b32 >> 16;
+
+    ops_count += 4;
+    return (a32 >> 16) * b_hi + (((a32 & 0x0000FFFF) * b_hi) >> 16);
+}
+#undef silk_SMLAWT
+/* Counted a32 + ((b32 * top 16 bits of c32) >> 16), computed in two halves (4 ops) */
+static OPUS_INLINE opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    const opus_int32 c_hi = c32 >> 16;
+
+    ops_count += 4;
+    return a32 + ((b32 >> 16) * c_hi) + (((b32 & 0x0000FFFF) * c_hi) >> 16);
+}
+
+#undef silk_SMULBB
+/* Counted product of the low 16 bits of a32 and the low 16 bits of b32 (1 op) */
+static OPUS_INLINE opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){
+    const opus_int32 a_lo = (opus_int32)((opus_int16)a32);
+    const opus_int32 b_lo = (opus_int32)((opus_int16)b32);
+
+    ops_count += 1;
+    return a_lo * b_lo;
+}
+#undef silk_SMLABB
+/* Counted a32 + (low 16 bits of b32) * (low 16 bits of c32) (1 op) */
+static OPUS_INLINE opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    const opus_int32 b_lo = (opus_int32)((opus_int16)b32);
+    const opus_int32 c_lo = (opus_int32)((opus_int16)c32);
+
+    ops_count += 1;
+    return a32 + b_lo * c_lo;
+}
+
+#undef silk_SMULBT
+/* Counted (low 16 bits of a32) * (top 16 bits of b32); charges 4 ops */
+static OPUS_INLINE opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){
+    const opus_int32 a_lo = (opus_int32)((opus_int16)a32);
+
+    ops_count += 4;
+    return a_lo * (b32 >> 16);
+}
+
+#undef silk_SMLABT
+/* Counted a32 + (low 16 bits of b32) * (top 16 bits of c32) (1 op) */
+static OPUS_INLINE opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    const opus_int32 b_lo = (opus_int32)((opus_int16)b32);
+
+    ops_count += 1;
+    return a32 + b_lo * (c32 >> 16);
+}
+
+#undef silk_SMULTT
+/* Counted product of the top 16 bits of a32 and the top 16 bits of b32 (1 op) */
+static OPUS_INLINE opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){
+    ops_count += 1;
+    return (a32 >> 16) * (b32 >> 16);
+}
+
+#undef silk_SMLATT
+/* Counted a32 + (top 16 bits of b32) * (top 16 bits of c32) (1 op) */
+static OPUS_INLINE opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    ops_count += 1;
+    return a32 + (b32 >> 16) * (c32 >> 16);
+}
+
+
+/* multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode)*/
+/* In this counting header each _ovflw name is simply an alias for the counted function of the same base name. */
+#undef silk_MLA_ovflw
+#define silk_MLA_ovflw silk_MLA
+
+#undef silk_SMLABB_ovflw
+#define silk_SMLABB_ovflw silk_SMLABB
+
+#undef silk_SMLABT_ovflw
+#define silk_SMLABT_ovflw silk_SMLABT
+
+#undef silk_SMLATT_ovflw
+#define silk_SMLATT_ovflw silk_SMLATT
+
+#undef silk_SMLAWB_ovflw
+#define silk_SMLAWB_ovflw silk_SMLAWB
+
+#undef silk_SMLAWT_ovflw
+#define silk_SMLAWT_ovflw silk_SMLAWT
+
+#undef silk_SMULL
+/* Counted 32x32 -> 64-bit signed multiply (8 ops) */
+static OPUS_INLINE opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){
+    ops_count += 8;
+    return ((opus_int64)(a32) * (b32));
+}
+
+#undef silk_SMLAL
+/* Counted 64-bit accumulate of a 32x32 signed product: a64 + b32 * c32 (8 ops) */
+static OPUS_INLINE opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){
+    ops_count += 8;
+    return a64 + ((opus_int64)(b32) * (c32));
+}
+#undef silk_SMLALBB
+/* Counted 64-bit accumulate of a 16x16 signed product: a64 + b16 * c16 (4 ops) */
+static OPUS_INLINE opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){
+    ops_count += 4;
+    return a64 + ((opus_int64)(b16) * (c16));
+}
+
+#undef SigProcFIX_CLZ16
+/* Counted leading-zero count of a 16-bit value (10 ops); returns 16 for a zero input */
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ16(opus_int16 in16)
+{
+    opus_int32 out32 = 0;
+    ops_count += 10;
+    if( in16 == 0 ) {
+        return 16;
+    }
+    /* Find the highest non-zero nibble and move it down to the low four bits */
+    if( in16 & 0xF000 ) {
+        in16 >>= 12;
+    } else if( in16 & 0xFF00 ) {
+        out32 += 4;
+        in16 >>= 8;
+    } else if( in16 & 0xFFF0 ) {
+        out32 += 8;
+        in16 >>= 4;
+    } else {
+        out32 += 12;
+    }
+    /* Locate the top set bit inside that nibble */
+    if( in16 & 0xC ) {
+        return out32 + ( ( in16 & 0x8 ) ? 0 : 1 );
+    }
+    return out32 + ( ( in16 & 0xE ) ? 2 : 3 );
+}
+
+#undef SigProcFIX_CLZ32
+/* Counted leading-zero count of a 32-bit value (2 ops plus the 16-bit count) */
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ32(opus_int32 in32)
+{
+    ops_count += 2;
+    /* If no bit is set in the upper half, count the lower half and add 16 */
+    if( !( in32 & 0xFFFF0000 ) ) {
+        return SigProcFIX_CLZ16((opus_int16)in32) + 16;
+    }
+    return SigProcFIX_CLZ16((opus_int16)(in32 >> 16));
+}
+
+#undef silk_DIV32
+/* Counted 32-bit signed division (64 ops) */
+static OPUS_INLINE opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){
+    opus_int32 quotient;
+
+    ops_count += 64;
+    quotient = a32 / b32;
+    return quotient;
+}
+
+#undef silk_DIV32_16
+/* Counted signed division a32 / b32, charged as 32 ops */
+static OPUS_INLINE opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){
+    opus_int32 quotient;
+
+    ops_count += 32;
+    quotient = a32 / b32;
+    return quotient;
+}
+
+#undef silk_SAT8
+/* Counted clamp of a to the opus_int8 range (1 op) */
+static OPUS_INLINE opus_int8 silk_SAT8(opus_int64 a){
+    ops_count += 1;
+    if( a > silk_int8_MAX ) {
+        return (opus_int8)silk_int8_MAX;
+    }
+    if( a < silk_int8_MIN ) {
+        return (opus_int8)silk_int8_MIN;
+    }
+    return (opus_int8)a;
+}
+
+#undef silk_SAT16
+/* Counted clamp of a to the opus_int16 range (1 op) */
+static OPUS_INLINE opus_int16 silk_SAT16(opus_int64 a){
+    ops_count += 1;
+    if( a > silk_int16_MAX ) {
+        return (opus_int16)silk_int16_MAX;
+    }
+    if( a < silk_int16_MIN ) {
+        return (opus_int16)silk_int16_MIN;
+    }
+    return (opus_int16)a;
+}
+#undef silk_SAT32
+/* Counted clamp of a to the opus_int32 range (1 op) */
+static OPUS_INLINE opus_int32 silk_SAT32(opus_int64 a){
+    ops_count += 1;
+    if( a > silk_int32_MAX ) {
+        return (opus_int32)silk_int32_MAX;
+    }
+    if( a < silk_int32_MIN ) {
+        return (opus_int32)silk_int32_MIN;
+    }
+    return (opus_int32)a;
+}
+#undef silk_POS_SAT32
+/* Counted upper clamp of a to silk_int32_MAX (1 op); no lower bound is applied */
+static OPUS_INLINE opus_int32 silk_POS_SAT32(opus_int64 a){
+    ops_count += 1;
+    if( a > silk_int32_MAX ) {
+        return (opus_int32)silk_int32_MAX;
+    }
+    return (opus_int32)a;
+}
+
+#undef silk_ADD_POS_SAT8
+/* Counted a + b, returning silk_int8_MAX when bit 7 of the sum is set (1 op) */
+static OPUS_INLINE opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){
+    const opus_int64 sum = (a)+(b);
+
+    ops_count += 1;
+    if( sum & 0x80 ) {
+        return (opus_int8)silk_int8_MAX;
+    }
+    return (opus_int8)sum;
+}
+#undef silk_ADD_POS_SAT16
+/* Counted a + b, returning silk_int16_MAX when bit 15 of the sum is set (1 op) */
+static OPUS_INLINE opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){
+    const opus_int64 sum = (a)+(b);
+
+    ops_count += 1;
+    if( sum & 0x8000 ) {
+        return (opus_int16)silk_int16_MAX;
+    }
+    return (opus_int16)sum;
+}
+
+#undef silk_ADD_POS_SAT32
+/* Counted a + b, returning silk_int32_MAX when bit 31 of the sum is set (1 op) */
+static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
+    const opus_int64 sum = (a)+(b);
+
+    ops_count += 1;
+    if( sum & 0x80000000 ) {
+        return (opus_int32)silk_int32_MAX;
+    }
+    return (opus_int32)sum;
+}
+
+#undef silk_ADD_POS_SAT64
+/* Counted a + b, returning silk_int64_MAX when bit 63 of the sum is set (1 op) */
+static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
+    const opus_int64 sum = (a)+(b);
+
+    ops_count += 1;
+    if( sum & 0x8000000000000000LL ) {
+        return silk_int64_MAX;
+    }
+    return sum;
+}
+
+#undef silk_LSHIFT8
+/* Counted left shift, result narrowed to opus_int8 (1 op) */
+static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
+    ops_count += 1;
+    return (opus_int8)(a << shift);
+}
+#undef silk_LSHIFT16
+/* Counted left shift, result narrowed to opus_int16 (1 op) */
+static OPUS_INLINE opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){
+    ops_count += 1;
+    return (opus_int16)(a << shift);
+}
+#undef silk_LSHIFT32
+/* Counted 32-bit left shift (1 op) */
+static OPUS_INLINE opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){
+    ops_count += 1;
+    return a << shift;
+}
+#undef silk_LSHIFT64
+/* Counted 64-bit left shift (1 op) */
+static OPUS_INLINE opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){
+    opus_int64 shifted;
+
+    ops_count += 1;
+    shifted = a << shift;
+    return shifted;
+}
+
+#undef silk_LSHIFT_ovflw
+/* Counted 32-bit left shift, overflow-tolerant variant (1 op) */
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){
+    opus_int32 shifted;
+
+    ops_count += 1;
+    shifted = a << shift;
+    return shifted;
+}
+
+#undef silk_LSHIFT_uint
+/* Counted 32-bit unsigned left shift (1 op) */
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){
+    ops_count += 1;
+    return a << shift;
+}
+
+#undef silk_RSHIFT8
+/* Counted right shift of an opus_int8 (1 op) */
+static OPUS_INLINE opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){
+    opus_int8 shifted;
+
+    ops_count += 1;
+    shifted = a >> shift;
+    return shifted;
+}
+#undef silk_RSHIFT16
+/* Counted right shift of an opus_int16 (1 op) */
+static OPUS_INLINE opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){
+    opus_int16 shifted;
+
+    ops_count += 1;
+    shifted = a >> shift;
+    return shifted;
+}
+#undef silk_RSHIFT32
+/* Counted right shift of an opus_int32 (1 op) */
+static OPUS_INLINE opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){
+    opus_int32 shifted;
+
+    ops_count += 1;
+    shifted = a >> shift;
+    return shifted;
+}
+#undef silk_RSHIFT64
+/* Counted right shift of an opus_int64 (1 op) */
+static OPUS_INLINE opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){
+    opus_int64 shifted;
+
+    ops_count += 1;
+    shifted = a >> shift;
+    return shifted;
+}
+
+#undef silk_RSHIFT_uint
+/* Counted right shift of an opus_uint32 (1 op) */
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){
+    opus_uint32 shifted;
+
+    ops_count += 1;
+    shifted = a >> shift;
+    return shifted;
+}
+
+#undef silk_ADD_LSHIFT
+/* Counted a + (b << shift), shift >= 0 (1 op) */
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+    ops_count += 1;
+    return a + (b << shift);
+}
+#undef silk_ADD_LSHIFT32
+/* Counted a + (b << shift), shift >= 0 (1 op) */
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    ops_count += 1;
+    return a + (b << shift);
+}
+#undef silk_ADD_LSHIFT_uint
+/* Counted unsigned a + (b << shift), shift >= 0 (1 op) */
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+    ops_count += 1;
+    return a + (b << shift);
+}
+#undef silk_ADD_RSHIFT
+/* Counted a + (b >> shift), shift > 0 (1 op) */
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+    ops_count += 1;
+    return a + (b >> shift);
+}
+#undef silk_ADD_RSHIFT32
+/* Counted a + (b >> shift), shift > 0 (1 op) */
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    ops_count += 1;
+    return a + (b >> shift);
+}
+#undef silk_ADD_RSHIFT_uint
+/* Counted unsigned a + (b >> shift), shift > 0 (1 op) */
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+    ops_count += 1;
+    return a + (b >> shift);
+}
+#undef silk_SUB_LSHIFT32
+/* Counted a - (b << shift), shift >= 0 (1 op) */
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    ops_count += 1;
+    return a - (b << shift);
+}
+#undef silk_SUB_RSHIFT32
+/* Counted a - (b >> shift), shift > 0 (1 op) */
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    ops_count += 1;
+    return a - (b >> shift);
+}
+
+#undef silk_RSHIFT_ROUND
+/* Counted right shift with rounding (3 ops); a shift of 1 is handled separately */
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){
+    ops_count += 3;
+    if( shift == 1 ) {
+        return (a >> 1) + (a & 1);
+    }
+    return ((a >> (shift - 1)) + 1) >> 1;
+}
+
+#undef silk_RSHIFT_ROUND64
+/* Counted 64-bit right shift with rounding (6 ops); a shift of 1 is handled separately */
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){
+    ops_count += 6;
+    if( shift == 1 ) {
+        return (a >> 1) + (a & 1);
+    }
+    return ((a >> (shift - 1)) + 1) >> 1;
+}
+
+#undef silk_abs_int64
+/* Counted absolute value of a 64-bit integer (1 op). */
+/* Be careful: the result is wrong when the input equals the minimum representable value. */
+static OPUS_INLINE opus_int64 silk_abs_int64(opus_int64 a){
+    ops_count += 1;
+    if( a > 0 ) {
+        return a;
+    }
+    return -(a);
+}
+
+#undef silk_abs_int32
+/* Counted absolute value of a 32-bit integer via silk_abs (1 op) */
+static OPUS_INLINE opus_int32 silk_abs_int32(opus_int32 a){
+    opus_int32 magnitude;
+
+    ops_count += 1;
+    magnitude = silk_abs(a);
+    return magnitude;
+}
+
+
+#undef silk_min
+/* Counted minimum of a and b (1 op).                                         */
+/* The return and parameter types are spelled out: the previous K&R-style     */
+/* definition relied on implicit int, which C99 and C++ compilers reject.     */
+static OPUS_INLINE opus_int32 silk_min(opus_int32 a, opus_int32 b){
+    ops_count += 1;
+    return (((a) < (b)) ? (a) : (b));
+}
+#undef silk_max
+/* Counted maximum of a and b (1 op).                                         */
+/* The return and parameter types are spelled out: the previous K&R-style     */
+/* definition relied on implicit int, which C99 and C++ compilers reject.     */
+static OPUS_INLINE opus_int32 silk_max(opus_int32 a, opus_int32 b){
+    ops_count += 1;
+    return (((a) > (b)) ? (a) : (b));
+}
+#undef silk_sign
+/* Counted sign of a: 1 for positive, -1 for negative, 0 for zero (1 op).     */
+/* The return and parameter types are spelled out: the previous K&R-style     */
+/* definition relied on implicit int, which C99 and C++ compilers reject.     */
+static OPUS_INLINE opus_int32 silk_sign(opus_int32 a){
+    ops_count += 1;
+    return ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ));
+}
+
+#undef silk_ADD16
+/* Counted 16-bit addition (1 op) */
+static OPUS_INLINE opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){
+    ops_count += 1;
+    return (opus_int16)(a + b);
+}
+
+#undef silk_ADD32
+/* Counted 32-bit addition (1 op) */
+static OPUS_INLINE opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){
+    ops_count += 1;
+    return a + b;
+}
+
+#undef silk_ADD64
+/* Counted 64-bit addition (2 ops) */
+static OPUS_INLINE opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){
+    ops_count += 2;
+    return a + b;
+}
+
+#undef silk_SUB16
+/* Counted 16-bit subtraction (1 op) */
+static OPUS_INLINE opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){
+    ops_count += 1;
+    return (opus_int16)(a - b);
+}
+
+#undef silk_SUB32
+/* Counted 32-bit subtraction (1 op) */
+static OPUS_INLINE opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){
+    ops_count += 1;
+    return a - b;
+}
+
+#undef silk_SUB64
+/* Counted 64-bit subtraction (2 ops) */
+static OPUS_INLINE opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){
+    ops_count += 2;
+    return a - b;
+}
+
+#undef silk_ADD_SAT16
+/* Saturating 16-bit addition; the op count is charged by silk_ADD32 and silk_SAT16 */
+static OPUS_INLINE opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) {
+    opus_int32 wide_sum;
+
+    wide_sum = silk_ADD32( (opus_int32)(a16), (b16) );
+    return (opus_int16)silk_SAT16( wide_sum );
+}
+
+#undef silk_ADD_SAT32
+/* Counted saturating 32-bit addition, decided from the sign bits of the operands and of the sum (1 op) */
+static OPUS_INLINE opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){
+    ops_count += 1;
+    if( (((a32) + (b32)) & 0x80000000) == 0 ) {
+        /* Sum looks non-negative: overflow only if both operands were negative */
+        if( (((a32) & (b32)) & 0x80000000) != 0 ) {
+            return silk_int32_MIN;
+        }
+        return (a32)+(b32);
+    }
+    /* Sum looks negative: overflow only if both operands were non-negative */
+    if( (((a32) | (b32)) & 0x80000000) == 0 ) {
+        return silk_int32_MAX;
+    }
+    return (a32)+(b32);
+}
+
+#undef silk_ADD_SAT64
+/* Counted saturating 64-bit addition, decided from the sign bits of the operands and of the sum (1 op) */
+static OPUS_INLINE opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) {
+    ops_count += 1;
+    if( (((a64) + (b64)) & 0x8000000000000000LL) == 0 ) {
+        /* Sum looks non-negative: overflow only if both operands were negative */
+        if( (((a64) & (b64)) & 0x8000000000000000LL) != 0 ) {
+            return silk_int64_MIN;
+        }
+        return (a64)+(b64);
+    }
+    /* Sum looks negative: overflow only if both operands were non-negative */
+    if( (((a64) | (b64)) & 0x8000000000000000LL) == 0 ) {
+        return silk_int64_MAX;
+    }
+    return (a64)+(b64);
+}
+
+#undef silk_SUB_SAT16
+/* Saturating 16-bit subtraction; the op count is charged by silk_SUB32 and silk_SAT16. */
+/* Note that the function asserts unconditionally on entry.                             */
+static OPUS_INLINE opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) {
+    opus_int32 wide_diff;
+
+    silk_assert(0);
+    wide_diff = silk_SUB32( (opus_int32)(a16), (b16) );
+    return (opus_int16)silk_SAT16( wide_diff );
+}
+
+/* Operation-counting stand-in for silk_SUB_SAT32: 32-bit subtract that yields
+   silk_int32_MIN / silk_int32_MAX instead of wrapping.  Charges one operation
+   to ops_count.
+   The sign of the wrapped difference is taken from an unsigned subtraction so
+   that the overflow test itself never performs a signed overflow (undefined
+   behaviour in C); the signed a32 - b32 is only evaluated on paths where it
+   cannot overflow. */
+#undef silk_SUB_SAT32
+static OPUS_INLINE opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) {
+    opus_int32 res;
+    ops_count += 1;
+    if ( (((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ) {
+        /* Wrapped difference is non-negative: overflow only if a32 < 0 and b32 >= 0. */
+        res = ( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32);
+    } else {
+        /* Wrapped difference is negative: overflow only if a32 >= 0 and b32 < 0. */
+        res = (((a32)^0x80000000) & (b32) & 0x80000000) ? silk_int32_MAX : (a32)-(b32);
+    }
+    return res;
+}
+
+/* Operation-counting stand-in for silk_SUB_SAT64: 64-bit subtract that yields
+   silk_int64_MIN / silk_int64_MAX instead of wrapping.  Charges one operation
+   to ops_count.
+   The sign of the wrapped difference is taken from an unsigned subtraction so
+   that the overflow test itself never performs a signed overflow (undefined
+   behaviour in C); the signed a64 - b64 is only evaluated on paths where it
+   cannot overflow. */
+#undef silk_SUB_SAT64
+static OPUS_INLINE opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) {
+    opus_int64 res;
+    ops_count += 1;
+    if ( (((opus_uint64)(a64)-(opus_uint64)(b64)) & 0x8000000000000000LL) == 0 ) {
+        /* Wrapped difference is non-negative: overflow only if a64 < 0 and b64 >= 0. */
+        res = ( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64);
+    } else {
+        /* Wrapped difference is negative: overflow only if a64 >= 0 and b64 < 0. */
+        res = (((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64);
+    }
+
+    return res;
+}
+
+/* Operation-counting stand-in for silk_SMULWW, built from silk_SMULWB, silk_MLA
+   and silk_RSHIFT_ROUND.  ops_count is not touched here; per the original note
+   the cost is charged inside those nested macros. */
+#undef silk_SMULWW
+static OPUS_INLINE opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32)
+{
+    return silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16));
+}
+
+/* Operation-counting stand-in for silk_SMLAWW, built from silk_SMLAWB, silk_MLA
+   and silk_RSHIFT_ROUND.  ops_count is not touched here; per the original note
+   the cost is charged inside those nested macros. */
+#undef silk_SMLAWW
+static OPUS_INLINE opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32)
+{
+    return silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16));
+}
+
+/* Operation-counting stand-in for silk_min_int: returns the smaller of a and b
+   (b when they are equal) and charges one operation to ops_count. */
+#undef silk_min_int
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
+{
+    ops_count += 1;
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+/* Operation-counting stand-in for silk_min_16: returns the smaller of a and b
+   (b when they are equal) and charges one operation to ops_count. */
+#undef silk_min_16
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+{
+    ops_count += 1;
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+/* Operation-counting stand-in for silk_min_32: returns the smaller of a and b
+   (b when they are equal) and charges one operation to ops_count. */
+#undef silk_min_32
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+{
+    ops_count += 1;
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+/* Operation-counting stand-in for silk_min_64: returns the smaller of a and b
+   (b when they are equal) and charges one operation to ops_count. */
+#undef silk_min_64
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+{
+    ops_count += 1;
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+/* silk_max() versions with typecast in the function call */
+/* Operation-counting stand-in for silk_max_int: returns the larger of a and b
+   (b when they are equal) and charges one operation to ops_count. */
+#undef silk_max_int
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
+{
+    ops_count += 1;
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+/* Operation-counting stand-in for silk_max_16: returns the larger of a and b
+   (b when they are equal) and charges one operation to ops_count. */
+#undef silk_max_16
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+{
+    ops_count += 1;
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+/* Operation-counting stand-in for silk_max_32: returns the larger of a and b
+   (b when they are equal) and charges one operation to ops_count. */
+#undef silk_max_32
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+{
+    ops_count += 1;
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+
+/* Operation-counting stand-in for silk_max_64: returns the larger of a and b
+   (b when they are equal) and charges one operation to ops_count. */
+#undef silk_max_64
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+{
+    ops_count += 1;
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+
+
+/* Operation-counting stand-in for silk_LIMIT_int: clamps a into the closed range
+   spanned by limit1 and limit2, which may be given in either order.  Charges six
+   operations to ops_count. */
+#undef silk_LIMIT_int
+static OPUS_INLINE opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2)
+{
+    opus_int lower = limit1;
+    opus_int upper = limit2;
+
+    ops_count += 6;
+
+    /* Put the bounds in order so a single clamp handles both argument orders. */
+    if (limit1 > limit2) {
+        lower = limit2;
+        upper = limit1;
+    }
+    if (a > upper) {
+        return upper;
+    }
+    if (a < lower) {
+        return lower;
+    }
+    return a;
+}
+
+/* Operation-counting stand-in for silk_LIMIT_16: clamps a into the closed range
+   spanned by limit1 and limit2, which may be given in either order.  Charges six
+   operations to ops_count. */
+#undef silk_LIMIT_16
+static OPUS_INLINE opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2)
+{
+    opus_int16 lower = limit1;
+    opus_int16 upper = limit2;
+
+    ops_count += 6;
+
+    /* Put the bounds in order so a single clamp handles both argument orders. */
+    if (limit1 > limit2) {
+        lower = limit2;
+        upper = limit1;
+    }
+    if (a > upper) {
+        return upper;
+    }
+    if (a < lower) {
+        return lower;
+    }
+    return a;
+}
+
+
+/* Operation-counting stand-in for silk_LIMIT_32: clamps a into the closed range
+   spanned by limit1 and limit2, which may be given in either order.  Charges six
+   operations to ops_count.
+   The return type is opus_int32, matching the operands and the local result;
+   it was previously declared as opus_int, which is not guaranteed to hold a
+   32-bit value. */
+#undef silk_LIMIT_32
+static OPUS_INLINE opus_int32 silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
+{
+    opus_int32 ret;
+    ops_count += 6;
+
+    ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+        : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
+    return(ret);
+}
+
+#else
+/* Fallbacks for the #else branch of the enclosing conditional: both macros
+   expand to nothing, so uses of varDefine and silk_SaveCount() compile away. */
+#define varDefine
+#define silk_SaveCount()
+
+#endif
+#endif
+
diff --git a/drivers/opus/silk/MacroDebug.h b/drivers/opus/silk/MacroDebug.h
new file mode 100644
index 0000000000..35aedc5c5f
--- /dev/null
+++ b/drivers/opus/silk/MacroDebug.h
@@ -0,0 +1,952 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (C) 2012 Xiph.Org Foundation
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef MACRO_DEBUG_H
+#define MACRO_DEBUG_H
+
+/* Redefine macro functions with extensive assertion in DEBUG mode.
+ As functions can't be undefined, this file can't work with SigProcFIX_MacroCount.h */
+
+#if ( defined (FIXED_DEBUG) || ( 0 && defined (_DEBUG) ) ) && !defined (silk_MACRO_COUNT)
+
+/* Debug build of silk_ADD16: performs the plain 16-bit add and, if the result
+   differs from silk_ADD_SAT16() of the same operands, reports the call site on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined).  The unsaturated
+   result is returned either way. */
+#undef silk_ADD16
+#define silk_ADD16(a,b) silk_ADD16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line)
+{
+    const opus_int16 sum = (opus_int16)(a + b);
+
+    if ( sum == silk_ADD_SAT16( a, b ) ) {
+        return sum;
+    }
+    fprintf (stderr, "silk_ADD16(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return sum;
+}
+
+/* Debug build of silk_ADD32: performs the plain 32-bit add and, if the result
+   differs from silk_ADD_SAT32() of the same operands, reports the call site on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined).  The unsaturated
+   result is returned either way. */
+#undef silk_ADD32
+#define silk_ADD32(a,b) silk_ADD32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line)
+{
+    const opus_int32 sum = a + b;
+
+    if ( sum == silk_ADD_SAT32( a, b ) ) {
+        return sum;
+    }
+    fprintf (stderr, "silk_ADD32(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return sum;
+}
+
+/* Debug build of silk_ADD64: performs the plain 64-bit add and, if the result
+   differs from silk_ADD_SAT64() of the same operands, reports the call site on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined).  The unsaturated
+   result is returned either way. */
+#undef silk_ADD64
+#define silk_ADD64(a,b) silk_ADD64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+    const opus_int64 sum = a + b;
+
+    if ( sum == silk_ADD_SAT64( a, b ) ) {
+        return sum;
+    }
+    fprintf (stderr, "silk_ADD64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return sum;
+}
+
+/* Debug build of silk_SUB16: performs the plain 16-bit subtract and, if the
+   result differs from silk_SUB_SAT16() of the same operands, reports the call
+   site on stderr (and asserts when FIXED_DEBUG_ASSERT is defined).  The
+   unsaturated result is returned either way. */
+#undef silk_SUB16
+#define silk_SUB16(a,b) silk_SUB16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line)
+{
+    const opus_int16 diff = (opus_int16)(a - b);
+
+    if ( diff == silk_SUB_SAT16( a, b ) ) {
+        return diff;
+    }
+    fprintf (stderr, "silk_SUB16(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return diff;
+}
+
+/* Debug build of silk_SUB32: performs the plain 32-bit subtract and, if the
+   result differs from silk_SUB_SAT32() of the same operands, reports the call
+   site on stderr (and asserts when FIXED_DEBUG_ASSERT is defined).  The
+   unsaturated result is returned either way. */
+#undef silk_SUB32
+#define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line)
+{
+    const opus_int32 diff = a - b;
+
+    if ( diff == silk_SUB_SAT32( a, b ) ) {
+        return diff;
+    }
+    fprintf (stderr, "silk_SUB32(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return diff;
+}
+
+/* Debug build of silk_SUB64: performs the plain 64-bit subtract and, if the
+   result differs from silk_SUB_SAT64() of the same operands, reports the call
+   site on stderr (and asserts when FIXED_DEBUG_ASSERT is defined).  The
+   unsaturated result is returned either way. */
+#undef silk_SUB64
+#define silk_SUB64(a,b) silk_SUB64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+    const opus_int64 diff = a - b;
+
+    if ( diff == silk_SUB_SAT64( a, b ) ) {
+        return diff;
+    }
+    fprintf (stderr, "silk_SUB64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return diff;
+}
+
+/* Debug build of silk_ADD_SAT16: computes the result via silk_ADD32 and
+   silk_SAT16, then cross-checks it against silk_SAT16 applied to a plain 32-bit
+   sum.  A mismatch is reported on stderr (and asserts when FIXED_DEBUG_ASSERT
+   is defined). */
+#undef silk_ADD_SAT16
+#define silk_ADD_SAT16(a,b) silk_ADD_SAT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line)
+{
+    const opus_int16 res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) );
+
+    if ( res == silk_SAT16( (opus_int32)a16 + (opus_int32)b16 ) ) {
+        return res;
+    }
+    fprintf (stderr, "silk_ADD_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return res;
+}
+
+/* Debug build of silk_ADD_SAT32: saturating 32-bit add using sign-bit tests,
+   cross-checked against silk_SAT32 of the exact 64-bit sum.  A mismatch is
+   reported on stderr (and asserts when FIXED_DEBUG_ASSERT is defined). */
+#undef silk_ADD_SAT32
+#define silk_ADD_SAT32(a,b) silk_ADD_SAT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line)
+{
+    opus_int32 res;
+
+    if ( (((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ) {
+        /* Wrapped sum is non-negative: overflow only if both inputs are negative. */
+        res = ((((a32) & (b32)) & 0x80000000) != 0) ? silk_int32_MIN : (a32)+(b32);
+    } else {
+        /* Wrapped sum is negative: overflow only if both inputs are non-negative. */
+        res = ((((a32) | (b32)) & 0x80000000) == 0) ? silk_int32_MAX : (a32)+(b32);
+    }
+    if ( res == silk_SAT32( (opus_int64)a32 + (opus_int64)b32 ) ) {
+        return res;
+    }
+    fprintf (stderr, "silk_ADD_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return res;
+}
+
+/* Debug build of silk_ADD_SAT64: saturating 64-bit add with a sanity check on
+   the saturated value.  A failed check is reported on stderr (and asserts when
+   FIXED_DEBUG_ASSERT is defined).
+   The sign of the wrapped sum comes from an unsigned addition, and saturation
+   is tracked in a flag rather than by comparing against a64 + b64, so no signed
+   overflow (undefined behaviour in C) is ever evaluated; the signed sum is only
+   formed on paths where it cannot overflow. */
+#undef silk_ADD_SAT64
+#define silk_ADD_SAT64(a,b) silk_ADD_SAT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) {
+    opus_int64 res;
+    int saturated;
+    int fail = 0;
+    if ( (((opus_uint64)(a64) + (opus_uint64)(b64)) & 0x8000000000000000LL) == 0 ) {
+        /* Wrapped sum is non-negative: overflow only if both inputs are negative. */
+        saturated = (((a64) & (b64)) & 0x8000000000000000LL) != 0;
+        res = saturated ? silk_int64_MIN : (a64)+(b64);
+    } else {
+        /* Wrapped sum is negative: overflow only if both inputs are non-negative. */
+        saturated = (((a64) | (b64)) & 0x8000000000000000LL) == 0;
+        res = saturated ? silk_int64_MAX : (a64)+(b64);
+    }
+    if( saturated ) {
+        /* Check that we saturated to the correct extreme value */
+        if ( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) ||
+               ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) ) )
+        {
+            fail = 1;
+        }
+    }
+    /* Without saturation res is the exact sum by construction; nothing to check. */
+    if ( fail )
+    {
+        fprintf (stderr, "silk_ADD_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+/* Debug build of silk_SUB_SAT16: computes the result via silk_SUB32 and
+   silk_SAT16, then cross-checks it against silk_SAT16 applied to a plain 32-bit
+   difference.  A mismatch is reported on stderr (and asserts when
+   FIXED_DEBUG_ASSERT is defined). */
+#undef silk_SUB_SAT16
+#define silk_SUB_SAT16(a,b) silk_SUB_SAT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line )
+{
+    const opus_int16 res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) );
+
+    if ( res == silk_SAT16( (opus_int32)a16 - (opus_int32)b16 ) ) {
+        return res;
+    }
+    fprintf (stderr, "silk_SUB_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return res;
+}
+
+/* Debug build of silk_SUB_SAT32: saturating 32-bit subtract using sign-bit
+   tests, cross-checked against silk_SAT32 of the exact 64-bit difference.  A
+   mismatch is reported on stderr (and asserts when FIXED_DEBUG_ASSERT is
+   defined). */
+#undef silk_SUB_SAT32
+#define silk_SUB_SAT32(a,b) silk_SUB_SAT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line )
+{
+    opus_int32 res;
+
+    if ( (((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ) {
+        /* Wrapped difference is non-negative: overflow only if a32 < 0 and b32 >= 0. */
+        res = ( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32);
+    } else {
+        /* Wrapped difference is negative: overflow only if a32 >= 0 and b32 < 0. */
+        res = (((a32)^0x80000000) & (b32) & 0x80000000) ? silk_int32_MAX : (a32)-(b32);
+    }
+    if ( res == silk_SAT32( (opus_int64)a32 - (opus_int64)b32 ) ) {
+        return res;
+    }
+    fprintf (stderr, "silk_SUB_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return res;
+}
+
+/* Debug build of silk_SUB_SAT64: saturating 64-bit subtract with a sanity check
+   on the saturated value.  A failed check is reported on stderr (and asserts
+   when FIXED_DEBUG_ASSERT is defined).
+   Two fixes relative to the earlier form:
+   - the sanity check estimates the true difference as (a64>>1) - (b64>>1); it
+     previously used a sum (copied from the ADD variant), which raised false
+     alarms such as for silk_SUB_SAT64(1, silk_int64_MIN);
+   - the sign of the wrapped difference comes from an unsigned subtraction and
+     saturation is tracked in a flag, so no signed overflow (undefined
+     behaviour in C) is ever evaluated. */
+#undef silk_SUB_SAT64
+#define silk_SUB_SAT64(a,b) silk_SUB_SAT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) {
+    opus_int64 res;
+    int saturated;
+    int fail = 0;
+    if ( (((opus_uint64)(a64)-(opus_uint64)(b64)) & 0x8000000000000000LL) == 0 ) {
+        /* Wrapped difference is non-negative: overflow only if a64 < 0 and b64 >= 0. */
+        saturated = ( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) != 0;
+        res = saturated ? silk_int64_MIN : (a64)-(b64);
+    } else {
+        /* Wrapped difference is negative: overflow only if a64 >= 0 and b64 < 0. */
+        saturated = (((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) != 0;
+        res = saturated ? silk_int64_MAX : (a64)-(b64);
+    }
+    if( saturated ) {
+        /* Check that we saturated to the correct extreme value.  The halved
+           operands cannot overflow when subtracted. */
+        if( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) - ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) ||
+              ( res == silk_int64_MIN && ( ( a64 >> 1 ) - ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) ))
+        {
+            fail = 1;
+        }
+    }
+    /* Without saturation res is the exact difference by construction; nothing to check. */
+    if ( fail )
+    {
+        fprintf (stderr, "silk_SUB_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+/* Debug build of silk_MUL: 32x32 -> 32-bit multiply that reports on stderr (and
+   asserts when FIXED_DEBUG_ASSERT is defined) if the product does not fit in
+   32 bits.
+   The product is formed in 64 bits and then narrowed, instead of multiplying in
+   32 bits first: a signed 32-bit overflow is undefined behaviour in C and lets
+   an optimiser assume the check below can never fire.  On two's-complement
+   targets the narrowed value equals the wrapped 32-bit product. */
+#undef silk_MUL
+#define silk_MUL(a,b) silk_MUL_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret;
+    opus_int64 ret64;
+    ret64 = (opus_int64)a32 * (opus_int64)b32;
+    ret = (opus_int32)ret64;
+    if ( (opus_int64)ret != ret64 )
+    {
+        fprintf (stderr, "silk_MUL(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Debug build of silk_MUL_uint: unsigned 32-bit multiply that reports on stderr
+   (and asserts when FIXED_DEBUG_ASSERT is defined) if the 32-bit product
+   differs from the 64-bit product of the same operands. */
+#undef silk_MUL_uint
+#define silk_MUL_uint(a,b) silk_MUL_uint_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line)
+{
+    const opus_uint32 product = a32 * b32;
+
+    if ( (opus_uint64)product == (opus_uint64)a32 * (opus_uint64)b32 ) {
+        return product;
+    }
+    fprintf (stderr, "silk_MUL_uint(%u, %u) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return product;
+}
+
+/* Debug build of silk_MLA: a32 + b32 * c32 in 32 bits, reporting on stderr (and
+   asserting when FIXED_DEBUG_ASSERT is defined) if the exact value does not fit
+   in 32 bits.
+   The exact value is formed in 64 bits and then narrowed, instead of computing
+   in 32 bits first: a signed 32-bit overflow is undefined behaviour in C and
+   lets an optimiser assume the check below can never fire.  On
+   two's-complement targets the narrowed value equals the wrapped 32-bit
+   result. */
+#undef silk_MLA
+#define silk_MLA(a,b,c) silk_MLA_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    opus_int64 ret64;
+    /* |b32 * c32| <= 2^62, so adding a 32-bit value cannot overflow 64 bits. */
+    ret64 = (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32;
+    ret = (opus_int32)ret64;
+    if ( (opus_int64)ret != ret64 )
+    {
+        fprintf (stderr, "silk_MLA(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Debug build of silk_MLA_uint: unsigned a32 + b32 * c32 in 32 bits, reporting
+   on stderr (and asserting when FIXED_DEBUG_ASSERT is defined) if the exact
+   value does not fit in 32 bits.
+   Fixes relative to the earlier form:
+   - returns opus_uint32 (it was opus_int32, which changes the value's type and
+     sign for results above silk_int32_MAX);
+   - the reference value is computed in opus_uint64, since the product of two
+     32-bit unsigned values can exceed the signed 64-bit range;
+   - the diagnostic prints the unsigned operands with %u. */
+#undef silk_MLA_uint
+#define silk_MLA_uint(a,b,c) silk_MLA_uint_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){
+    opus_uint32 ret;
+    ret = a32 + b32 * c32;
+    /* (2^32-1) + (2^32-1)^2 < 2^64, so the unsigned 64-bit reference is exact. */
+    if ( (opus_uint64)ret != (opus_uint64)a32 + (opus_uint64)b32 * (opus_uint64)c32 )
+    {
+        fprintf (stderr, "silk_MLA_uint(%u, %u, %u) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Debug build of silk_SMULWB: multiplies a32 by the low 16 bits of b32 (taken as
+   a signed opus_int16) and keeps the product shifted right by 16, computed from
+   the high and low halves of a32.  The result is cross-checked against the same
+   product formed in 64 bits; a mismatch is reported on stderr (and asserts when
+   FIXED_DEBUG_ASSERT is defined). */
+#undef silk_SMULWB
+#define silk_SMULWB(a,b) silk_SMULWB_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line)
+{
+    const opus_int32 b_low = (opus_int32)((opus_int16)b32);
+    const opus_int32 ret = (a32 >> 16) * b_low + (((a32 & 0x0000FFFF) * b_low) >> 16);
+
+    if ( (opus_int64)ret == (((opus_int64)a32 * b_low) >> 16) ) {
+        return ret;
+    }
+    fprintf (stderr, "silk_SMULWB(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return ret;
+}
+
+/* Debug build of silk_SMLAWB: a32 + silk_SMULWB(b32, c32).  Reports on stderr
+   (and asserts when FIXED_DEBUG_ASSERT is defined) when the plain add differs
+   from the saturating add.
+   The product and the sum are each evaluated once and reused for the check, as
+   silk_SMLAWW_ does; the earlier form re-evaluated the checked silk_SMULWB and
+   silk_ADD32 macros three and two times, repeating their own diagnostics. */
+#undef silk_SMLAWB
+#define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret, tmp;
+
+    tmp = silk_SMULWB( b32, c32 );
+    ret = silk_ADD32( a32, tmp );
+    if ( ret != silk_ADD_SAT32( a32, tmp ) )
+    {
+        fprintf (stderr, "silk_SMLAWB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Debug build of silk_SMULWT: multiplies a32 by the top 16 bits of b32
+   (b32 >> 16) and keeps the product shifted right by 16, computed from the high
+   and low halves of a32.  The result is cross-checked against the same product
+   formed in 64 bits; a mismatch is reported on stderr (and asserts when
+   FIXED_DEBUG_ASSERT is defined). */
+#undef silk_SMULWT
+#define silk_SMULWT(a,b) silk_SMULWT_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line)
+{
+    const opus_int32 b_top = b32 >> 16;
+    const opus_int32 ret = (a32 >> 16) * b_top + (((a32 & 0x0000FFFF) * b_top) >> 16);
+
+    if ( (opus_int64)ret == (((opus_int64)a32 * b_top) >> 16) ) {
+        return ret;
+    }
+    fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return ret;
+}
+
+/* Debug build of silk_SMLAWT: a32 plus (b32 times the top 16 bits of c32)
+   shifted right by 16, computed from the high and low halves of b32.  The
+   result is cross-checked against the same value formed in 64 bits; a mismatch
+   is reported on stderr (and asserts when FIXED_DEBUG_ASSERT is defined). */
+#undef silk_SMLAWT
+#define silk_SMLAWT(a,b,c) silk_SMLAWT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line)
+{
+    const opus_int32 c_top = c32 >> 16;
+    const opus_int32 ret = a32 + ((b32 >> 16) * c_top) + (((b32 & 0x0000FFFF) * c_top) >> 16);
+
+    if ( (opus_int64)ret == (opus_int64)a32 + (((opus_int64)b32 * c_top) >> 16) ) {
+        return ret;
+    }
+    fprintf (stderr, "silk_SMLAWT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return ret;
+}
+
+/* Debug build of silk_SMULL: 64-bit multiply whose result is verified by
+   dividing the product by one of the (non-zero) operands and comparing with the
+   other.  A mismatch is reported on stderr (and asserts when FIXED_DEBUG_ASSERT
+   is defined). */
+#undef silk_SMULL
+#define silk_SMULL(a,b) silk_SMULL_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line)
+{
+    const opus_int64 product = a64 * b64;
+    int fail = 0;
+
+    /* Divide by whichever operand is non-zero; with both zero there is nothing to verify. */
+    if( b64 != 0 ) {
+        fail = (a64 != (product / b64));
+    } else if( a64 != 0 ) {
+        fail = (b64 != (product / a64));
+    }
+    if ( !fail ) {
+        return product;
+    }
+    fprintf (stderr, "silk_SMULL(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return product;
+}
+
+/* no checking needed for silk_SMULBB */
+/* Debug build of silk_SMLABB: a32 plus the product of the low 16 bits of b32 and
+   c32 (each taken as a signed opus_int16).  The result is compared with a
+   64-bit reference that uses the full b32; a mismatch is reported on stderr
+   (and asserts when FIXED_DEBUG_ASSERT is defined). */
+#undef silk_SMLABB
+#define silk_SMLABB(a,b,c) silk_SMLABB_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line)
+{
+    const opus_int32 b_low = (opus_int32)((opus_int16)b32);
+    const opus_int32 c_low = (opus_int32)((opus_int16)c32);
+    const opus_int32 ret = a32 + b_low * c_low;
+
+    if ( (opus_int64)ret == (opus_int64)a32 + (opus_int64)b32 * c_low ) {
+        return ret;
+    }
+    fprintf (stderr, "silk_SMLABB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return ret;
+}
+
+/* no checking needed for silk_SMULBT */
+/* Debug build of silk_SMLABT: a32 plus the product of the low 16 bits of b32
+   (taken as a signed opus_int16) and the top 16 bits of c32 (c32 >> 16).  The
+   result is compared with a 64-bit reference that uses the full b32; a
+   mismatch is reported on stderr (and asserts when FIXED_DEBUG_ASSERT is
+   defined). */
+#undef silk_SMLABT
+#define silk_SMLABT(a,b,c) silk_SMLABT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line)
+{
+    const opus_int32 c_top = c32 >> 16;
+    const opus_int32 ret = a32 + ((opus_int32)((opus_int16)b32)) * c_top;
+
+    if ( (opus_int64)ret == (opus_int64)a32 + (opus_int64)b32 * c_top ) {
+        return ret;
+    }
+    fprintf (stderr, "silk_SMLABT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return ret;
+}
+
+/* no checking needed for silk_SMULTT */
+/* Debug build of silk_SMLATT: a32 plus the product of the top 16 bits of b32 and
+   c32.  The 32-bit sum is compared with the same sum formed in 64 bits; a
+   mismatch is reported on stderr (and asserts when FIXED_DEBUG_ASSERT is
+   defined). */
+#undef silk_SMLATT
+#define silk_SMLATT(a,b,c) silk_SMLATT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line)
+{
+    const opus_int32 top_product = (b32 >> 16) * (c32 >> 16);
+    const opus_int32 ret = a32 + top_product;
+
+    if ( (opus_int64)ret == (opus_int64)a32 + top_product ) {
+        return ret;
+    }
+    fprintf (stderr, "silk_SMLATT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return ret;
+}
+
+/* Debug build of silk_SMULWW: (a32 * b32) >> 16, assembled from
+   silk_SMULWB(a32, b32) plus a32 times the rounded top half of b32.  Each
+   intermediate step and the final value (against the 64-bit product shifted by
+   16) is verified; any failure is reported on stderr (and asserts when
+   FIXED_DEBUG_ASSERT is defined).
+   The diagnostic now names silk_SMULWW (it previously printed "silk_SMULWT"),
+   and the sum is evaluated once and reused for the saturation check. */
+#undef silk_SMULWW
+#define silk_SMULWW(a,b) silk_SMULWW_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret, tmp1, tmp2;
+    opus_int64 ret64;
+    int fail = 0;
+
+    ret = silk_SMULWB( a32, b32 );
+    tmp1 = silk_RSHIFT_ROUND( b32, 16 );
+    tmp2 = silk_MUL( a32, tmp1 );
+
+    /* The high-half product must fit in 32 bits. */
+    fail |= (opus_int64)tmp2 != (opus_int64) a32 * (opus_int64) tmp1;
+
+    tmp1 = ret;
+    ret = silk_ADD32( tmp1, tmp2 );
+    /* The final add must not overflow. */
+    fail |= ret != silk_ADD_SAT32( tmp1, tmp2 );
+
+    /* End-to-end check against the 64-bit reference. */
+    ret64 = silk_RSHIFT64( silk_SMULL( a32, b32 ), 16 );
+    fail |= (opus_int64)ret != ret64;
+
+    if ( fail )
+    {
+        fprintf (stderr, "silk_SMULWW(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+
+    return ret;
+}
+
+/* Debug build of silk_SMLAWW: a32 + silk_SMULWW(b32, c32).  Reports on stderr
+   (and asserts when FIXED_DEBUG_ASSERT is defined) when the plain add differs
+   from the saturating add. */
+#undef silk_SMLAWW
+#define silk_SMLAWW(a,b,c) silk_SMLAWW_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line)
+{
+    const opus_int32 product = silk_SMULWW( b32, c32 );
+    const opus_int32 sum = silk_ADD32( a32, product );
+
+    if ( sum == silk_ADD_SAT32( a32, product ) ) {
+        return sum;
+    }
+    fprintf (stderr, "silk_SMLAWW(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return sum;
+}
+
+/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
+/* silk_MLA_ovflw: a32 + b32 * c32 as a plain expression, with no diagnostics. */
+#undef silk_MLA_ovflw
+#define silk_MLA_ovflw(a32, b32, c32) ((a32) + ((b32) * (c32)))
+/* silk_SMLABB_ovflw: a32 plus the product of b32 and c32 after each is truncated
+   to opus_int16 and widened back to opus_int32; no diagnostics. */
+#undef silk_SMLABB_ovflw
+#define silk_SMLABB_ovflw(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32)))
+
+/* no checking needed for silk_SMULL
+ no checking needed for silk_SMLAL
+ no checking needed for silk_SMLALBB
+ no checking needed for SigProcFIX_CLZ16
+ no checking needed for SigProcFIX_CLZ32*/
+
+/* Debug build of silk_DIV32: reports a zero divisor on stderr (and asserts when
+   FIXED_DEBUG_ASSERT is defined).  The division is performed in every case,
+   including after a reported zero divisor. */
+#undef silk_DIV32
+#define silk_DIV32(a,b) silk_DIV32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line)
+{
+    if ( b32 != 0 ) {
+        return a32 / b32;
+    }
+    fprintf (stderr, "silk_DIV32(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return a32 / b32;
+}
+
+/* Debug build of silk_DIV32_16: reports on stderr (and asserts when
+   FIXED_DEBUG_ASSERT is defined) when the divisor is zero or lies outside
+   [silk_int16_MIN, silk_int16_MAX].  The division is performed in every case. */
+#undef silk_DIV32_16
+#define silk_DIV32_16(a,b) silk_DIV32_16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line)
+{
+    const int bad_divisor = (b32 == 0) || (b32 > silk_int16_MAX) || (b32 < silk_int16_MIN);
+
+    if ( bad_divisor ) {
+        fprintf (stderr, "silk_DIV32_16(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a32 / b32;
+}
+
+/* no checking needed for silk_SAT8
+ no checking needed for silk_SAT16
+ no checking needed for silk_SAT32
+ no checking needed for silk_POS_SAT32
+ no checking needed for silk_ADD_POS_SAT8
+ no checking needed for silk_ADD_POS_SAT16
+ no checking needed for silk_ADD_POS_SAT32
+ no checking needed for silk_ADD_POS_SAT64 */
+
+/* Debug build of silk_LSHIFT8: left shift of an 8-bit value.  Reports on stderr
+   (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count is
+   outside [0, 7] or the 8-bit result differs from the same shift done in
+   64 bits. */
+#undef silk_LSHIFT8
+#define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line)
+{
+    const opus_int8 ret = (opus_int8)(a << shift);
+    const int fail = (shift < 0) || (shift >= 8) ||
+                     ((opus_int64)ret != (((opus_int64)a) << shift));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_LSHIFT8(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Debug build of silk_LSHIFT16: left shift of a 16-bit value.  Reports on stderr
+   (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count is
+   outside [0, 15] or the 16-bit result differs from the same shift done in
+   64 bits. */
+#undef silk_LSHIFT16
+#define silk_LSHIFT16(a,b) silk_LSHIFT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line)
+{
+    const opus_int16 ret = (opus_int16)(a << shift);
+    const int fail = (shift < 0) || (shift >= 16) ||
+                     ((opus_int64)ret != (((opus_int64)a) << shift));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_LSHIFT16(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Debug build of silk_LSHIFT32: left shift of a 32-bit value.  Reports on stderr
+   (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count is
+   outside [0, 31] or the 32-bit result differs from the same shift done in
+   64 bits. */
+#undef silk_LSHIFT32
+#define silk_LSHIFT32(a,b) silk_LSHIFT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line)
+{
+    const opus_int32 ret = a << shift;
+    const int fail = (shift < 0) || (shift >= 32) ||
+                     ((opus_int64)ret != (((opus_int64)a) << shift));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_LSHIFT32(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Debug build of silk_LSHIFT64: left shift of a 64-bit value.  Reports on stderr
+   (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count is
+   outside [0, 63] or shifting the result back right does not recover the
+   input. */
+#undef silk_LSHIFT64
+#define silk_LSHIFT64(a,b) silk_LSHIFT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line)
+{
+    const opus_int64 ret = a << shift;
+    const int fail = (shift < 0) || (shift >= 64) ||
+                     ((ret>>shift) != ((opus_int64)a));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_LSHIFT64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Debug build of silk_LSHIFT_ovflw: left shift that only validates the shift
+   count (must be in [0, 31]); overflow of the shifted value is deliberately not
+   checked.  An invalid count is reported on stderr (and asserts when
+   FIXED_DEBUG_ASSERT is defined); the shift is performed in every case. */
+#undef silk_LSHIFT_ovflw
+#define silk_LSHIFT_ovflw(a,b) silk_LSHIFT_ovflw_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line)
+{
+    const int count_ok = (shift >= 0) && (shift < 32); /* no check for overflow */
+
+    if ( !count_ok ) {
+        fprintf (stderr, "silk_LSHIFT_ovflw(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a << shift;
+}
+
+/* Debug build of silk_LSHIFT_uint: left shift of an unsigned 32-bit value.
+   Reports on stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the
+   shift count is outside [0, 31] or the 32-bit result differs from the same
+   shift done in 64 bits.
+   The upper bound on the count is new: shifting a 32-bit operand by 32 or more
+   is undefined in C and was previously only caught incidentally by the value
+   comparison. */
+#undef silk_LSHIFT_uint
+#define silk_LSHIFT_uint(a,b) silk_LSHIFT_uint_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+    opus_uint32 ret;
+    ret = a << shift;
+    if ( (shift < 0) || (shift >= 32) || ((opus_int64)ret != ((opus_int64)a) << shift))
+    {
+        fprintf (stderr, "silk_LSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Debug build of silk_RSHIFT8: right shift of an 8-bit value.  Reports on stderr
+   (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count is
+   outside [0, 7]; the shift is performed in every case.
+   The macro is now defined under its real name: it was spelled silk_RSHITF8,
+   which left silk_RSHIFT8 undefined after the #undef.  The diagnostic text
+   carries the corrected name as well. */
+#undef silk_RSHIFT8
+#define silk_RSHIFT8(a,b) silk_RSHIFT8_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=8) )
+    {
+        fprintf (stderr, "silk_RSHIFT8(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+/* Debug build of silk_RSHIFT16: right shift of a 16-bit value.  Reports on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count
+   is outside [0, 15]; the shift is performed in every case.
+   The macro is now defined under its real name: it was spelled silk_RSHITF16,
+   which left silk_RSHIFT16 undefined after the #undef.  The diagnostic text
+   carries the corrected name as well. */
+#undef silk_RSHIFT16
+#define silk_RSHIFT16(a,b) silk_RSHIFT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=16) )
+    {
+        fprintf (stderr, "silk_RSHIFT16(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+/* Debug build of silk_RSHIFT32: right shift of a 32-bit value.  Reports on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count
+   is outside [0, 31]; the shift is performed in every case.
+   The diagnostic now prints the real macro name (it was misspelt
+   "silk_RSHITF32"). */
+#undef silk_RSHIFT32
+#define silk_RSHIFT32(a,b) silk_RSHIFT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=32) )
+    {
+        fprintf (stderr, "silk_RSHIFT32(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+/* Debug build of silk_RSHIFT64: right shift of a 64-bit value.  Reports on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count
+   is outside [0, 63]; the shift is performed in every case.
+   The diagnostic now prints the real macro name (it was misspelt
+   "silk_RSHITF64"). */
+#undef silk_RSHIFT64
+#define silk_RSHIFT64(a,b) silk_RSHIFT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=64) )
+    {
+        fprintf (stderr, "silk_RSHIFT64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+/* Debug build of silk_RSHIFT_uint: right shift of an unsigned 32-bit value.
+   Reports on stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the
+   shift count is outside [0, 31]; the shift is performed in every case.
+   The upper bound was previously shift > 32, which let a shift by exactly 32
+   through even though shifting a 32-bit operand by its full width is undefined
+   in C. */
+#undef silk_RSHIFT_uint
+#define silk_RSHIFT_uint(a,b) silk_RSHIFT_uint_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>31) )
+    {
+        fprintf (stderr, "silk_RSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+/* Debug build of silk_ADD_LSHIFT: a + (b << shift), held in an opus_int16.
+   Reports on stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the
+   shift count is outside [0, 15] or the 16-bit result differs from the same
+   expression evaluated in 64 bits. */
+#undef silk_ADD_LSHIFT
+#define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line)
+{
+    const opus_int16 ret = (opus_int16)(a + (b << shift));
+    const int fail = (shift < 0) || (shift>15) ||
+                     ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_ADD_LSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret; /* shift >= 0 */
+}
+
+/* Debug build of silk_ADD_LSHIFT32: a + (b << shift) in 32 bits.  Reports on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count
+   is outside [0, 31] or the 32-bit result differs from the same expression
+   evaluated in 64 bits. */
+#undef silk_ADD_LSHIFT32
+#define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line)
+{
+    const opus_int32 ret = a + (b << shift);
+    const int fail = (shift < 0) || (shift>31) ||
+                     ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_ADD_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret; /* shift >= 0 */
+}
+
+/* Debug build of silk_ADD_LSHIFT_uint: unsigned a + (b << shift) in 32 bits.
+   Reports on stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the
+   shift count is outside [0, 31] or the 32-bit result differs from the same
+   expression evaluated in 64 bits.
+   The upper bound was previously shift > 32, which let a shift by exactly 32
+   through even though shifting a 32-bit operand by its full width is undefined
+   in C. */
+#undef silk_ADD_LSHIFT_uint
+#define silk_ADD_LSHIFT_uint(a,b,c) silk_ADD_LSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+    opus_uint32 ret;
+    ret = a + (b << shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
+    {
+        fprintf (stderr, "silk_ADD_LSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret; /* shift >= 0 */
+}
+
+/* Debug build of silk_ADD_RSHIFT: a + (b >> shift), held in an opus_int16.
+   Reports on stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the
+   shift count is outside [0, 15] or the 16-bit result differs from the same
+   expression evaluated in 64 bits. */
+#undef silk_ADD_RSHIFT
+#define silk_ADD_RSHIFT(a,b,c) silk_ADD_RSHIFT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line)
+{
+    const opus_int16 ret = (opus_int16)(a + (b >> shift));
+    const int fail = (shift < 0) || (shift>15) ||
+                     ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_ADD_RSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret; /* shift > 0 */
+}
+
+/* Debug build of silk_ADD_RSHIFT32: a + (b >> shift) in 32 bits.  Reports on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count
+   is outside [0, 31] or the 32-bit result differs from the same expression
+   evaluated in 64 bits. */
+#undef silk_ADD_RSHIFT32
+#define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line)
+{
+    const opus_int32 ret = a + (b >> shift);
+    const int fail = (shift < 0) || (shift>31) ||
+                     ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_ADD_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret; /* shift > 0 */
+}
+
+/* Debug build of silk_ADD_RSHIFT_uint: unsigned a + (b >> shift) in 32 bits.
+   Reports on stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the
+   shift count is outside [0, 31] or the 32-bit result differs from the same
+   expression evaluated in 64 bits.
+   The upper bound was previously shift > 32, which let a shift by exactly 32
+   through even though shifting a 32-bit operand by its full width is undefined
+   in C. */
+#undef silk_ADD_RSHIFT_uint
+#define silk_ADD_RSHIFT_uint(a,b,c) silk_ADD_RSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+    opus_uint32 ret;
+    ret = a + (b >> shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
+    {
+        fprintf (stderr, "silk_ADD_RSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret; /* shift > 0 */
+}
+
+/* Debug build of silk_SUB_LSHIFT32: a - (b << shift) in 32 bits.  Reports on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count
+   is outside [0, 31] or the 32-bit result differs from the same expression
+   evaluated in 64 bits. */
+#undef silk_SUB_LSHIFT32
+#define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line)
+{
+    const opus_int32 ret = a - (b << shift);
+    const int fail = (shift < 0) || (shift>31) ||
+                     ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) << shift));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_SUB_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret; /* shift >= 0 */
+}
+
+/* Debug build of silk_SUB_RSHIFT32: a - (b >> shift) in 32 bits.  Reports on
+   stderr (and asserts when FIXED_DEBUG_ASSERT is defined) when the shift count
+   is outside [0, 31] or the 32-bit result differs from the same expression
+   evaluated in 64 bits. */
+#undef silk_SUB_RSHIFT32
+#define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line)
+{
+    const opus_int32 ret = a - (b >> shift);
+    const int fail = (shift < 0) || (shift>31) ||
+                     ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift));
+
+    if ( fail ) {
+        fprintf (stderr, "silk_SUB_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret; /* shift > 0 */
+}
+
+#undef silk_RSHIFT_ROUND
+#define silk_RSHIFT_ROUND(a,b) silk_RSHIFT_ROUND_((a), (b), __FILE__, __LINE__)
+/* Checked rounding right shift of a 32-bit value. The value computed with the same
+   formula as the plain macro is always returned; a diagnostic goes to stderr when
+   shift is not in 1..31 or the value differs from (a + 2^(shift-1)) >> shift taken
+   in 64 bits. */
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){
+    opus_int32 rounded;
+
+    if( shift == 1 ) {
+        rounded = (a >> 1) + (a & 1);
+    } else {
+        rounded = ((a >> (shift - 1)) + 1) >> 1;
+    }
+
+    /* the macro definition can't handle a shift of zero */
+    if( shift > 0 && shift <= 31 &&
+        (opus_int64)rounded == ( ( (opus_int64)a + ( (opus_int64)1 << (shift - 1) ) ) >> shift ) ) {
+        return rounded;
+    }
+
+    fprintf (stderr, "silk_RSHIFT_ROUND(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return rounded;
+}
+
+#undef silk_RSHIFT_ROUND64
+#define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__)
+/* Checked rounding right shift of a 64-bit value. Only the shift amount is validated
+   (it must lie in 1..63); the result itself is not cross-checked. */
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){
+    /* the macro definition can't handle a shift of zero */
+    const int bad_shift = ( shift <= 0 ) || ( shift >= 64 );
+
+    if( bad_shift ) {
+        fprintf (stderr, "silk_RSHIFT_ROUND64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+
+    if( shift == 1 ) {
+        return (a >> 1) + (a & 1);
+    }
+    return ((a >> (shift - 1)) + 1) >> 1;
+}
+
+/* silk_abs is used on floats also, so doesn't work... */
+/*#undef silk_abs
+static OPUS_INLINE opus_int32 silk_abs(opus_int32 a){
+ silk_assert(a != 0x80000000);
+ return (((a) > 0) ? (a) : -(a)); // Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN
+}*/
+
+#undef silk_abs_int64
+#define silk_abs_int64(a) silk_abs_int64_((a), __FILE__, __LINE__)
+/* Checked 64-bit absolute value. An input of silk_int64_MIN is reported on stderr
+   because its magnitude cannot be represented; the negation is still attempted. */
+static OPUS_INLINE opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){
+    const int unrepresentable = ( a == silk_int64_MIN );
+
+    if( unrepresentable ) {
+        fprintf (stderr, "silk_abs_int64(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+
+    /* Be careful, the result is wrong when the input equals silk_int64_MIN */
+    if( a > 0 ) {
+        return a;
+    }
+    return -a;
+}
+
+#undef silk_abs_int32
+#define silk_abs_int32(a) silk_abs_int32_((a), __FILE__, __LINE__)
+/* Checked 32-bit absolute value. Reports an input of silk_int32_MIN on stderr and
+   then defers to silk_abs() for the actual result. */
+static OPUS_INLINE opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){
+    if( a != silk_int32_MIN ) {
+        return silk_abs(a);
+    }
+
+    fprintf (stderr, "silk_abs_int32(%d) in %s: line %d\n", a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return silk_abs(a);
+}
+
+#undef silk_CHECK_FIT8
+#define silk_CHECK_FIT8(a) silk_CHECK_FIT8_((a), __FILE__, __LINE__)
+/* Narrows a to opus_int8 and returns it; a value that does not survive the round
+   trip back to 64 bits is reported on stderr. */
+static OPUS_INLINE opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){
+    const opus_int8 narrowed = (opus_int8)a;
+
+    if( (opus_int64)narrowed == a ) {
+        return( narrowed );
+    }
+
+    fprintf (stderr, "silk_CHECK_FIT8(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return( narrowed );
+}
+
+#undef silk_CHECK_FIT16
+#define silk_CHECK_FIT16(a) silk_CHECK_FIT16_((a), __FILE__, __LINE__)
+/* Narrows a to opus_int16 and returns it; loss of information in the cast is
+   reported on stderr. */
+static OPUS_INLINE opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){
+    const opus_int16 narrowed = (opus_int16)a;
+    const int        lossy    = ( (opus_int64)narrowed != a );
+
+    if( lossy ) {
+        fprintf (stderr, "silk_CHECK_FIT16(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return( narrowed );
+}
+
+#undef silk_CHECK_FIT32
+#define silk_CHECK_FIT32(a) silk_CHECK_FIT32_((a), __FILE__, __LINE__)
+/* Narrows a to opus_int32 and returns it; a value outside the 32-bit range (one that
+   changes when widened back to 64 bits) is reported on stderr. */
+static OPUS_INLINE opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){
+    const opus_int32 narrowed = (opus_int32)a;
+
+    if( a == (opus_int64)narrowed ) {
+        return( narrowed );
+    }
+
+    fprintf (stderr, "silk_CHECK_FIT32(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+    silk_assert( 0 );
+#endif
+    return( narrowed );
+}
+
+/* no checking for silk_NSHIFT_MUL_32_32
+ no checking for silk_NSHIFT_MUL_16_16
+ no checking needed for silk_min
+ no checking needed for silk_max
+ no checking needed for silk_sign
+*/
+
+#endif
+#endif /* MACRO_DEBUG_H */
diff --git a/drivers/opus/silk/NLSF2A.c b/drivers/opus/silk/NLSF2A.c
new file mode 100644
index 0000000000..2b6f685f49
--- /dev/null
+++ b/drivers/opus/silk/NLSF2A.c
@@ -0,0 +1,178 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* conversion between prediction filter coefficients and LSFs */
+/* order should be even */
+/* a piecewise linear approximation maps LSF <-> cos(LSF) */
+/* therefore the result is not accurate LSFs, but the two */
+/* functions are accurate inverses of each other */
+
+#include "SigProc_FIX.h"
+#include "tables.h"
+
+#define QA 16
+
+/* Helper for silk_NLSF2A(): builds out[ 0 .. dd ] in place from every second entry of
+   cLSF ( cLSF[ 0 ], cLSF[ 2 ], ..., cLSF[ 2 * ( dd - 1 ) ] ). Each pass of the outer
+   loop folds one more cLSF entry into the coefficients already present in out[]. */
+static OPUS_INLINE void silk_NLSF2A_find_poly(
+    opus_int32          *out,      /* O    intermediate polynomial, QA [dd+1]        */
+    const opus_int32    *cLSF,     /* I    vector of interleaved 2*cos(LSFs), QA [d] */
+    opus_int            dd         /* I    polynomial order (= 1/2 * filter order)   */
+)
+{
+    opus_int   stage, tap;
+    opus_int32 c_QA;
+    opus_int64 prod;
+
+    out[ 0 ] = silk_LSHIFT( 1, QA );
+    out[ 1 ] = -cLSF[ 0 ];
+
+    stage = 1;
+    while( stage < dd ) {
+        c_QA = cLSF[ 2 * stage ];                                   /* QA */
+
+        /* New highest coefficient */
+        prod = silk_SMULL( c_QA, out[ stage ] );
+        out[ stage + 1 ] = silk_LSHIFT( out[ stage - 1 ], 1 ) - (opus_int32)silk_RSHIFT_ROUND64( prod, QA );
+
+        /* Update from the top down so each step still reads the previous pass's values */
+        for( tap = stage; tap >= 2; tap-- ) {
+            prod = silk_SMULL( c_QA, out[ tap - 1 ] );
+            out[ tap ] += out[ tap - 2 ] - (opus_int32)silk_RSHIFT_ROUND64( prod, QA );
+        }
+        out[ 1 ] -= c_QA;
+
+        stage++;
+    }
+}
+
+/* compute whitening filter coefficients from normalized line spectral frequencies
+ *
+ * Outline of the code below:
+ *   1. each NLSF[k] is mapped through silk_LSFCosTab_FIX_Q12 with linear interpolation
+ *      and stored at cos_LSF_QA[ ordering[k] ];
+ *   2. silk_NLSF2A_find_poly() builds P from the even-indexed and Q from the odd-indexed
+ *      entries of cos_LSF_QA;
+ *   3. P and Q are combined into d coefficients a32_QA1[] (QA+1 domain);
+ *   4. silk_bwexpander_32() is applied up to 10 times while the largest rounded Q12
+ *      magnitude exceeds silk_int16_MAX; if all 10 rounds were used the values are
+ *      saturated to 16 bits instead of merely rounded;
+ *   5. silk_bwexpander_32() is applied up to MAX_LPC_STABILIZE_ITERATIONS more times
+ *      while silk_LPC_inverse_pred_gain() stays below the threshold.
+ */
+void silk_NLSF2A(
+ opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */
+ const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */
+ const opus_int d /* I filter order (should be even; the assert below accepts only 10 or 16) */
+)
+{
+ /* This ordering was found to maximize quality. It improves numerical accuracy of
+ silk_NLSF2A_find_poly() compared to "standard" ordering. */
+ static const unsigned char ordering16[16] = {
+ 0, 15, 8, 7, 4, 11, 12, 3, 2, 13, 10, 5, 6, 9, 14, 1
+ };
+ static const unsigned char ordering10[10] = {
+ 0, 9, 6, 3, 4, 5, 8, 1, 2, 7
+ };
+ const unsigned char *ordering;
+ opus_int k, i, dd;
+ opus_int32 cos_LSF_QA[ SILK_MAX_ORDER_LPC ];
+ opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
+ opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta;
+ opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ];
+ opus_int32 maxabs, absval, idx=0, sc_Q16;
+
+ silk_assert( LSF_COS_TAB_SZ_FIX == 128 );
+ silk_assert( d==10||d==16 );
+
+ /* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */
+ ordering = d == 16 ? ordering16 : ordering10;
+ for( k = 0; k < d; k++ ) {
+ silk_assert(NLSF[k] >= 0 );
+
+ /* f_int on a scale 0-127 (rounded down) */
+ f_int = silk_RSHIFT( NLSF[k], 15 - 7 );
+
+ /* f_frac, range: 0..255 */
+ f_frac = NLSF[k] - silk_LSHIFT( f_int, 15 - 7 );
+
+ silk_assert(f_int >= 0);
+ silk_assert(f_int < LSF_COS_TAB_SZ_FIX );
+
+ /* Read start and end value from table */
+ cos_val = silk_LSFCosTab_FIX_Q12[ f_int ]; /* Q12 */
+ delta = silk_LSFCosTab_FIX_Q12[ f_int + 1 ] - cos_val; /* Q12, with a range of 0..200 */
+
+ /* Linear interpolation; the result is written to the permuted slot ordering[k] */
+ cos_LSF_QA[ordering[k]] = silk_RSHIFT_ROUND( silk_LSHIFT( cos_val, 8 ) + silk_MUL( delta, f_frac ), 20 - QA ); /* QA */
+ }
+
+ dd = silk_RSHIFT( d, 1 ); /* half the filter order */
+
+ /* generate even and odd polynomials using convolution */
+ silk_NLSF2A_find_poly( P, &cos_LSF_QA[ 0 ], dd );
+ silk_NLSF2A_find_poly( Q, &cos_LSF_QA[ 1 ], dd );
+
+ /* convert even and odd polynomials to opus_int32 Q12 filter coefs */
+ for( k = 0; k < dd; k++ ) {
+ Ptmp = P[ k+1 ] + P[ k ];
+ Qtmp = Q[ k+1 ] - Q[ k ];
+
+ /* the Ptmp and Qtmp values at this stage need to fit in int32 */
+ a32_QA1[ k ] = -Qtmp - Ptmp; /* QA+1 */
+ a32_QA1[ d-k-1 ] = Qtmp - Ptmp; /* QA+1 */
+ }
+
+ /* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */
+ for( i = 0; i < 10; i++ ) { /* at most 10 attempts; i == 10 afterwards means no attempt hit the break */
+ /* Find maximum absolute value and its index */
+ maxabs = 0;
+ for( k = 0; k < d; k++ ) {
+ absval = silk_abs( a32_QA1[k] );
+ if( absval > maxabs ) {
+ maxabs = absval;
+ idx = k;
+ }
+ }
+ maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 ); /* QA+1 -> Q12 */
+
+ if( maxabs > silk_int16_MAX ) {
+ /* Reduce magnitude of prediction coefficients */
+ maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */
+ sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ),
+ silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) );
+ silk_bwexpander_32( a32_QA1, d, sc_Q16 );
+ } else {
+ break;
+ }
+ }
+
+ if( i == 10 ) {
+ /* Reached the last iteration, clip the coefficients */
+ for( k = 0; k < d; k++ ) {
+ a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) ); /* QA+1 -> Q12 */
+ a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 ); /* write the clipped value back so a32_QA1 agrees with a_Q12 in the loop below */
+ }
+ } else {
+ for( k = 0; k < d; k++ ) {
+ a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
+ }
+ }
+
+ for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) {
+ if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
+ /* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */
+ /* on the unscaled coefficients, convert to Q12 and measure again */
+ silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) ); /* third argument decreases with i: 65536 - ( 2 << i ) */
+ for( k = 0; k < d; k++ ) {
+ a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
+ }
+ } else {
+ break;
+ }
+ }
+}
+
diff --git a/drivers/opus/silk/NLSF_VQ.c b/drivers/opus/silk/NLSF_VQ.c
new file mode 100644
index 0000000000..e4ca79fbfe
--- /dev/null
+++ b/drivers/opus/silk/NLSF_VQ.c
@@ -0,0 +1,68 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Compute quantization errors for an LPC_order element input vector for a VQ codebook.
+   For each of the K codebook vectors the squared differences between in_Q15 and the
+   codebook entry (shifted up by 7 bits) are accumulated two elements at a time. */
+void silk_NLSF_VQ(
+    opus_int32                  err_Q26[],      /* O    Quantization errors [K]                     */
+    const opus_int16            in_Q15[],       /* I    Input vectors to be quantized [LPC_order]   */
+    const opus_uint8            pCB_Q8[],       /* I    Codebook vectors [K*LPC_order]              */
+    const opus_int              K,              /* I    Number of codebook vectors                  */
+    const opus_int              LPC_order       /* I    Number of LPCs                              */
+)
+{
+    opus_int         vec, pos;
+    opus_int32       d0_Q15, d1_Q15, pair_err_Q30, acc_Q26;
+    const opus_uint8 *cb_ptr = pCB_Q8;          /* read cursor; advances through the whole codebook */
+
+    silk_assert( LPC_order <= 16 );
+    silk_assert( ( LPC_order & 1 ) == 0 );
+
+    /* Loop over codebook */
+    for( vec = 0; vec < K; vec++ ) {
+        acc_Q26 = 0;
+        pos = 0;
+        while( pos < LPC_order ) {
+            /* Differences for elements pos and pos + 1, range: [ -32767 : 32767 ] */
+            d0_Q15 = silk_SUB_LSHIFT32( in_Q15[ pos ],     (opus_int32)cb_ptr[ 0 ], 7 );
+            d1_Q15 = silk_SUB_LSHIFT32( in_Q15[ pos + 1 ], (opus_int32)cb_ptr[ 1 ], 7 );
+            cb_ptr += 2;
+
+            /* Sum of the two squared differences */
+            pair_err_Q30 = silk_SMULBB( d0_Q15, d0_Q15 );
+            pair_err_Q30 = silk_SMLABB( pair_err_Q30, d1_Q15, d1_Q15 );
+
+            acc_Q26 = silk_ADD_RSHIFT32( acc_Q26, pair_err_Q30, 4 );
+
+            silk_assert( acc_Q26 >= 0 );
+            silk_assert( pair_err_Q30 >= 0 );
+
+            pos += 2;
+        }
+        err_Q26[ vec ] = acc_Q26;
+    }
+}
diff --git a/drivers/opus/silk/NLSF_VQ_weights_laroia.c b/drivers/opus/silk/NLSF_VQ_weights_laroia.c
new file mode 100644
index 0000000000..f461ba01c0
--- /dev/null
+++ b/drivers/opus/silk/NLSF_VQ_weights_laroia.c
@@ -0,0 +1,80 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "define.h"
+#include "SigProc_FIX.h"
+
+/*
+R. Laroia, N. Phamdo and N. Farvardin, "Robust and Efficient Quantization of Speech LSP
+Parameters Using Structured Vector Quantization", Proc. IEEE Int. Conf. Acoust., Speech,
+Signal Processing, pp. 641-644, 1991.
+*/
+
+/* Laroia low complexity NLSF weights.
+   Every output weight is the sum of two reciprocals: one over the distance to the
+   lower neighbour and one over the distance to the upper neighbour (0 and 1 << 15 act
+   as neighbours at the two ends). Distances are floored at 1 and each sum is capped
+   at silk_int16_MAX. */
+void silk_NLSF_VQ_weights_laroia(
+    opus_int16                  *pNLSFW_Q_OUT,  /* O    Pointer to input vector weights [D]     */
+    const opus_int16            *pNLSF_Q15,     /* I    Pointer to input vector         [D]     */
+    const opus_int              D               /* I    Input vector dimension (even)           */
+)
+{
+    opus_int         pos;
+    opus_int32       inv_a, inv_b;
+    const opus_int32 unity = (opus_int32)1 << ( 15 + NLSF_W_Q );   /* numerator of every reciprocal */
+
+    silk_assert( D > 0 );
+    silk_assert( ( D & 1 ) == 0 );
+
+    /* First value */
+    inv_a = silk_max_int( pNLSF_Q15[ 0 ], 1 );
+    inv_a = silk_DIV32_16( unity, inv_a );
+    inv_b = silk_max_int( pNLSF_Q15[ 1 ] - pNLSF_Q15[ 0 ], 1 );
+    inv_b = silk_DIV32_16( unity, inv_b );
+    pNLSFW_Q_OUT[ 0 ] = (opus_int16)silk_min_int( inv_a + inv_b, silk_int16_MAX );
+    silk_assert( pNLSFW_Q_OUT[ 0 ] > 0 );
+
+    /* Main loop: two outputs per pass, so each reciprocal is computed once and reused */
+    pos = 1;
+    while( pos < D - 1 ) {
+        /* inv_b still holds the reciprocal for the gap below pos */
+        inv_a = silk_max_int( pNLSF_Q15[ pos + 1 ] - pNLSF_Q15[ pos ], 1 );
+        inv_a = silk_DIV32_16( unity, inv_a );
+        pNLSFW_Q_OUT[ pos ] = (opus_int16)silk_min_int( inv_a + inv_b, silk_int16_MAX );
+        silk_assert( pNLSFW_Q_OUT[ pos ] > 0 );
+
+        /* inv_a now holds the reciprocal for the gap below pos + 1 */
+        inv_b = silk_max_int( pNLSF_Q15[ pos + 2 ] - pNLSF_Q15[ pos + 1 ], 1 );
+        inv_b = silk_DIV32_16( unity, inv_b );
+        pNLSFW_Q_OUT[ pos + 1 ] = (opus_int16)silk_min_int( inv_a + inv_b, silk_int16_MAX );
+        silk_assert( pNLSFW_Q_OUT[ pos + 1 ] > 0 );
+
+        pos += 2;
+    }
+
+    /* Last value */
+    inv_a = silk_max_int( ( 1 << 15 ) - pNLSF_Q15[ D - 1 ], 1 );
+    inv_a = silk_DIV32_16( unity, inv_a );
+    pNLSFW_Q_OUT[ D - 1 ] = (opus_int16)silk_min_int( inv_a + inv_b, silk_int16_MAX );
+    silk_assert( pNLSFW_Q_OUT[ D - 1 ] > 0 );
+}
diff --git a/drivers/opus/silk/NLSF_decode.c b/drivers/opus/silk/NLSF_decode.c
new file mode 100644
index 0000000000..786a62d278
--- /dev/null
+++ b/drivers/opus/silk/NLSF_decode.c
@@ -0,0 +1,101 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Predictive dequantizer for NLSF residuals.
+   Runs from the last element to the first: each output is the scaled, level-adjusted
+   index plus a prediction taken from the output produced just before it. */
+static OPUS_INLINE void silk_NLSF_residual_dequant(
+          opus_int16         x_Q10[],                        /* O    Output [ order ]                            */
+    const opus_int8          indices[],                      /* I    Quantization indices [ order ]              */
+    const opus_uint8         pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
+    const opus_int           quant_step_size_Q16,            /* I    Quantization step size                      */
+    const opus_int16         order                           /* I    Number of input values                      */
+)
+{
+    opus_int pos, prev_Q10, level_Q10, pred_Q10;
+
+    prev_Q10 = 0;
+    pos      = order;
+    while( pos-- > 0 ) {
+        /* Prediction from the previously produced output (zero on the first pass) */
+        pred_Q10  = silk_RSHIFT( silk_SMULBB( prev_Q10, (opus_int16)pred_coef_Q8[ pos ] ), 8 );
+
+        /* Index to Q10, pulled towards zero by the level adjustment unless it is zero */
+        level_Q10 = silk_LSHIFT( indices[ pos ], 10 );
+        if( level_Q10 > 0 ) {
+            level_Q10 = silk_SUB16( level_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        } else if( level_Q10 < 0 ) {
+            level_Q10 = silk_ADD16( level_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        }
+
+        prev_Q10     = silk_SMLAWB( pred_Q10, (opus_int32)level_Q10, quant_step_size_Q16 );
+        x_Q10[ pos ] = prev_Q10;
+    }
+}
+
+
+/***********************/
+/* NLSF vector decoder */
+/***********************/
+/* Rebuilds the NLSF vector from a codebook path: first-stage codebook vector, plus the
+   dequantized residual divided by the square root of the Laroia weight, then limited
+   to 0..32767 and stabilized. */
+void silk_NLSF_decode(
+          opus_int16            *pNLSF_Q15,                     /* O    Quantized NLSF vector [ LPC_ORDER ]         */
+          opus_int8             *NLSFIndices,                   /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
+    const silk_NLSF_CB_struct   *psNLSF_CB                      /* I    Codebook object                             */
+)
+{
+    const opus_int   order = psNLSF_CB->order;
+    const opus_uint8 *cb_vec_Q8;
+    opus_int         n;
+    opus_uint8       pred_Q8[  MAX_LPC_ORDER ];
+    opus_int16       ec_ix[    MAX_LPC_ORDER ];
+    opus_int16       res_Q10[  MAX_LPC_ORDER ];
+    opus_int16       W_tmp_QW[ MAX_LPC_ORDER ];
+    opus_int32       sqrt_w_Q9, res_term, sum_Q15;
+
+    /* Decode first stage: NLSFIndices[ 0 ] selects one vector of the first codebook */
+    cb_vec_Q8 = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * order ];
+    for( n = 0; n < order; n++ ) {
+        pNLSF_Q15[ n ] = silk_LSHIFT( (opus_int16)cb_vec_Q8[ n ], 7 );
+    }
+
+    /* Unpack entropy table indices and predictor for current CB1 index */
+    silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] );
+
+    /* Predictive residual dequantizer; the residual indices start at NLSFIndices[ 1 ] */
+    silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, order );
+
+    /* Weights from codebook vector */
+    silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, order );
+
+    /* Apply inverse square-rooted weights and add to output */
+    for( n = 0; n < order; n++ ) {
+        sqrt_w_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ n ], 18 - NLSF_W_Q ) );
+        res_term  = silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ n ], 14 ), sqrt_w_Q9 );
+        sum_Q15   = silk_ADD32( pNLSF_Q15[ n ], res_term );
+        pNLSF_Q15[ n ] = (opus_int16)silk_LIMIT( sum_Q15, 0, 32767 );
+    }
+
+    /* NLSF stabilization */
+    silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, order );
+}
diff --git a/drivers/opus/silk/NLSF_del_dec_quant.c b/drivers/opus/silk/NLSF_del_dec_quant.c
new file mode 100644
index 0000000000..b74585370c
--- /dev/null
+++ b/drivers/opus/silk/NLSF_del_dec_quant.c
@@ -0,0 +1,207 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Delayed-decision quantizer for NLSF residuals
+ *
+ * The coefficients are processed from index order-1 down to 0. For every surviving
+ * path two candidate indices (ind_tmp and ind_tmp + 1) are evaluated; each gets a cost
+ * in RD_Q25 that adds the squared error times w_Q5[ i ] and mu_Q20 times the rate to
+ * the cost of the path so far. The number of paths doubles per coefficient until
+ * NLSF_QUANT_DEL_DEC_STATES is reached; from then on the 2 * NLSF_QUANT_DEL_DEC_STATES
+ * candidates are pruned back to NLSF_QUANT_DEL_DEC_STATES after each coefficient.
+ * The cheapest candidate after coefficient 0 determines indices[] and the return value.
+ */
+opus_int32 silk_NLSF_del_dec_quant( /* O Returns RD value in Q25 */
+ opus_int8 indices[], /* O Quantization indices [ order ] */
+ const opus_int16 x_Q10[], /* I Input [ order ] */
+ const opus_int16 w_Q5[], /* I Weights [ order ] */
+ const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */
+ const opus_int16 ec_ix[], /* I Indices to entropy coding tables [ order ] */
+ const opus_uint8 ec_rates_Q5[], /* I Rates [] */
+ const opus_int quant_step_size_Q16, /* I Quantization step size */
+ const opus_int16 inv_quant_step_size_Q6, /* I Inverse quantization step size */
+ const opus_int32 mu_Q20, /* I R/D tradeoff */
+ const opus_int16 order /* I Number of input values */
+)
+{
+ opus_int i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
+ opus_int pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
+ opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
+ opus_int ind_sort[ NLSF_QUANT_DEL_DEC_STATES ];
+ opus_int8 ind[ NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
+ opus_int16 prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
+ opus_int32 RD_Q25[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
+ opus_int32 RD_min_Q25[ NLSF_QUANT_DEL_DEC_STATES ];
+ opus_int32 RD_max_Q25[ NLSF_QUANT_DEL_DEC_STATES ];
+ const opus_uint8 *rates_Q5;
+
+ silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 ); /* must be power of two */
+
+ nStates = 1; /* a single path with zero cost and zero previous output to begin with */
+ RD_Q25[ 0 ] = 0;
+ prev_out_Q10[ 0 ] = 0;
+ for( i = order - 1; ; i-- ) { /* no loop condition: the only exit is the break in the i == 0 branch below */
+ rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
+ pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
+ in_Q10 = x_Q10[ i ];
+ for( j = 0; j < nStates; j++ ) {
+ pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
+ res_Q10 = silk_SUB16( in_Q10, pred_Q10 );
+ ind_tmp = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 );
+ ind_tmp = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
+ ind[ j ][ i ] = (opus_int8)ind_tmp; /* lower candidate; the ind_tmp + 1 candidate is tracked at slot j + nStates */
+
+ /* compute outputs for ind_tmp and ind_tmp + 1 */
+ out0_Q10 = silk_LSHIFT( ind_tmp, 10 );
+ out1_Q10 = silk_ADD16( out0_Q10, 1024 );
+ if( ind_tmp > 0 ) { /* both candidates positive */
+ out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+ out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+ } else if( ind_tmp == 0 ) { /* candidates 0 and +1: only the +1 level is adjusted */
+ out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+ } else if( ind_tmp == -1 ) { /* candidates -1 and 0: only the -1 level is adjusted */
+ out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+ } else { /* both candidates negative */
+ out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+ out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+ }
+ out0_Q10 = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
+ out1_Q10 = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
+ out0_Q10 = silk_ADD16( out0_Q10, pred_Q10 );
+ out1_Q10 = silk_ADD16( out1_Q10, pred_Q10 );
+ prev_out_Q10[ j ] = out0_Q10;
+ prev_out_Q10[ j + nStates ] = out1_Q10;
+
+ /* compute RD for ind_tmp and ind_tmp + 1 */
+ /* NOTE(review): 280 and 43 look like fixed rate figures used when an index falls outside the rates_Q5 table - confirm */
+ if( ind_tmp + 1 >= NLSF_QUANT_MAX_AMPLITUDE ) {
+ if( ind_tmp + 1 == NLSF_QUANT_MAX_AMPLITUDE ) {
+ rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ];
+ rate1_Q5 = 280;
+ } else {
+ rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, 43, ind_tmp );
+ rate1_Q5 = silk_ADD16( rate0_Q5, 43 );
+ }
+ } else if( ind_tmp <= -NLSF_QUANT_MAX_AMPLITUDE ) {
+ if( ind_tmp == -NLSF_QUANT_MAX_AMPLITUDE ) {
+ rate0_Q5 = 280;
+ rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
+ } else {
+ rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, -43, ind_tmp );
+ rate1_Q5 = silk_SUB16( rate0_Q5, 43 );
+ }
+ } else {
+ rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ];
+ rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
+ }
+ RD_tmp_Q25 = RD_Q25[ j ]; /* cost of the path so far; both candidates extend it */
+ diff_Q10 = silk_SUB16( in_Q10, out0_Q10 );
+ RD_Q25[ j ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate0_Q5 );
+ diff_Q10 = silk_SUB16( in_Q10, out1_Q10 );
+ RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
+ }
+
+ if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) {
+ /* double number of states and copy */
+ for( j = 0; j < nStates; j++ ) {
+ ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1; /* the new paths are the ones that took the +1 candidate */
+ }
+ nStates = silk_LSHIFT( nStates, 1 );
+ for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { /* fill the rows that are not active yet with copies */
+ ind[ j ][ i ] = ind[ j - nStates ][ i ];
+ }
+ } else if( i > 0 ) {
+ /* sort lower and upper half of RD_Q25, pairwise */
+ for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+ if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) {
+ RD_max_Q25[ j ] = RD_Q25[ j ];
+ RD_min_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
+ RD_Q25[ j ] = RD_min_Q25[ j ];
+ RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] = RD_max_Q25[ j ];
+ /* swap prev_out values */
+ out0_Q10 = prev_out_Q10[ j ];
+ prev_out_Q10[ j ] = prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ];
+ prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ] = out0_Q10;
+ ind_sort[ j ] = j + NLSF_QUANT_DEL_DEC_STATES; /* the cheaper candidate came from the upper half */
+ } else {
+ RD_min_Q25[ j ] = RD_Q25[ j ];
+ RD_max_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
+ ind_sort[ j ] = j;
+ }
+ }
+ /* compare the highest RD values of the winning half with the lowest one in the losing half, and copy if necessary */
+ /* afterwards ind_sort[] will contain the indices of the NLSF_QUANT_DEL_DEC_STATES winning RD values */
+ while( 1 ) {
+ min_max_Q25 = silk_int32_MAX;
+ max_min_Q25 = 0;
+ ind_min_max = 0;
+ ind_max_min = 0;
+ for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+ if( min_max_Q25 > RD_max_Q25[ j ] ) {
+ min_max_Q25 = RD_max_Q25[ j ];
+ ind_min_max = j;
+ }
+ if( max_min_Q25 < RD_min_Q25[ j ] ) {
+ max_min_Q25 = RD_min_Q25[ j ];
+ ind_max_min = j;
+ }
+ }
+ if( min_max_Q25 >= max_min_Q25 ) { /* no loser is cheaper than the most expensive winner: done */
+ break;
+ }
+ /* copy ind_min_max to ind_max_min */
+ ind_sort[ ind_max_min ] = ind_sort[ ind_min_max ] ^ NLSF_QUANT_DEL_DEC_STATES; /* XOR selects the other member of pair ind_min_max */
+ RD_Q25[ ind_max_min ] = RD_Q25[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
+ prev_out_Q10[ ind_max_min ] = prev_out_Q10[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
+ RD_min_Q25[ ind_max_min ] = 0; /* with the strict comparisons above, neither slot can be selected again */
+ RD_max_Q25[ ind_min_max ] = silk_int32_MAX;
+ silk_memcpy( ind[ ind_max_min ], ind[ ind_min_max ], MAX_LPC_ORDER * sizeof( opus_int8 ) );
+ }
+ /* increment index if it comes from the upper half */
+ for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+ ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 );
+ }
+ } else { /* i == 0 */
+ break;
+ }
+ }
+
+ /* last sample: find winner, copy indices and return RD value */
+ ind_tmp = 0;
+ min_Q25 = silk_int32_MAX;
+ for( j = 0; j < 2 * NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+ if( min_Q25 > RD_Q25[ j ] ) {
+ min_Q25 = RD_Q25[ j ];
+ ind_tmp = j;
+ }
+ }
+ for( j = 0; j < order; j++ ) {
+ indices[ j ] = ind[ ind_tmp & ( NLSF_QUANT_DEL_DEC_STATES - 1 ) ][ j ]; /* the mask maps an upper-half winner back onto its path row */
+ silk_assert( indices[ j ] >= -NLSF_QUANT_MAX_AMPLITUDE_EXT );
+ silk_assert( indices[ j ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT );
+ }
+ indices[ 0 ] += silk_RSHIFT( ind_tmp, NLSF_QUANT_DEL_DEC_STATES_LOG2 ); /* adds 1 when the winner is an upper-half (ind_tmp + 1) candidate */
+ silk_assert( indices[ 0 ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT );
+ silk_assert( min_Q25 >= 0 );
+ return min_Q25;
+}
diff --git a/drivers/opus/silk/NLSF_encode.c b/drivers/opus/silk/NLSF_encode.c
new file mode 100644
index 0000000000..bf67bd5cf1
--- /dev/null
+++ b/drivers/opus/silk/NLSF_encode.c
@@ -0,0 +1,136 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/***********************/
+/* NLSF vector encoder */
+/***********************/
+opus_int32 silk_NLSF_encode( /* O Returns the lowest RD value found, in Q25 */
+ opus_int8 *NLSFIndices, /* O Codebook path vector: [ 0 ] is the first-stage index, the residual indices follow [ LPC_ORDER + 1 ] */
+ opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */
+ const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */
+ const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */
+ const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */
+ const opus_int nSurvivors, /* I Max survivors after first stage */
+ const opus_int signalType /* I Signal type: 0/1/2 */
+)
+{
+ opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7;
+ opus_int32 W_tmp_Q9, ret;
+ VARDECL( opus_int32, err_Q26 );
+ VARDECL( opus_int32, RD_Q25 );
+ VARDECL( opus_int, tempIndices1 );
+ VARDECL( opus_int8, tempIndices2 );
+ opus_int16 res_Q15[ MAX_LPC_ORDER ];
+ opus_int16 res_Q10[ MAX_LPC_ORDER ];
+ opus_int16 NLSF_tmp_Q15[ MAX_LPC_ORDER ];
+ opus_int16 W_tmp_QW[ MAX_LPC_ORDER ];
+ opus_int16 W_adj_Q5[ MAX_LPC_ORDER ];
+ opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
+ opus_int16 ec_ix[ MAX_LPC_ORDER ];
+ const opus_uint8 *pCB_element, *iCDF_ptr;
+ SAVE_STACK;
+
+ silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS );
+ silk_assert( signalType >= 0 && signalType <= 2 );
+ silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 );
+
+ /* NLSF stabilization (in place, so the search below works on the stabilized input) */
+ silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
+
+ /* First stage: VQ, one error value per first-stage codebook vector */
+ ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 );
+ silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order );
+
+ /* Sort the quantization errors; tempIndices1 receives the nSurvivors candidate indices */
+ ALLOC( tempIndices1, nSurvivors, opus_int );
+ silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
+
+ ALLOC( RD_Q25, nSurvivors, opus_int32 );
+ ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 );
+
+ /* Loop over survivors */
+ for( s = 0; s < nSurvivors; s++ ) {
+ ind1 = tempIndices1[ s ];
+
+ /* Residual after first stage (codebook entries are Q8, shifted up by 7 to Q15) */
+ pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ];
+ for( i = 0; i < psNLSF_CB->order; i++ ) {
+ NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 );
+ res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ];
+ }
+
+ /* Weights from codebook vector */
+ silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order );
+
+ /* Apply square-rooted weights */
+ for( i = 0; i < psNLSF_CB->order; i++ ) {
+ W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
+ res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 );
+ }
+
+ /* Modify input weights accordingly: input weight divided by the codebook-vector weight, in Q5 */
+ for( i = 0; i < psNLSF_CB->order; i++ ) {
+ W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] );
+ }
+
+ /* Unpack entropy table indices and predictor for current CB1 index */
+ silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, ind1 );
+
+ /* Trellis quantizer; survivor s writes its residual indices into its own MAX_LPC_ORDER-wide slot */
+ RD_Q25[ s ] = silk_NLSF_del_dec_quant( &tempIndices2[ s * MAX_LPC_ORDER ], res_Q10, W_adj_Q5, pred_Q8, ec_ix,
+ psNLSF_CB->ec_Rates_Q5, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->invQuantStepSize_Q6, NLSF_mu_Q20, psNLSF_CB->order );
+
+ /* Add rate for first stage; ( signalType >> 1 ) picks table 0 for types 0/1 and table 1 for type 2 */
+ iCDF_ptr = &psNLSF_CB->CB1_iCDF[ ( signalType >> 1 ) * psNLSF_CB->nVectors ];
+ if( ind1 == 0 ) {
+ prob_Q8 = 256 - iCDF_ptr[ ind1 ];
+ } else {
+ prob_Q8 = iCDF_ptr[ ind1 - 1 ] - iCDF_ptr[ ind1 ];
+ }
+ bits_q7 = ( 8 << 7 ) - silk_lin2log( prob_Q8 ); /* presumably -log2( prob_Q8 / 256 ) in Q7 - confirm against silk_lin2log() */
+ RD_Q25[ s ] = silk_SMLABB( RD_Q25[ s ], bits_q7, silk_RSHIFT( NLSF_mu_Q20, 2 ) );
+ }
+
+ /* Find the lowest rate-distortion error; the winner is read back from RD_Q25[ 0 ] and bestIndex below */
+ silk_insertion_sort_increasing( RD_Q25, &bestIndex, nSurvivors, 1 );
+
+ NLSFIndices[ 0 ] = (opus_int8)tempIndices1[ bestIndex ];
+ silk_memcpy( &NLSFIndices[ 1 ], &tempIndices2[ bestIndex * MAX_LPC_ORDER ], psNLSF_CB->order * sizeof( opus_int8 ) );
+
+ /* Decode the chosen indices so that pNLSF_Q15 holds the quantized vector on return */
+ silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
+ ret = RD_Q25[ 0 ]; /* RD_Q25 comes from ALLOC(): latch the result before RESTORE_STACK so scratch memory is never read after release */
+ RESTORE_STACK;
+ return ret;
+}
diff --git a/drivers/opus/silk/NLSF_stabilize.c b/drivers/opus/silk/NLSF_stabilize.c
new file mode 100644
index 0000000000..a1bf20b8d4
--- /dev/null
+++ b/drivers/opus/silk/NLSF_stabilize.c
@@ -0,0 +1,142 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* NLSF stabilizer: */
+/* */
+/* - Moves NLSFs further apart if they are too close */
+/* - Moves NLSFs away from borders if they are too close */
+/* - High effort to achieve a modification with minimum */
+/* Euclidean distance to input vector */
+/* - Output are sorted NLSF coefficients */
+/* */
+
+#include "SigProc_FIX.h"
+
+/* Constant Definitions */
+#define MAX_LOOPS 20 /* Iteration cap for the pairwise correction loop; once reached, the sort-based fallback runs */
+
+/* NLSF stabilizer, for a single input data vector: enforces the minimum spacings of NDeltaMin_Q15 in place */
+void silk_NLSF_stabilize(
+ opus_int16 *NLSF_Q15, /* I/O Unstable/stabilized normalized LSF vector in Q15 [L] */
+ const opus_int16 *NDeltaMin_Q15, /* I Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1] */
+ const opus_int L /* I Number of NLSF parameters in the input vector */
+)
+{
+ opus_int i, I=0, k, loops; /* I: index of the smallest spacing found (0 = lower border, L = upper border) */
+ opus_int16 center_freq_Q15;
+ opus_int32 diff_Q15, min_diff_Q15, min_center_Q15, max_center_Q15;
+
+ /* This is necessary to ensure an output within range of a opus_int16 */
+ silk_assert( NDeltaMin_Q15[L] >= 1 );
+
+ for( loops = 0; loops < MAX_LOOPS; loops++ ) {
+ /**************************/
+ /* Find smallest distance */
+ /**************************/
+ /* First element: margin between NLSF_Q15[0] and the lower border spacing */
+ min_diff_Q15 = NLSF_Q15[0] - NDeltaMin_Q15[0];
+ I = 0;
+ /* Middle elements: margin between neighbours i-1 and i beyond the required spacing */
+ for( i = 1; i <= L-1; i++ ) {
+ diff_Q15 = NLSF_Q15[i] - ( NLSF_Q15[i-1] + NDeltaMin_Q15[i] );
+ if( diff_Q15 < min_diff_Q15 ) {
+ min_diff_Q15 = diff_Q15;
+ I = i;
+ }
+ }
+ /* Last element: margin between NLSF_Q15[L-1] and the upper border ( 1 << 15 ) */
+ diff_Q15 = ( 1 << 15 ) - ( NLSF_Q15[L-1] + NDeltaMin_Q15[L] );
+ if( diff_Q15 < min_diff_Q15 ) {
+ min_diff_Q15 = diff_Q15;
+ I = L;
+ }
+
+ /******************************************************/
+ /* Now check if the smallest distance is non-negative */
+ /******************************************************/
+ if( min_diff_Q15 >= 0 ) {
+ return;
+ }
+
+ if( I == 0 ) {
+ /* Move away from lower limit */
+ NLSF_Q15[0] = NDeltaMin_Q15[0];
+
+ } else if( I == L) {
+ /* Move away from higher limit */
+ NLSF_Q15[L-1] = ( 1 << 15 ) - NDeltaMin_Q15[L];
+
+ } else {
+ /* Find the lower extreme for the location of the current center frequency */
+ min_center_Q15 = 0;
+ for( k = 0; k < I; k++ ) {
+ min_center_Q15 += NDeltaMin_Q15[k];
+ }
+ min_center_Q15 += silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+
+ /* Find the upper extreme for the location of the current center frequency */
+ max_center_Q15 = 1 << 15;
+ for( k = L; k > I; k-- ) {
+ max_center_Q15 -= NDeltaMin_Q15[k];
+ }
+ max_center_Q15 -= silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+
+ /* Move apart, sorted by value, keeping the same center frequency (clamped to the extremes found above) */
+ center_freq_Q15 = (opus_int16)silk_LIMIT_32( silk_RSHIFT_ROUND( (opus_int32)NLSF_Q15[I-1] + (opus_int32)NLSF_Q15[I], 1 ),
+ min_center_Q15, max_center_Q15 );
+ NLSF_Q15[I-1] = center_freq_Q15 - silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+ NLSF_Q15[I] = NLSF_Q15[I-1] + NDeltaMin_Q15[I];
+ }
+ }
+
+ /* Safe and simple fall back method, which is less ideal than the above */
+ if( loops == MAX_LOOPS ) /* always true when reached: the loop above only leaves early through return */
+ {
+ /* Insertion sort (fast for already almost sorted arrays): */
+ /* Best case: O(n) for an already sorted array */
+ /* Worst case: O(n^2) for an inversely sorted array */
+ silk_insertion_sort_increasing_all_values_int16( &NLSF_Q15[0], L );
+
+ /* First NLSF should be no less than NDeltaMin[0] */
+ NLSF_Q15[0] = silk_max_int( NLSF_Q15[0], NDeltaMin_Q15[0] );
+
+ /* Keep delta_min distance between the NLSFs (forward pass, pushes values up) */
+ for( i = 1; i < L; i++ )
+ NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], NLSF_Q15[i-1] + NDeltaMin_Q15[i] ); /* NOTE(review): result is narrowed to opus_int16 on store - confirm the sum cannot exceed 32767 */
+
+ /* Last NLSF should be no higher than 1 - NDeltaMin[L] */
+ NLSF_Q15[L-1] = silk_min_int( NLSF_Q15[L-1], (1<<15) - NDeltaMin_Q15[L] );
+
+ /* Keep NDeltaMin distance between the NLSFs (backward pass, pulls values down) */
+ for( i = L-2; i >= 0; i-- )
+ NLSF_Q15[i] = silk_min_int( NLSF_Q15[i], NLSF_Q15[i+1] - NDeltaMin_Q15[i+1] );
+ }
+}
diff --git a/drivers/opus/silk/NLSF_unpack.c b/drivers/opus/silk/NLSF_unpack.c
new file mode 100644
index 0000000000..60242a3b52
--- /dev/null
+++ b/drivers/opus/silk/NLSF_unpack.c
@@ -0,0 +1,55 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Unpack predictor values and indices for entropy coding tables; each ec_sel byte describes two coefficients */
+void silk_NLSF_unpack(
+ opus_int16 ec_ix[], /* O Indices to entropy tables [ LPC_ORDER ] */
+ opus_uint8 pred_Q8[], /* O LSF predictor [ LPC_ORDER ] */
+ const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */
+ const opus_int CB1_index /* I Index of vector in first LSF codebook */
+)
+{
+ opus_int i;
+ opus_uint8 entry;
+ const opus_uint8 *ec_sel_ptr;
+
+ ec_sel_ptr = &psNLSF_CB->ec_sel[ CB1_index * psNLSF_CB->order / 2 ]; /* order / 2 selector bytes per codebook vector */
+ for( i = 0; i < psNLSF_CB->order; i += 2 ) {
+ entry = *ec_sel_ptr++; /* bit 0 and bits 1..3 describe coefficient i, bit 4 and bits 5..7 describe coefficient i + 1 */
+ ec_ix [ i ] = silk_SMULBB( silk_RSHIFT( entry, 1 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 ); /* 3-bit selector scaled by the per-table stride */
+ pred_Q8[ i ] = psNLSF_CB->pred_Q8[ i + ( entry & 1 ) * ( psNLSF_CB->order - 1 ) ]; /* bit 0 picks the predictor set: offset 0 or order - 1 */
+ ec_ix [ i + 1 ] = silk_SMULBB( silk_RSHIFT( entry, 5 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 ); /* same as above, from bits 5..7 */
+ pred_Q8[ i + 1 ] = psNLSF_CB->pred_Q8[ i + ( silk_RSHIFT( entry, 4 ) & 1 ) * ( psNLSF_CB->order - 1 ) + 1 ]; /* bit 4 picks the predictor set */
+ }
+}
+
diff --git a/drivers/opus/silk/NSQ.c b/drivers/opus/silk/NSQ.c
new file mode 100644
index 0000000000..a08e34e893
--- /dev/null
+++ b/drivers/opus/silk/NSQ.c
@@ -0,0 +1,446 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE void silk_nsq_scale_states( /* Forward declaration; the definition appears later in this file */
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ const opus_int32 x_Q3[], /* I input in Q3 */
+ opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */
+ const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */
+ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
+ opus_int subfr, /* I subframe number */
+ const opus_int LTP_scale_Q14, /* I LTP state scaling */
+ const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */
+ const opus_int signal_type /* I Signal type */
+);
+
+static OPUS_INLINE void silk_noise_shape_quantizer( /* Forward declaration; the definition appears later in this file */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ opus_int signalType, /* I Signal type */
+ const opus_int32 x_sc_Q10[], /* I Input scaled with 1/Gain */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ opus_int16 xq[], /* O Quantized signal, scaled back with the gain */
+ opus_int32 sLTP_Q15[], /* I/O LTP state */
+ const opus_int16 a_Q12[], /* I Short term prediction coefs */
+ const opus_int16 b_Q14[], /* I Long term prediction coefs */
+ const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */
+ opus_int lag, /* I Pitch lag */
+ opus_int32 HarmShapeFIRPacked_Q14, /* I Packed symmetric harmonic shaping FIR coefs */
+ opus_int Tilt_Q14, /* I Spectral tilt */
+ opus_int32 LF_shp_Q14, /* I Low frequency shaping coefs */
+ opus_int32 Gain_Q16, /* I Quantization step size */
+ opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ opus_int offset_Q10, /* I Quantization offset */
+ opus_int length, /* I Input length */
+ opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
+ opus_int predictLPCOrder /* I Prediction filter order */
+);
+
+void silk_NSQ( /* Quantizes one frame, one subframe at a time, and updates the NSQ state buffers */
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ SideInfoIndices *psIndices, /* I/O Quantization Indices */
+ const opus_int32 x_Q3[], /* I Prefiltered input signal */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
+ const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
+ const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
+ const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
+ const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
+ const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
+ const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ const opus_int LTP_scale_Q14 /* I LTP state scaling */
+)
+{
+ opus_int k, lag, start_idx, LSF_interpolation_flag;
+ const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13;
+ opus_int16 *pxq;
+ VARDECL( opus_int32, sLTP_Q15 );
+ VARDECL( opus_int16, sLTP );
+ opus_int32 HarmShapeFIRPacked_Q14;
+ opus_int offset_Q10;
+ VARDECL( opus_int32, x_sc_Q10 );
+ SAVE_STACK;
+
+ NSQ->rand_seed = psIndices->Seed;
+
+ /* Set unvoiced lag to the previous one, overwrite later for voiced */
+ lag = NSQ->lagPrev;
+
+ silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+ offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; /* table row 0 for signal types 0/1, row 1 for type 2 */
+
+ if( psIndices->NLSFInterpCoef_Q2 == 4 ) { /* presumably 4 (1.0 in Q2) means "no NLSF interpolation" - confirm against the caller */
+ LSF_interpolation_flag = 0;
+ } else {
+ LSF_interpolation_flag = 1;
+ }
+
+ ALLOC( sLTP_Q15,
+ psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+ ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+ ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+ /* Set up pointers to start of sub frame: new samples are written after the ltp_mem_length history samples */
+ NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+ NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+ pxq = &NSQ->xq[ psEncC->ltp_mem_length ];
+ for( k = 0; k < psEncC->nb_subfr; k++ ) {
+ A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ]; /* flag 1: set k >> 1; flag 0: always the second set */
+ B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ];
+ AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ];
+
+ /* Noise shape parameters: gain >> 2 goes in the low 16 bits, gain >> 1 in the high 16 bits */
+ silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+ HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+ HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+ NSQ->rewhite_flag = 0;
+ if( psIndices->signalType == TYPE_VOICED ) {
+ /* Voiced */
+ lag = pitchL[ k ];
+
+ /* Re-whitening: on even k when the interpolation flag is 1 (mask 1), otherwise only when ( k & 3 ) == 0 */
+ if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
+ /* Rewhiten with new A coefs */
+ start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+ silk_assert( start_idx > 0 );
+
+ silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+ A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
+
+ NSQ->rewhite_flag = 1;
+ NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+ }
+ }
+
+ silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
+
+ silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
+ AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
+ offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+
+ x_Q3 += psEncC->subfr_length; /* advance input and both outputs to the next subframe */
+ pulses += psEncC->subfr_length;
+ pxq += psEncC->subfr_length;
+ }
+
+ /* Update lagPrev for next frame */
+ NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
+
+ /* Save quantized speech and noise shaping signals: move ltp_mem_length samples from offset frame_length to the buffer start */
+ /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */
+ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+ silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+ RESTORE_STACK;
+}
+
+/***********************************/
+/* silk_noise_shape_quantizer */
+/***********************************/
+static OPUS_INLINE void silk_noise_shape_quantizer( /* Quantizes `length` samples of one subframe, updating the NSQ and LTP states */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ opus_int signalType, /* I Signal type */
+ const opus_int32 x_sc_Q10[], /* I Input scaled with 1/Gain */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ opus_int16 xq[], /* O Quantized signal, scaled back with the gain */
+ opus_int32 sLTP_Q15[], /* I/O LTP state */
+ const opus_int16 a_Q12[], /* I Short term prediction coefs */
+ const opus_int16 b_Q14[], /* I Long term prediction coefs */
+ const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */
+ opus_int lag, /* I Pitch lag */
+ opus_int32 HarmShapeFIRPacked_Q14, /* I Packed symmetric harmonic shaping FIR coefs */
+ opus_int Tilt_Q14, /* I Spectral tilt */
+ opus_int32 LF_shp_Q14, /* I Low frequency shaping coefs */
+ opus_int32 Gain_Q16, /* I Quantization step size */
+ opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ opus_int offset_Q10, /* I Quantization offset */
+ opus_int length, /* I Input length */
+ opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
+ opus_int predictLPCOrder /* I Prediction filter order */
+)
+{
+ opus_int i, j;
+ opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
+ opus_int32 n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
+ opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+ opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
+ opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
+
+ shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; /* taps [ 0 ], [ -1 ], [ -2 ] are read from here */
+ pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; /* taps [ 0 ] .. [ -4 ] are read from here */
+ Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 );
+
+ /* Set up short term AR state: start at the last history entry, new samples are appended after it */
+ psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
+
+ for( i = 0; i < length; i++ ) {
+ /* Generate dither */
+ NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
+
+ /* Short-term prediction (unrolled for orders 10 and 16) */
+ silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ 0 ], a_Q12[ 0 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
+ if( predictLPCOrder == 16 ) {
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
+ }
+
+ /* Long-term prediction (voiced signals only, otherwise the prediction is zero) */
+ if( signalType == TYPE_VOICED ) {
+ /* Unrolled loop */
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ LTP_pred_Q13 = 2;
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ 0 ], b_Q14[ 0 ] );
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], b_Q14[ 2 ] );
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+ pred_lag_ptr++;
+ } else {
+ LTP_pred_Q13 = 0;
+ }
+
+ /* Noise shape feedback: sAR2_Q14 is shifted by one entry while the filter sum is accumulated, two taps per iteration */
+ silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
+ tmp2 = psLPC_Q14[ 0 ];
+ tmp1 = NSQ->sAR2_Q14[ 0 ];
+ NSQ->sAR2_Q14[ 0 ] = tmp2;
+ n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 );
+ n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] );
+ for( j = 2; j < shapingLPCOrder; j += 2 ) {
+ tmp2 = NSQ->sAR2_Q14[ j - 1 ];
+ NSQ->sAR2_Q14[ j - 1 ] = tmp1;
+ n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] );
+ tmp1 = NSQ->sAR2_Q14[ j + 0 ];
+ NSQ->sAR2_Q14[ j + 0 ] = tmp2;
+ n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] );
+ }
+ NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+ n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+
+ n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 ); /* Q11 -> Q12 */
+ n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
+
+ n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
+ n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 );
+
+ silk_assert( lag > 0 || signalType != TYPE_VOICED );
+
+ /* Combine prediction and noise shaping signals */
+ tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */
+ tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */
+ if( lag > 0 ) {
+ /* Symmetric, packed FIR coefficients */
+ n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+ n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 );
+ n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 );
+ shp_lag_ptr++;
+
+ tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 ); /* Q13 */
+ tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 ); /* Q13 */
+ tmp1 = silk_RSHIFT_ROUND( tmp1, 3 ); /* Q10 */
+ } else {
+ tmp1 = silk_RSHIFT_ROUND( tmp1, 2 ); /* Q10 */
+ }
+
+ r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */
+
+ /* Flip sign depending on dither */
+ if ( NSQ->rand_seed < 0 ) {
+ r_Q10 = -r_Q10;
+ }
+ r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+ /* Find two quantization level candidates and measure their rate-distortion */
+ q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+ q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+ if( q1_Q0 > 0 ) {
+ q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+ q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
+ q2_Q10 = silk_ADD32( q1_Q10, 1024 );
+ rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+ rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+ } else if( q1_Q0 == 0 ) {
+ q1_Q10 = offset_Q10;
+ q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+ rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+ rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+ } else if( q1_Q0 == -1 ) {
+ q2_Q10 = offset_Q10;
+ q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+ rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+ rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+ } else { /* q1_Q0 < -1 */
+ q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+ q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
+ q2_Q10 = silk_ADD32( q1_Q10, 1024 );
+ rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+ rd2_Q20 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+ }
+ rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); /* add the squared quantization error to each candidate's rate term */
+ rd1_Q20 = silk_SMLABB( rd1_Q20, rr_Q10, rr_Q10 );
+ rr_Q10 = silk_SUB32( r_Q10, q2_Q10 );
+ rd2_Q20 = silk_SMLABB( rd2_Q20, rr_Q10, rr_Q10 );
+
+ if( rd2_Q20 < rd1_Q20 ) { /* keep the candidate with the lower rate-distortion value */
+ q1_Q10 = q2_Q10;
+ }
+
+ pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 );
+
+ /* Excitation (sign flipped back with the same dither decision that was applied to r_Q10) */
+ exc_Q14 = silk_LSHIFT( q1_Q10, 4 );
+ if ( NSQ->rand_seed < 0 ) {
+ exc_Q14 = -exc_Q14;
+ }
+
+ /* Add predictions */
+ LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 );
+ xq_Q14 = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 );
+
+ /* Scale XQ back to normal level before saving */
+ xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( xq_Q14, Gain_Q10 ), 8 ) );
+
+ /* Update states */
+ psLPC_Q14++;
+ *psLPC_Q14 = xq_Q14;
+ sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 );
+ NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14;
+
+ NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 );
+ sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 );
+ NSQ->sLTP_shp_buf_idx++;
+ NSQ->sLTP_buf_idx++;
+
+ /* Make dither dependent on quantized signal */
+ NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] );
+ }
+
+ /* Update LPC synth buffer: the last NSQ_LPC_BUF_LENGTH values become the history at the buffer start */
+ silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+}
+
+static OPUS_INLINE void silk_nsq_scale_states( /* Scales the subframe input by 1/gain and rescales the NSQ states when the gain changed */
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ const opus_int32 x_Q3[], /* I input in Q3 */
+ opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */
+ const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */
+ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
+ opus_int subfr, /* I subframe number */
+ const opus_int LTP_scale_Q14, /* I LTP state scaling */
+ const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */
+ const opus_int signal_type /* I Signal type */
+)
+{
+ opus_int i, lag;
+ opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
+
+ lag = pitchL[ subfr ];
+ inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); /* gain is clamped to >= 1 before inversion */
+ silk_assert( inv_gain_Q31 != 0 );
+
+ /* Calculate gain adjustment factor: previous gain divided by the current gain, or exactly 1.0 (Q16) if unchanged */
+ if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+ gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
+ } else {
+ gain_adj_Q16 = (opus_int32)1 << 16;
+ }
+
+ /* Scale input */
+ inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+ for( i = 0; i < psEncC->subfr_length; i++ ) {
+ x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
+ }
+
+ /* Save the gain (not its inverse) for the adjustment factor of the next call */
+ NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+ /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q31 */
+ if( NSQ->rewhite_flag ) {
+ if( subfr == 0 ) {
+ /* Do LTP downscaling (first subframe only) */
+ inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+ }
+ for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+ silk_assert( i < MAX_FRAME_LENGTH );
+ sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+ }
+ }
+
+ /* Adjust for changing gain (skipped entirely when the adjustment factor is exactly 1.0) */
+ if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+ /* Scale long-term shaping state */
+ for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
+ NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
+ }
+
+ /* Scale long-term prediction state (not needed after rewhitening: that state was just rebuilt above) */
+ if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+ for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+ sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
+ }
+ }
+
+ NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 );
+
+ /* Scale short-term prediction and shaping states */
+ for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
+ NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] );
+ }
+ for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
+ NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] );
+ }
+ }
+}
diff --git a/drivers/opus/silk/NSQ_del_dec.c b/drivers/opus/silk/NSQ_del_dec.c
new file mode 100644
index 0000000000..8ac6311b11
--- /dev/null
+++ b/drivers/opus/silk/NSQ_del_dec.c
@@ -0,0 +1,719 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+typedef struct { /* State of one candidate path kept by the delayed-decision quantizer */
+ opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; /* Short-term prediction state; must remain the FIRST member, the state-replacement memcpy addresses the struct as an opus_int32 array starting here */
+ opus_int32 RandState[ DECISION_DELAY ]; /* Seed value recorded for each slot of the sample buffer */
+ opus_int32 Q_Q10[ DECISION_DELAY ]; /* Chosen quantization level for each buffered sample */
+ opus_int32 Xq_Q14[ DECISION_DELAY ]; /* xq_Q14 (excitation plus predictions) for each buffered sample */
+ opus_int32 Pred_Q15[ DECISION_DELAY ]; /* LPC_exc_Q14 shifted left by one bit, for each buffered sample */
+ opus_int32 Shape_Q14[ DECISION_DELAY ]; /* sLTP_shp_Q14 value for each buffered sample */
+ opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; /* State of the noise shaping feedback filter */
+ opus_int32 LF_AR_Q14; /* xq_Q14 minus AR shaping feedback of the latest sample */
+ opus_int32 Seed; /* Running dither seed; its sign decides whether the residual is flipped */
+ opus_int32 SeedInit; /* Seed the state started with; written back to psIndices->Seed for the winner */
+ opus_int32 RD_Q10; /* Accumulated rate-distortion cost of this path */
+} NSQ_del_dec_struct;
+
+typedef struct { /* Outcome of one quantization candidate for the current sample, before it is committed to a state */
+ opus_int32 Q_Q10; /* Candidate quantization level */
+ opus_int32 RD_Q10; /* Owning state's accumulated cost plus this candidate's cost */
+ opus_int32 xq_Q14; /* LPC_exc_Q14 plus the short-term prediction */
+ opus_int32 LF_AR_Q14; /* xq_Q14 minus the AR noise shaping feedback */
+ opus_int32 sLTP_shp_Q14; /* LF_AR_Q14 minus the low-frequency shaping feedback */
+ opus_int32 LPC_exc_Q14; /* Sign-adjusted excitation plus the long-term prediction */
+} NSQ_sample_struct;
+
+typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; /* Two candidates per state: [ 0 ] is the one with the lower RD cost (or the second level on a tie), [ 1 ] the other */
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states( /* Forward declaration; defined later in this file */
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
+ const opus_int32 x_Q3[], /* I Input in Q3 */
+ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
+ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
+ opus_int32 sLTP_Q15[], /* I/O LTP state matching scaled input (rescaled in place when the gain changes) */
+ opus_int subfr, /* I Subframe number */
+ opus_int nStatesDelayedDecision, /* I Number of del dec states */
+ const opus_int LTP_scale_Q14, /* I LTP state scaling */
+ const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Gain per subframe; entry [ subfr ] is used */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */
+ const opus_int signal_type, /* I Signal type */
+ const opus_int decisionDelay /* I Decision delay */
+);
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( /* Forward declaration; defined later in this file */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
+ opus_int signalType, /* I Signal type */
+ const opus_int32 x_Q10[], /* I Input scaled with 1/Gain in Q10 */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ opus_int16 xq[], /* O Gain-scaled quantized signal */
+ opus_int32 sLTP_Q15[], /* I/O LTP filter state */
+ opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */
+ const opus_int16 a_Q12[], /* I Short term prediction coefs */
+ const opus_int16 b_Q14[], /* I Long term prediction coefs */
+ const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */
+ opus_int lag, /* I Pitch lag */
+ opus_int32 HarmShapeFIRPacked_Q14, /* I Harmonic shaping FIR taps, two 16-bit values packed in one word */
+ opus_int Tilt_Q14, /* I Spectral tilt */
+ opus_int32 LF_shp_Q14, /* I Low frequency shaping coefs */
+ opus_int32 Gain_Q16, /* I Gain of this subframe */
+ opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ opus_int offset_Q10, /* I Quantization offset */
+ opus_int length, /* I Input length */
+ opus_int subfr, /* I Subframe number */
+ opus_int shapingLPCOrder, /* I Shaping LPC filter order */
+ opus_int predictLPCOrder, /* I Prediction filter order */
+ opus_int warping_Q16, /* I Warping coefficient used by the noise shaping filter sections */
+ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
+ opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
+ opus_int decisionDelay /* I Decision delay in samples */
+);
+
+void silk_NSQ_del_dec( /* Noise shaping quantizer for one frame: runs all subframes over several candidate states and commits samples after a decision delay */
+ const silk_encoder_state *psEncC, /* I Encoder State (const, only read here) */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ SideInfoIndices *psIndices, /* I/O Quantization Indices */
+ const opus_int32 x_Q3[], /* I Prefiltered input signal */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
+ const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
+ const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
+ const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
+ const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
+ const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
+ const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ const opus_int LTP_scale_Q14 /* I LTP state scaling */
+)
+{
+ opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
+ opus_int last_smple_idx, smpl_buf_idx, decisionDelay;
+ const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13;
+ opus_int16 *pxq;
+ VARDECL( opus_int32, sLTP_Q15 );
+ VARDECL( opus_int16, sLTP );
+ opus_int32 HarmShapeFIRPacked_Q14;
+ opus_int offset_Q10;
+ opus_int32 RDmin_Q10, Gain_Q10;
+ VARDECL( opus_int32, x_sc_Q10 );
+ VARDECL( opus_int32, delayedGain_Q10 );
+ VARDECL( NSQ_del_dec_struct, psDelDec );
+ NSQ_del_dec_struct *psDD;
+ SAVE_STACK;
+
+ /* Set unvoiced lag to the previous one, overwrite later for voiced */
+ lag = NSQ->lagPrev;
+
+ silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+ /* Initialize delayed decision states */
+ ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct );
+ silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
+ for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
+ psDD = &psDelDec[ k ];
+ psDD->Seed = ( k + psIndices->Seed ) & 3; /* Seed offset by the state index, kept to two bits */
+ psDD->SeedInit = psDD->Seed; /* Remembered so the winner's starting seed can be written back to psIndices */
+ psDD->RD_Q10 = 0;
+ psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14;
+ psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; /* Last entry of the retained long-term shaping history */
+ silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+ silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
+ }
+
+ offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+ smpl_buf_idx = 0; /* index of oldest samples */
+
+ decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); /* Never delay by more than one subframe */
+
+ /* For voiced frames limit the decision delay to lower than the pitch lag */
+ if( psIndices->signalType == TYPE_VOICED ) {
+ for( k = 0; k < psEncC->nb_subfr; k++ ) {
+ decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 );
+ }
+ } else {
+ if( lag > 0 ) { /* Not voiced: apply the same limit using the previous frame's lag, if there is one */
+ decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 );
+ }
+ }
+
+ if( psIndices->NLSFInterpCoef_Q2 == 4 ) { /* Flag 0 makes the A_Q12 index below ( k >> 1 ) | 1; flag 1 leaves it at k >> 1 */
+ LSF_interpolation_flag = 0;
+ } else {
+ LSF_interpolation_flag = 1;
+ }
+
+ ALLOC( sLTP_Q15,
+ psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+ ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+ ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+ ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
+ /* Set up pointers to start of sub frame */
+ pxq = &NSQ->xq[ psEncC->ltp_mem_length ];
+ NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+ NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+ subfr = 0; /* Counter handed to the quantizer; restarted at the k == 2 reset below */
+ for( k = 0; k < psEncC->nb_subfr; k++ ) {
+ A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; /* Picks one of the coefficient sets stored back to back in PredCoef_Q12 */
+ B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ];
+ AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ];
+
+ /* Noise shape parameters */
+ silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+ HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); /* Low 16 bits: gain / 4 */
+ HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); /* High 16 bits: gain / 2 */
+
+ NSQ->rewhite_flag = 0;
+ if( psIndices->signalType == TYPE_VOICED ) {
+ /* Voiced */
+ lag = pitchL[ k ];
+
+ /* Re-whitening */
+ if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { /* Mask is 1 with interpolation (even k) and 3 without (k a multiple of 4) */
+ if( k == 2 ) {
+ /* RESET DELAYED DECISIONS */
+ /* Find winner */
+ RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+ Winner_ind = 0;
+ for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) {
+ if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) {
+ RDmin_Q10 = psDelDec[ i ].RD_Q10;
+ Winner_ind = i;
+ }
+ }
+ for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) {
+ if( i != Winner_ind ) {
+ psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 ); /* Add a large penalty to every non-winning state */
+ silk_assert( psDelDec[ i ].RD_Q10 >= 0 );
+ }
+ }
+
+ /* Copy final part of signals from winner state to output and long-term filter states */
+ psDD = &psDelDec[ Winner_ind ];
+ last_smple_idx = smpl_buf_idx + decisionDelay;
+ for( i = 0; i < decisionDelay; i++ ) {
+ last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+ pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); /* pulses and pxq already point at subframe k, so negative offsets land in the end of subframe k - 1 */
+ pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+ silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); /* Gains_Q16[ 1 ] is the gain of the preceding subframe, since k == 2 here */
+ NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+ }
+
+ subfr = 0; /* Quantizer then treats the next subframe like a first one and holds back its first decisionDelay outputs */
+ }
+
+ /* Rewhiten with new A coefs */
+ start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+ silk_assert( start_idx > 0 );
+
+ silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+ A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
+
+ NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+ NSQ->rewhite_flag = 1; /* Tells silk_nsq_del_dec_scale_states to rebuild sLTP_Q15 from sLTP */
+ }
+ }
+
+ silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
+ psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); /* Receives k, not subfr: gains and lags are indexed by the true subframe number */
+
+ silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
+ delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
+ Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
+ psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
+
+ x_Q3 += psEncC->subfr_length; /* Advance input and output pointers to the next subframe */
+ pulses += psEncC->subfr_length;
+ pxq += psEncC->subfr_length;
+ }
+
+ /* Find winner */
+ RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+ Winner_ind = 0;
+ for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) {
+ if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) {
+ RDmin_Q10 = psDelDec[ k ].RD_Q10;
+ Winner_ind = k;
+ }
+ }
+
+ /* Copy final part of signals from winner state to output and long-term filter states */
+ psDD = &psDelDec[ Winner_ind ];
+ psIndices->Seed = psDD->SeedInit; /* Report the seed the winning state started from */
+ last_smple_idx = smpl_buf_idx + decisionDelay;
+ Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); /* Gain of the last subframe, Q16 -> Q10 */
+ for( i = 0; i < decisionDelay; i++ ) {
+ last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+ pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); /* Pointers are one subframe past the end, so these are the last decisionDelay samples of the frame */
+ pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+ silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
+ NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+ }
+ silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+ silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) );
+
+ /* Update states */
+ NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
+ NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
+
+ /* Save quantized speech signal */
+ /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
+ silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); /* Shift buffers down by one frame, keeping ltp_mem_length samples of history */
+ silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+ RESTORE_STACK;
+}
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( /* Quantizes one subframe sample by sample, evaluating two candidate levels per state and emitting output decisionDelay samples late */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
+ opus_int signalType, /* I Signal type */
+ const opus_int32 x_Q10[], /* I Input scaled with 1/Gain in Q10 */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ opus_int16 xq[], /* O Gain-scaled quantized signal */
+ opus_int32 sLTP_Q15[], /* I/O LTP filter state */
+ opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */
+ const opus_int16 a_Q12[], /* I Short term prediction coefs */
+ const opus_int16 b_Q14[], /* I Long term prediction coefs */
+ const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */
+ opus_int lag, /* I Pitch lag */
+ opus_int32 HarmShapeFIRPacked_Q14, /* I Harmonic shaping FIR taps, two 16-bit values packed in one word */
+ opus_int Tilt_Q14, /* I Spectral tilt */
+ opus_int32 LF_shp_Q14, /* I Low frequency shaping coefs */
+ opus_int32 Gain_Q16, /* I Gain of this subframe */
+ opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ opus_int offset_Q10, /* I Quantization offset */
+ opus_int length, /* I Input length */
+ opus_int subfr, /* I Subframe number */
+ opus_int shapingLPCOrder, /* I Shaping LPC filter order */
+ opus_int predictLPCOrder, /* I Prediction filter order */
+ opus_int warping_Q16, /* I Warping coefficient used by the noise shaping filter sections */
+ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
+ opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
+ opus_int decisionDelay /* I Decision delay in samples */
+)
+{
+ opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
+ opus_int32 Winner_rand_state;
+ opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
+ opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
+ opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+ opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
+ opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+ VARDECL( NSQ_sample_pair, psSampleState );
+ NSQ_del_dec_struct *psDD;
+ NSQ_sample_struct *psSS;
+ SAVE_STACK;
+
+ silk_assert( nStatesDelayedDecision > 0 );
+ ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
+
+ shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; /* Newest tap of the shaping FIR, one pitch lag back; offsets 0, -1, -2 are read */
+ pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; /* Newest tap of the LTP filter, one pitch lag back; offsets 0 .. -4 are read */
+ Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 );
+
+ for( i = 0; i < length; i++ ) {
+ /* Perform common calculations used in all states */
+
+ /* Long-term prediction */
+ if( signalType == TYPE_VOICED ) {
+ /* Unrolled loop */
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ LTP_pred_Q14 = 2;
+ LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ 0 ], b_Q14[ 0 ] );
+ LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
+ LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] );
+ LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
+ LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+ LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */
+ pred_lag_ptr++;
+ } else {
+ LTP_pred_Q14 = 0;
+ }
+
+ /* Long-term shaping */
+ if( lag > 0 ) {
+ /* Symmetric, packed FIR coefficients */
+ n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+ n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 );
+ n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14; result is the LTP prediction minus the harmonic shaping term */
+ shp_lag_ptr++;
+ } else {
+ n_LTP_Q14 = 0;
+ }
+
+ for( k = 0; k < nStatesDelayedDecision; k++ ) {
+ /* Delayed decision state */
+ psDD = &psDelDec[ k ];
+
+ /* Sample state */
+ psSS = psSampleState[ k ];
+
+ /* Generate dither */
+ psDD->Seed = silk_RAND( psDD->Seed );
+
+ /* Pointer used in short term prediction and shaping */
+ psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
+ /* Short-term prediction */
+ silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 0 ], a_Q12[ 0 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
+ if( predictLPCOrder == 16 ) {
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
+ LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
+ }
+ LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
+
+ /* Noise shape feedback */
+ silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
+ /* Output of lowpass section */
+ tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
+ psDD->sAR2_Q14[ 0 ] = tmp2;
+ n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
+ n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
+ /* Loop over allpass sections */
+ for( j = 2; j < shapingLPCOrder; j += 2 ) {
+ /* Output of allpass section */
+ tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
+ psDD->sAR2_Q14[ j - 1 ] = tmp1;
+ n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
+ psDD->sAR2_Q14[ j + 0 ] = tmp2;
+ n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] );
+ }
+ psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+ n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+
+ n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */
+ n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */
+ n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */
+
+ n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */
+ n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */
+ n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */
+
+ /* Input minus prediction plus noise feedback */
+ /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */
+ tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */
+ tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q14 (both operands are Q14) */
+ tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q14 */
+ tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q14 -> Q10 */
+
+ r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */
+
+ /* Flip sign depending on dither */
+ if ( psDD->Seed < 0 ) {
+ r_Q10 = -r_Q10;
+ }
+ r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); /* Clamp to [ -31, 30 ] in Q10 */
+
+ /* Find two quantization level candidates and measure their rate-distortion */
+ q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+ q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+ if( q1_Q0 > 0 ) {
+ q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+ q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
+ q2_Q10 = silk_ADD32( q1_Q10, 1024 );
+ rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+ rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+ } else if( q1_Q0 == 0 ) {
+ q1_Q10 = offset_Q10;
+ q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+ rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+ rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+ } else if( q1_Q0 == -1 ) {
+ q2_Q10 = offset_Q10;
+ q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+ rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+ rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+ } else { /* q1_Q0 < -1 */
+ q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+ q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
+ q2_Q10 = silk_ADD32( q1_Q10, 1024 );
+ rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+ rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+ }
+ rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); /* Quantization error of the first candidate */
+ rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); /* Lambda-weighted level term combined with the error term, then scaled down by 10 bits */
+ rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); /* Quantization error of the second candidate */
+ rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
+
+ if( rd1_Q10 < rd2_Q10 ) { /* Slot 0 gets the cheaper candidate; on a tie the second level goes to slot 0 */
+ psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+ psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+ psSS[ 0 ].Q_Q10 = q1_Q10;
+ psSS[ 1 ].Q_Q10 = q2_Q10;
+ } else {
+ psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+ psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+ psSS[ 0 ].Q_Q10 = q2_Q10;
+ psSS[ 1 ].Q_Q10 = q1_Q10;
+ }
+
+ /* Update states for best quantization */
+
+ /* Quantized excitation */
+ exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
+ if ( psDD->Seed < 0 ) { /* Undo the dither sign flip applied to the residual above */
+ exc_Q14 = -exc_Q14;
+ }
+
+ /* Add predictions */
+ LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+ xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+ /* Update states */
+ sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
+ psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+ psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14;
+ psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14;
+ psSS[ 0 ].xq_Q14 = xq_Q14;
+
+ /* Update states for second best quantization */
+
+ /* Quantized excitation */
+ exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
+ if ( psDD->Seed < 0 ) {
+ exc_Q14 = -exc_Q14;
+ }
+
+
+ /* Add predictions */
+ LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+ xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+ /* Update states */
+ sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
+ psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+ psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14;
+ psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14;
+ psSS[ 1 ].xq_Q14 = xq_Q14;
+ }
+
+ *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */
+ last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */
+
+ /* Find winner */
+ RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
+ Winner_ind = 0;
+ for( k = 1; k < nStatesDelayedDecision; k++ ) {
+ if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
+ RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10;
+ Winner_ind = k;
+ }
+ }
+
+ /* Increase RD values of expired states */
+ Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
+ for( k = 0; k < nStatesDelayedDecision; k++ ) {
+ if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { /* State disagrees with the winner about the sample that is about to be output */
+ psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
+ psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
+ silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
+ }
+ }
+
+ /* Find worst in first set and best in second set */
+ RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
+ RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10;
+ RDmax_ind = 0;
+ RDmin_ind = 0;
+ for( k = 1; k < nStatesDelayedDecision; k++ ) {
+ /* find worst in first set */
+ if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
+ RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10;
+ RDmax_ind = k;
+ }
+ /* find best in second set */
+ if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
+ RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10;
+ RDmin_ind = k;
+ }
+ }
+
+ /* Replace a state if best from second set outperforms worst in first set */
+ if( RDmin_Q10 < RDmax_Q10 ) {
+ silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
+ ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); /* Copies the whole struct except its first i opus_int32 words, i.e. skips sLPC_Q14[ 0 .. i-1 ]; relies on sLPC_Q14 being the first member */
+ silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
+ }
+
+ /* Write samples from winner to output and long-term filter states */
+ psDD = &psDelDec[ Winner_ind ];
+ if( subfr > 0 || i >= decisionDelay ) { /* Nothing is emitted for the first decisionDelay samples when subfr == 0 */
+ pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+ xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+ silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); /* Uses the gain stored when that delayed sample was quantized */
+ NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
+ sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ];
+ }
+ NSQ->sLTP_shp_buf_idx++;
+ NSQ->sLTP_buf_idx++;
+
+ /* Update states */
+ for( k = 0; k < nStatesDelayedDecision; k++ ) {
+ psDD = &psDelDec[ k ];
+ psSS = &psSampleState[ k ][ 0 ]; /* Only slot 0 is committed; a promoted slot-1 candidate was copied into slot 0 above */
+ psDD->LF_AR_Q14 = psSS->LF_AR_Q14;
+ psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
+ psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14;
+ psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10;
+ psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
+ psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14;
+ psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); /* Seed depends on the chosen pulse, so diverging paths get different RandState values */
+ psDD->RandState[ *smpl_buf_idx ] = psDD->Seed;
+ psDD->RD_Q10 = psSS->RD_Q10;
+ }
+ delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10;
+ }
+ /* Update LPC states */
+ for( k = 0; k < nStatesDelayedDecision; k++ ) {
+ psDD = &psDelDec[ k ];
+ silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); /* Move the newest NSQ_LPC_BUF_LENGTH entries to the front for the next subframe */
+ }
+ RESTORE_STACK;
+}
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states( /* Scales the subframe input by 1/gain and rescales all filter states when the gain differs from the previous call */
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
+ const opus_int32 x_Q3[], /* I Input in Q3 */
+ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
+ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
+ opus_int32 sLTP_Q15[], /* I/O LTP state matching scaled input (rescaled in place when the gain changes) */
+ opus_int subfr, /* I Subframe number */
+ opus_int nStatesDelayedDecision, /* I Number of del dec states */
+ const opus_int LTP_scale_Q14, /* I LTP state scaling */
+ const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Gain per subframe; entry [ subfr ] is used */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */
+ const opus_int signal_type, /* I Signal type */
+ const opus_int decisionDelay /* I Decision delay */
+)
+{
+ opus_int i, k, lag;
+ opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
+ NSQ_del_dec_struct *psDD;
+
+ lag = pitchL[ subfr ];
+ inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); /* Gain is floored at 1 so the inverse is never taken of zero */
+ silk_assert( inv_gain_Q31 != 0 );
+
+ /* Calculate gain adjustment factor */
+ if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+ gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); /* Ratio previous gain / current gain */
+ } else {
+ gain_adj_Q16 = (opus_int32)1 << 16; /* Exactly 1.0 in Q16; lets the rescaling below be skipped */
+ }
+
+ /* Scale input */
+ inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+ for( i = 0; i < psEncC->subfr_length; i++ ) {
+ x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
+ }
+
+ /* Save gain so the next call can detect a gain change */
+ NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+ /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q31 */
+ if( NSQ->rewhite_flag ) {
+ if( subfr == 0 ) {
+ /* Do LTP downscaling */
+ inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+ }
+ for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+ silk_assert( i < MAX_FRAME_LENGTH );
+ sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+ }
+ }
+
+ /* Adjust for changing gain */
+ if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+ /* Scale long-term shaping state */
+ for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
+ NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
+ }
+
+ /* Scale long-term prediction state */
+ if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+ for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { /* Stops decisionDelay short; NOTE(review): presumably because those samples still live in psDD->Pred_Q15, which is scaled below - confirm */
+ sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
+ }
+ }
+
+ for( k = 0; k < nStatesDelayedDecision; k++ ) {
+ psDD = &psDelDec[ k ];
+
+ /* Scale scalar states */
+ psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
+
+ /* Scale short-term prediction and shaping states */
+ for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
+ psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] );
+ }
+ for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
+ psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] );
+ }
+ for( i = 0; i < DECISION_DELAY; i++ ) { /* Scale the buffered, not yet output, prediction and shaping samples */
+ psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[ i ] );
+ psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] );
+ }
+ }
+ }
+}
diff --git a/drivers/opus/silk/PLC.c b/drivers/opus/silk/PLC.c
new file mode 100644
index 0000000000..9fc11adda9
--- /dev/null
+++ b/drivers/opus/silk/PLC.c
@@ -0,0 +1,423 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+#include "PLC.h"
+
+#define NB_ATT 2 /* Entries per attenuation table; the tables are indexed with min( NB_ATT - 1, lossCnt ) */
+static const opus_int16 HARM_ATT_Q15[NB_ATT] = { 32440, 31130 }; /* 0.99, 0.95: factor applied to the LTP taps after every concealed subframe */
+static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT] = { 31130, 26214 }; /* 0.95, 0.8: noise-gain decay used when the previous frame was voiced */
+static const opus_int16 PLC_RAND_ATTENUATE_UV_Q15[NB_ATT] = { 32440, 29491 }; /* 0.99, 0.9: noise-gain decay used when the previous frame was not voiced */
+
+static OPUS_INLINE void silk_PLC_update( /* Forward declaration; the definition follows silk_PLC() in this file */
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl /* I/O Decoder control */
+);
+
+static OPUS_INLINE void silk_PLC_conceal( /* Forward declaration; the definition follows silk_PLC_update() in this file */
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I/O Decoder control */
+ opus_int16 frame[] /* O LPC residual signal */
+);
+
+
+void silk_PLC_Reset(
+ silk_decoder_state *psDec /* I/O Decoder state; only fields of its sPLC member are written */
+)
+{
+ silk_PLC_struct *plc = &psDec->sPLC; /* Every default below is stored in the PLC sub-state */
+ plc->nb_subfr = 2; /* Default layout: two subframes ... */
+ plc->subfr_length = 20; /* ... of 20 samples each */
+ plc->prevGain_Q16[ 0 ] = plc->prevGain_Q16[ 1 ] = SILK_FIX_CONST( 1, 16 ); /* Both saved gains start at 1.0 (Q16) */
+ plc->pitchL_Q8 = silk_LSHIFT( psDec->frame_length, 8 - 1 ); /* Half the frame length, expressed in Q8 */
+}
+
+void silk_PLC(
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I/O Decoder control */
+ opus_int16 frame[], /* I/O signal; handed to the concealment routine when the frame is lost */
+ opus_int lost /* I Loss flag; non-zero selects concealment */
+)
+{
+ silk_PLC_struct *plc = &psDec->sPLC;
+
+ /* PLC entry point: reset the PLC state whenever the decoder rate differs from the rate it was set up for */
+ if( plc->fs_kHz != psDec->fs_kHz ) {
+ silk_PLC_Reset( psDec );
+ plc->fs_kHz = psDec->fs_kHz;
+ }
+
+ if( !lost ) {
+ /* Good frame: only refresh the saved parameters */
+ silk_PLC_update( psDec, psDecCtrl );
+ return;
+ }
+
+ /* Lost frame: generate a replacement signal */
+ silk_PLC_conceal( psDec, psDecCtrl, frame );
+
+ /* Count this loss; the attenuation tables are indexed by this counter */
+ psDec->lossCnt++;
+}
+
+/**************************************************/
+/* Update state of PLC: after a good frame, save the parameters concealment would need */
+/**************************************************/
+static OPUS_INLINE void silk_PLC_update(
+ silk_decoder_state *psDec, /* I/O Decoder state; prevSignalType and the sPLC member are written */
+ silk_decoder_control *psDecCtrl /* I/O Decoder control; only read by this function */
+)
+{
+ opus_int32 LTP_Gain_Q14, temp_LTP_Gain_Q14;
+ opus_int i, j;
+ silk_PLC_struct *psPLC;
+
+ psPLC = &psDec->sPLC;
+
+ /* Update parameters used in case of packet loss */
+ psDec->prevSignalType = psDec->indices.signalType;
+ LTP_Gain_Q14 = 0;
+ if( psDec->indices.signalType == TYPE_VOICED ) {
+ /* Find the parameters for the last subframe which contains a pitch pulse: j walks subframes back from the frame end, covering at most one pitch lag of samples */
+ for( j = 0; j * psDec->subfr_length < psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; j++ ) {
+ if( j == psDec->nb_subfr ) { /* Never look further back than the first subframe of this frame */
+ break;
+ }
+ temp_LTP_Gain_Q14 = 0; /* Will hold the sum of the LTP_ORDER taps of subframe nb_subfr - 1 - j */
+ for( i = 0; i < LTP_ORDER; i++ ) {
+ temp_LTP_Gain_Q14 += psDecCtrl->LTPCoef_Q14[ ( psDec->nb_subfr - 1 - j ) * LTP_ORDER + i ];
+ }
+ if( temp_LTP_Gain_Q14 > LTP_Gain_Q14 ) { /* Keep the candidate subframe with the largest summed tap gain */
+ LTP_Gain_Q14 = temp_LTP_Gain_Q14;
+ silk_memcpy( psPLC->LTPCoef_Q14,
+ &psDecCtrl->LTPCoef_Q14[ silk_SMULBB( psDec->nb_subfr - 1 - j, LTP_ORDER ) ],
+ LTP_ORDER * sizeof( opus_int16 ) );
+
+ psPLC->pitchL_Q8 = silk_LSHIFT( psDecCtrl->pitchL[ psDec->nb_subfr - 1 - j ], 8 ); /* Pitch lag of that subframe, converted to Q8 */
+ }
+ }
+
+ silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) ); /* Discards the taps copied in the loop above ... */
+ psPLC->LTPCoef_Q14[ LTP_ORDER / 2 ] = LTP_Gain_Q14; /* ... leaving a single centre tap that carries the summed gain */
+
+ /* Limit LT coefs: bring the saved gain into [ V_PITCH_GAIN_START_MIN_Q14, V_PITCH_GAIN_START_MAX_Q14 ] */
+ if( LTP_Gain_Q14 < V_PITCH_GAIN_START_MIN_Q14 ) {
+ opus_int scale_Q10;
+ opus_int32 tmp;
+
+ tmp = silk_LSHIFT( V_PITCH_GAIN_START_MIN_Q14, 10 );
+ scale_Q10 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) ); /* Ratio minimum / gain in Q10; the max( , 1 ) guards against division by zero */
+ for( i = 0; i < LTP_ORDER; i++ ) {
+ psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q10 ), 10 );
+ }
+ } else if( LTP_Gain_Q14 > V_PITCH_GAIN_START_MAX_Q14 ) {
+ opus_int scale_Q14;
+ opus_int32 tmp;
+
+ tmp = silk_LSHIFT( V_PITCH_GAIN_START_MAX_Q14, 14 );
+ scale_Q14 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) ); /* Ratio maximum / gain in Q14 */
+ for( i = 0; i < LTP_ORDER; i++ ) {
+ psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q14 ), 14 );
+ }
+ }
+ } else {
+ psPLC->pitchL_Q8 = silk_LSHIFT( silk_SMULBB( psDec->fs_kHz, 18 ), 8 ); /* Not voiced: use a fixed lag of 18 * fs_kHz samples (Q8) and no LTP taps */
+ silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 ));
+ }
+
+ /* Save LPC coefficients (row 1 of PredCoef_Q12, LPC_order entries) */
+ silk_memcpy( psPLC->prevLPC_Q12, psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) );
+ psPLC->prevLTP_scale_Q14 = psDecCtrl->LTP_scale_Q14;
+
+ /* Save last two gains (entries nb_subfr - 2 and nb_subfr - 1 of Gains_Q16) */
+ silk_memcpy( psPLC->prevGain_Q16, &psDecCtrl->Gains_Q16[ psDec->nb_subfr - 2 ], 2 * sizeof( opus_int32 ) );
+
+ psPLC->subfr_length = psDec->subfr_length; /* Remember the frame layout these parameters belong to */
+ psPLC->nb_subfr = psDec->nb_subfr;
+}
+
+static OPUS_INLINE void silk_PLC_conceal( /* Build a replacement frame from the saved PLC state: LTP prediction plus scaled noise, followed by LPC synthesis */
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I/O Decoder control; only pitchL[] is written here */
+ opus_int16 frame[] /* O Concealed signal, frame_length samples; NOTE(review): previously labelled LPC residual, but it is written from the gain-scaled LPC synthesis output */
+)
+{
+ opus_int i, j, k;
+ opus_int lag, idx, sLTP_buf_idx, shift1, shift2;
+ opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q30;
+ opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr;
+ opus_int32 LPC_pred_Q10, LTP_pred_Q12;
+ opus_int16 rand_scale_Q14;
+ opus_int16 *B_Q14, *exc_buf_ptr;
+ opus_int32 *sLPC_Q14_ptr;
+ VARDECL( opus_int16, exc_buf );
+ opus_int16 A_Q12[ MAX_LPC_ORDER ];
+ VARDECL( opus_int16, sLTP );
+ VARDECL( opus_int32, sLTP_Q14 );
+ silk_PLC_struct *psPLC = &psDec->sPLC;
+ opus_int32 prevGain_Q10[2];
+ SAVE_STACK;
+
+ ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 ); /* Gain-scaled copy of the last two saved subframes of excitation */
+ ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); /* Output history after LPC analysis filtering */
+ ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); /* LTP state followed by the frame synthesized below */
+
+ prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6); /* Q16 -> Q10 */
+ prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
+
+ if( psDec->first_frame_after_reset ) { /* Directly after a reset the saved LPC is cleared before use */
+ silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) );
+ }
+
+ /* Find random noise component */
+ /* Scale previous excitation signal */
+ exc_buf_ptr = exc_buf;
+ for( k = 0; k < 2; k++ ) { /* k = 0, 1 selects subframe nb_subfr - 2, nb_subfr - 1 and its saved gain */
+ for( i = 0; i < psPLC->subfr_length; i++ ) {
+ exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
+ silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) );
+ }
+ exc_buf_ptr += psPLC->subfr_length;
+ }
+ /* Find the subframe with lowest energy of the last two and use that as random noise generator */
+ silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length );
+ silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length );
+
+ if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) { /* Each energy is shifted by the other's shift, so both are compared at the same scale */
+ /* First sub-frame has lowest energy */
+ rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, ( psPLC->nb_subfr - 1 ) * psPLC->subfr_length - RAND_BUF_SIZE ) ];
+ } else {
+ /* Second sub-frame has lowest energy */
+ rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, psPLC->nb_subfr * psPLC->subfr_length - RAND_BUF_SIZE ) ];
+ }
+
+ /* Set up Gain to random noise component */
+ B_Q14 = psPLC->LTPCoef_Q14; /* Alias, not a copy: the per-subframe attenuation below modifies the saved taps */
+ rand_scale_Q14 = psPLC->randScale_Q14;
+
+ /* Set up attenuation gains: table entry 0 for the first loss, entry NB_ATT - 1 for all later ones */
+ harm_Gain_Q15 = HARM_ATT_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
+ if( psDec->prevSignalType == TYPE_VOICED ) {
+ rand_Gain_Q15 = PLC_RAND_ATTENUATE_V_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
+ } else {
+ rand_Gain_Q15 = PLC_RAND_ATTENUATE_UV_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
+ }
+
+ /* LPC concealment. Apply BWE to previous LPC (in place, so the expansion accumulates over successive calls) */
+ silk_bwexpander( psPLC->prevLPC_Q12, psDec->LPC_order, SILK_FIX_CONST( BWE_COEF, 16 ) );
+
+ /* Preload LPC coefficients to array on stack. Gives small performance gain */
+ silk_memcpy( A_Q12, psPLC->prevLPC_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
+
+ /* First Lost frame */
+ if( psDec->lossCnt == 0 ) {
+ rand_scale_Q14 = 1 << 14; /* Start from 1.0 in Q14 */
+
+ /* Reduce random noise Gain for voiced frames */
+ if( psDec->prevSignalType == TYPE_VOICED ) {
+ for( i = 0; i < LTP_ORDER; i++ ) {
+ rand_scale_Q14 -= B_Q14[ i ];
+ }
+ rand_scale_Q14 = silk_max_16( 3277, rand_scale_Q14 ); /* 0.2 */
+ rand_scale_Q14 = (opus_int16)silk_RSHIFT( silk_SMULBB( rand_scale_Q14, psPLC->prevLTP_scale_Q14 ), 14 );
+ } else {
+ /* Reduce random noise for unvoiced frames with high LPC gain */
+ opus_int32 invGain_Q30, down_scale_Q30;
+
+ invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order );
+
+ down_scale_Q30 = silk_min_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 ); /* Clamp from above at 2^-HIGH_THRES (Q30) */
+ down_scale_Q30 = silk_max_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 ); /* Clamp from below at 2^-LOW_THRES (Q30) */
+ down_scale_Q30 = silk_LSHIFT( down_scale_Q30, LOG2_INV_LPC_GAIN_HIGH_THRES ); /* Rescale so the upper clamp maps to 1.0 in Q30 */
+
+ rand_Gain_Q15 = silk_RSHIFT( silk_SMULWB( down_scale_Q30, rand_Gain_Q15 ), 14 );
+ }
+ }
+
+ rand_seed = psPLC->rand_seed;
+ lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 ); /* Q8 -> whole samples, rounded */
+ sLTP_buf_idx = psDec->ltp_mem_length; /* New samples are written directly after the LTP history */
+
+ /* Rewhiten LTP state */
+ idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
+ silk_assert( idx > 0 );
+ silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order );
+ /* Scale LTP state */
+ inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 );
+ inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 );
+ for( i = idx + psDec->LPC_order; i < psDec->ltp_mem_length; i++ ) { /* The first LPC_order filter outputs are skipped */
+ sLTP_Q14[ i ] = silk_SMULWB( inv_gain_Q30, sLTP[ i ] );
+ }
+
+ /***************************/
+ /* LTP synthesis filtering */
+ /***************************/
+ for( k = 0; k < psDec->nb_subfr; k++ ) {
+ /* Set up pointer */
+ pred_lag_ptr = &sLTP_Q14[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+ for( i = 0; i < psDec->subfr_length; i++ ) {
+ /* Unrolled loop */
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ LTP_pred_Q12 = 2;
+ LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ 0 ], B_Q14[ 0 ] );
+ LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -1 ], B_Q14[ 1 ] );
+ LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -2 ], B_Q14[ 2 ] );
+ LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -3 ], B_Q14[ 3 ] );
+ LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -4 ], B_Q14[ 4 ] );
+ pred_lag_ptr++;
+
+ /* Generate LPC excitation */
+ rand_seed = silk_RAND( rand_seed );
+ idx = silk_RSHIFT( rand_seed, 25 ) & RAND_BUF_MASK; /* Pick one of the RAND_BUF_SIZE samples starting at rand_ptr */
+ sLTP_Q14[ sLTP_buf_idx ] = silk_LSHIFT32( silk_SMLAWB( LTP_pred_Q12, rand_ptr[ idx ], rand_scale_Q14 ), 2 ); /* Q12 -> Q14 */
+ sLTP_buf_idx++;
+ }
+
+ /* Gradually reduce LTP gain */
+ for( j = 0; j < LTP_ORDER; j++ ) {
+ B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 );
+ }
+ /* Gradually reduce excitation gain */
+ rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 );
+
+ /* Slowly increase pitch lag, capped at MAX_PITCH_LAG_MS * fs_kHz samples */
+ psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 );
+ psPLC->pitchL_Q8 = silk_min_32( psPLC->pitchL_Q8, silk_LSHIFT( silk_SMULBB( MAX_PITCH_LAG_MS, psDec->fs_kHz ), 8 ) );
+ lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 );
+ }
+
+ /***************************/
+ /* LPC synthesis filtering */
+ /***************************/
+ sLPC_Q14_ptr = &sLTP_Q14[ psDec->ltp_mem_length - MAX_LPC_ORDER ]; /* MAX_LPC_ORDER state samples sit directly in front of the new excitation */
+
+ /* Copy LPC state */
+ silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
+ silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */
+ for( i = 0; i < psDec->frame_length; i++ ) {
+ /* partly unrolled */
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+ for( j = 10; j < psDec->LPC_order; j++ ) { /* Remaining taps when LPC_order exceeds the ten unrolled above */
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] );
+ }
+
+ /* Add prediction to LPC excitation (prediction shifted left by 4: Q10 -> Q14) */
+ sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+
+ /* Scale with Gain; NOTE(review): the nested silk_SAT16 looks redundant, saturating twice gives the same value */
+ frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );
+ }
+
+ /* Save LPC state: the last MAX_LPC_ORDER synthesized samples */
+ silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
+ /**************************************/
+ /* Update states */
+ /**************************************/
+ psPLC->rand_seed = rand_seed;
+ psPLC->randScale_Q14 = rand_scale_Q14;
+ for( i = 0; i < MAX_NB_SUBFR; i++ ) { /* Report the final (drifted) lag for every subframe slot */
+ psDecCtrl->pitchL[ i ] = lag;
+ }
+ RESTORE_STACK;
+}
+
+/* Glues concealed frames with new good received frames: if the first good frame after a loss has more energy than the concealed one, it is faded in */
+void silk_PLC_glue_frames(
+ silk_decoder_state *psDec, /* I/O decoder state */
+ opus_int16 frame[], /* I/O signal */
+ opus_int length /* I length of signal */
+)
+{
+ opus_int i, energy_shift;
+ opus_int32 energy;
+ silk_PLC_struct *psPLC;
+ psPLC = &psDec->sPLC;
+
+ if( psDec->lossCnt ) { /* Non-zero loss counter: frame[] is treated as concealed output */
+ /* Calculate energy in concealed residual */
+ silk_sum_sqr_shift( &psPLC->conc_energy, &psPLC->conc_energy_shift, frame, length );
+
+ psPLC->last_frame_lost = 1;
+ } else {
+ if( psDec->sPLC.last_frame_lost ) {
+ /* Calculate residual in decoded signal if last frame was lost */
+ silk_sum_sqr_shift( &energy, &energy_shift, frame, length );
+
+ /* Normalize energies: shift the value with the smaller shift down so both share one scale */
+ if( energy_shift > psPLC->conc_energy_shift ) {
+ psPLC->conc_energy = silk_RSHIFT( psPLC->conc_energy, energy_shift - psPLC->conc_energy_shift );
+ } else if( energy_shift < psPLC->conc_energy_shift ) {
+ energy = silk_RSHIFT( energy, psPLC->conc_energy_shift - energy_shift );
+ }
+
+ /* Fade in the energy difference */
+ if( energy > psPLC->conc_energy ) {
+ opus_int32 frac_Q24, LZ;
+ opus_int32 gain_Q16, slope_Q16;
+
+ LZ = silk_CLZ32( psPLC->conc_energy );
+ LZ = LZ - 1; /* Left-shift that leaves one leading zero bit in conc_energy */
+ psPLC->conc_energy = silk_LSHIFT( psPLC->conc_energy, LZ );
+ energy = silk_RSHIFT( energy, silk_max_32( 24 - LZ, 0 ) ); /* Together with the shift above this makes the quotient below a Q24 ratio when LZ <= 24 */
+
+ frac_Q24 = silk_DIV32( psPLC->conc_energy, silk_max( energy, 1 ) ); /* The max( , 1 ) guards against division by zero */
+
+ gain_Q16 = silk_LSHIFT( silk_SQRT_APPROX( frac_Q24 ), 4 ); /* Square root of a Q24 value is Q12; shifting by 4 gives Q16 */
+ slope_Q16 = silk_DIV32_16( ( (opus_int32)1 << 16 ) - gain_Q16, length ); /* Step that would reach 1.0 after length samples */
+ /* Make slope 4x steeper to avoid missing onsets after DTX */
+ slope_Q16 = silk_LSHIFT( slope_Q16, 2 );
+
+ for( i = 0; i < length; i++ ) {
+ frame[ i ] = silk_SMULWB( gain_Q16, frame[ i ] );
+ gain_Q16 += slope_Q16;
+ if( gain_Q16 > (opus_int32)1 << 16 ) { /* Ramp has passed 1.0: the remaining samples are left untouched */
+ break;
+ }
+ }
+ }
+ }
+ psPLC->last_frame_lost = 0;
+ }
+}
diff --git a/drivers/opus/silk/PLC.h b/drivers/opus/silk/PLC.h
new file mode 100644
index 0000000000..f531cda950
--- /dev/null
+++ b/drivers/opus/silk/PLC.h
@@ -0,0 +1,61 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_PLC_H
+#define SILK_PLC_H
+
+#include "silk_main.h"
+
+#define BWE_COEF 0.99 /* Chirp factor passed to silk_bwexpander() on the saved LPC at each concealed frame */
+#define V_PITCH_GAIN_START_MIN_Q14 11469 /* 0.7 in Q14; lower limit for the LTP gain saved after a voiced frame */
+#define V_PITCH_GAIN_START_MAX_Q14 15565 /* 0.95 in Q14; upper limit for the LTP gain saved after a voiced frame */
+#define MAX_PITCH_LAG_MS 18 /* Cap, in ms, on the pitch lag as it drifts upward during concealment */
+#define RAND_BUF_SIZE 128 /* Number of past excitation samples the noise generator draws from; a power of two so that RAND_BUF_MASK works */
+#define RAND_BUF_MASK ( RAND_BUF_SIZE - 1 ) /* AND-mask that turns a random value into an index below RAND_BUF_SIZE */
+#define LOG2_INV_LPC_GAIN_HIGH_THRES 3 /* 2^3 = 8 dB LPC gain; NOTE(review): a factor 2^3 in the energy domain is about 9 dB - confirm the intended wording */
+#define LOG2_INV_LPC_GAIN_LOW_THRES 8 /* 2^8 = 24 dB LPC gain */
+#define PITCH_DRIFT_FAC_Q16 655 /* 0.01 in Q16; relative pitch-lag increase applied per concealed subframe */
+
+void silk_PLC_Reset(
+ silk_decoder_state *psDec /* I/O Decoder state */
+);
+
+void silk_PLC(
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I/O Decoder control */
+ opus_int16 frame[], /* I/O signal */
+ opus_int lost /* I Loss flag */
+);
+
+void silk_PLC_glue_frames(
+ silk_decoder_state *psDec, /* I/O decoder state */
+ opus_int16 frame[], /* I/O signal */
+ opus_int length /* I length of signal */
+);
+
+#endif
+
diff --git a/drivers/opus/silk/SigProc_FIX.h b/drivers/opus/silk/SigProc_FIX.h
new file mode 100644
index 0000000000..1b58057910
--- /dev/null
+++ b/drivers/opus/silk/SigProc_FIX.h
@@ -0,0 +1,594 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_H
+#define SILK_SIGPROC_FIX_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */
+
+#define SILK_MAX_ORDER_LPC 16 /* max order of the LPC analysis in schur() and k2a() */
+
+#include <string.h> /* for memset(), memcpy(), memmove() */
+#include "typedef.h"
+#include "resampler_structs.h"
+#include "macros.h"
+
+
+/********************************************************************/
+/* SIGNAL PROCESSING FUNCTIONS */
+/********************************************************************/
+
+/*!
+ * Initialize/reset the resampler state for a given pair of input/output sampling rates
+*/
+opus_int silk_resampler_init(
+ silk_resampler_state_struct *S, /* I/O Resampler state */
+ opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */
+ opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */
+ opus_int forEnc /* I If 1: encoder; if 0: decoder */
+);
+
+/*!
+ * Resampler: convert from one sampling rate to another
+ */
+opus_int silk_resampler(
+ silk_resampler_state_struct *S, /* I/O Resampler state */
+ opus_int16 out[], /* O Output signal */
+ const opus_int16 in[], /* I Input signal */
+ opus_int32 inLen /* I Number of input samples */
+);
+
+/*!
+* Downsample 2x, mediocre quality (the output holds half as many samples as the input)
+*/
+void silk_resampler_down2(
+ opus_int32 *S, /* I/O State vector [ 2 ] */
+ opus_int16 *out, /* O Output signal [ floor(inLen/2) ] */
+ const opus_int16 *in, /* I Input signal [ inLen ] */
+ opus_int32 inLen /* I Number of input samples */
+);
+
+/*!
+ * Downsample by a factor 2/3, low quality
+*/
+void silk_resampler_down2_3(
+ opus_int32 *S, /* I/O State vector [ 6 ] */
+ opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */
+ const opus_int16 *in, /* I Input signal [ inLen ] */
+ opus_int32 inLen /* I Number of input samples */
+);
+
+/*!
+ * second order ARMA filter;
+ * slower than biquad() but uses more precise coefficients
+ * can handle (slowly) varying coefficients
+ */
+void silk_biquad_alt(
+ const opus_int16 *in, /* I input signal */
+ const opus_int32 *B_Q28, /* I MA coefficients [3] */
+ const opus_int32 *A_Q28, /* I AR coefficients [2] */
+ opus_int32 *S, /* I/O State vector [2] */
+ opus_int16 *out, /* O output signal */
+ const opus_int32 len, /* I signal length (must be even) */
+ opus_int stride /* I Operate on interleaved signal if > 1 */
+);
+
+/* Variable order MA prediction error filter. */
+void silk_LPC_analysis_filter(
+ opus_int16 *out, /* O Output signal */
+ const opus_int16 *in, /* I Input signal */
+ const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */
+ const opus_int32 len, /* I Signal length */
+ const opus_int32 d /* I Filter order */
+);
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander(
+ opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */
+ const opus_int d, /* I Length of ar */
+ opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */
+);
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander_32(
+ opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */
+ const opus_int d, /* I Length of ar */
+ opus_int32 chirp_Q16 /* I Chirp factor in Q16 */
+);
+
+/* Compute inverse of LPC prediction gain, and */
+/* test if LPC coefficients are stable (all poles within unit circle) */
+opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */
+ const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
+ const opus_int order /* I Prediction order */
+);
+
+/* For input in Q24 domain */
+opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */
+ const opus_int32 *A_Q24, /* I Prediction coefficients [order] */
+ const opus_int order /* I Prediction order */
+);
+
+/* Split signal in two decimated bands using first-order allpass filters */
+void silk_ana_filt_bank_1(
+ const opus_int16 *in, /* I Input signal [N] */
+ opus_int32 *S, /* I/O State vector [2] */
+ opus_int16 *outL, /* O Low band [N/2] */
+ opus_int16 *outH, /* O High band [N/2] */
+ const opus_int32 N /* I Number of input samples */
+);
+
+/********************************************************************/
+/* SCALAR FUNCTIONS */
+/********************************************************************/
+
+/* Approximation of 128 * log2() (exact inverse of approx 2^() below) */
+/* Convert input to a log scale */
+opus_int32 silk_lin2log(
+ const opus_int32 inLin /* I input in linear scale */
+);
+
+/* Approximation of a sigmoid function */
+opus_int silk_sigm_Q15(
+ opus_int in_Q5 /* I */
+);
+
+/* Approximation of 2^() (exact inverse of approx log2() above) */
+/* Convert input to a linear scale */
+opus_int32 silk_log2lin(
+ const opus_int32 inLog_Q7 /* I input on log scale */
+);
+
+/* Compute number of bits to right shift the sum of squares of a vector */
+/* of int16s to make it fit in an int32 */
+void silk_sum_sqr_shift(
+ opus_int32 *energy, /* O Energy of x, after shifting to the right */
+ opus_int *shift, /* O Number of bits right shift applied to energy */
+ const opus_int16 *x, /* I Input vector */
+ opus_int len /* I Length of input vector */
+);
+
+/* Calculates the reflection coefficients from the correlation sequence */
+/* Faster than schur64(), but much less accurate. */
+/* uses SMLAWB(), requiring armv5E and higher. */
+opus_int32 silk_schur( /* O Returns residual energy */
+ opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */
+ const opus_int32 *c, /* I correlations [order+1] */
+ const opus_int32 order /* I prediction order */
+);
+
+/* Calculates the reflection coefficients from the correlation sequence */
+/* Slower than schur(), but more accurate. */
+/* Uses SMULL(), available on armv4 */
+opus_int32 silk_schur64( /* O returns residual energy */
+ opus_int32 rc_Q16[], /* O Reflection coefficients [order] Q16 */
+ const opus_int32 c[], /* I Correlations [order+1] */
+ opus_int32 order /* I Prediction order */
+);
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a(
+ opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */
+ const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */
+ const opus_int32 order /* I Prediction order */
+);
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a_Q16(
+ opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */
+ const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */
+ const opus_int32 order /* I Prediction order */
+);
+
+/* Apply sine window to signal vector. */
+/* Window types: */
+/* 1 -> sine window from 0 to pi/2 */
+/* 2 -> sine window from pi/2 to pi */
+/* every other sample of window is linearly interpolated, for speed */
+void silk_apply_sine_window(
+ opus_int16 px_win[], /* O Pointer to windowed signal */
+ const opus_int16 px[], /* I Pointer to input signal */
+ const opus_int win_type, /* I Selects a window type */
+ const opus_int length /* I Window length, multiple of 4 */
+);
+
+/* Compute autocorrelation */
+void silk_autocorr(
+ opus_int32 *results, /* O Result (length correlationCount) */
+ opus_int *scale, /* O Scaling of the correlation vector */
+ const opus_int16 *inputData, /* I Input data to correlate */
+ const opus_int inputDataSize, /* I Length of input */
+ const opus_int correlationCount, /* I Number of correlation taps to compute */
+ int arch /* I Run-time architecture */
+);
+
+void silk_decode_pitch(
+ opus_int16 lagIndex, /* I Lag index */
+ opus_int8 contourIndex, /* I Pitch contour index (passed by value, so it cannot be an output; was marked O) */
+ opus_int pitch_lags[], /* O 4 pitch values */
+ const opus_int Fs_kHz, /* I sampling frequency (kHz) */
+ const opus_int nb_subfr /* I number of sub frames */
+);
+
+opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
+ const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
+ opus_int *pitch_out, /* O 4 pitch lag values */
+ opus_int16 *lagIndex, /* O Lag Index */
+ opus_int8 *contourIndex, /* O Pitch contour Index */
+ opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
+ opus_int prevLag, /* I Last lag of previous frame; set to zero if unvoiced */
+ const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
+ const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
+ const opus_int Fs_kHz, /* I Sample frequency (kHz) */
+ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
+ const opus_int nb_subfr, /* I number of 5 ms subframes */
+ int arch /* I Run-time architecture */
+);
+
+/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */
+/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
+void silk_A2NLSF(
+ opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
+ opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */
+ const opus_int d /* I Filter order (must be even) */
+);
+
+/* compute whitening filter coefficients from normalized line spectral frequencies */
+void silk_NLSF2A(
+ opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */
+ const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */
+ const opus_int d /* I filter order (should be even) */
+);
+
+void silk_insertion_sort_increasing(
+ opus_int32 *a, /* I/O Unsorted / Sorted vector */
+ opus_int *idx, /* O Index vector for the sorted elements */
+ const opus_int L, /* I Vector length */
+ const opus_int K /* I Number of correctly sorted positions */
+);
+
+void silk_insertion_sort_decreasing_int16(
+ opus_int16 *a, /* I/O Unsorted / Sorted vector */
+ opus_int *idx, /* O Index vector for the sorted elements */
+ const opus_int L, /* I Vector length */
+ const opus_int K /* I Number of correctly sorted positions */
+);
+
+void silk_insertion_sort_increasing_all_values_int16(
+ opus_int16 *a, /* I/O Unsorted / Sorted vector */
+ const opus_int L /* I Vector length */
+);
+
+/* NLSF stabilizer, for a single input data vector */
+void silk_NLSF_stabilize(
+ opus_int16 *NLSF_Q15, /* I/O Unstable/stabilized normalized LSF vector in Q15 [L] */
+ const opus_int16 *NDeltaMin_Q15, /* I Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1] */
+ const opus_int L /* I Number of NLSF parameters in the input vector */
+);
+
+/* Laroia low complexity NLSF weights */
+void silk_NLSF_VQ_weights_laroia(
+ opus_int16 *pNLSFW_Q_OUT, /* O Pointer to input vector weights [D] */
+ const opus_int16 *pNLSF_Q15, /* I Pointer to input vector [D] */
+ const opus_int D /* I Input vector dimension (even) */
+);
+
+/* Compute reflection coefficients from input signal */
+void silk_burg_modified(
+ opus_int32 *res_nrg, /* O Residual energy */
+ opus_int *res_nrg_Q, /* O Residual energy Q value */
+ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
+ const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */
+ const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */
+ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */
+ const opus_int nb_subfr, /* I Number of subframes stacked in x */
+ const opus_int D, /* I Order */
+ int arch /* I Run-time architecture */
+);
+
+/* Copy and multiply a vector by a constant */
+void silk_scale_copy_vector16(
+ opus_int16 *data_out,
+ const opus_int16 *data_in,
+ opus_int32 gain_Q16, /* I Gain in Q16 */
+ const opus_int dataSize /* I Length */
+);
+
+/* Some of the LTP related functions require Q26 to work. */
+void silk_scale_vector32_Q26_lshift_18(
+ opus_int32 *data1, /* I/O Q0/Q18 */
+ opus_int32 gain_Q26, /* I Q26 */
+ opus_int dataSize /* I length */
+);
+
+/********************************************************************/
+/* INLINE ARM MATH */
+/********************************************************************/
+
+/* return sum( inVec1[i] * inVec2[i] ) */
+opus_int32 silk_inner_prod_aligned(
+ const opus_int16 *const inVec1, /* I input vector 1 */
+ const opus_int16 *const inVec2, /* I input vector 2 */
+ const opus_int len /* I vector lengths */
+);
+
+opus_int32 silk_inner_prod_aligned_scale(
+ const opus_int16 *const inVec1, /* I input vector 1 */
+ const opus_int16 *const inVec2, /* I input vector 2 */
+ const opus_int scale, /* I number of bits to shift */
+ const opus_int len /* I vector lengths */
+);
+
+opus_int64 silk_inner_prod16_aligned_64(
+ const opus_int16 *inVec1, /* I input vector 1 */
+ const opus_int16 *inVec2, /* I input vector 2 */
+ const opus_int len /* I vector lengths */
+);
+
+/********************************************************************/
+/* MACROS */
+/********************************************************************/
+
+/* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
+ left. Output is 32bit int.
+ Note: contemporary compilers recognize the C expression below and
+ compile it into a 'ror' instruction if available. No need for inline ASM! */
+static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
+{
+ opus_uint32 x = (opus_uint32) a32; /* rotate on the unsigned bit pattern */
+ opus_uint32 r = (opus_uint32) rot; /* right-rotation count, used when rot > 0 */
+ opus_uint32 m = (opus_uint32) -rot; /* left-rotation count, used when rot < 0 */
+ if( rot == 0 ) {
+ return a32; /* handled apart: the branches below would shift by 32 */
+ } else if( rot < 0 ) {
+ return (opus_int32) ((x << m) | (x >> (32 - m)));
+ } else {
+ return (opus_int32) ((x << (32 - r)) | (x >> r));
+ }
+}
+
+/* Allocate opus_int16 aligned to 4-byte memory address */
+#if EMBEDDED_ARM
+#define silk_DWORD_ALIGN __attribute__((aligned(4)))
+#else
+#define silk_DWORD_ALIGN
+#endif
+
+/* Useful Macros that can be adjusted to other platforms */
+#define silk_memcpy(dest, src, size) memcpy((dest), (src), (size))
+#define silk_memset(dest, src, size) memset((dest), (src), (size))
+#define silk_memmove(dest, src, size) memmove((dest), (src), (size))
+
+/* Fixed point macros */
+
+/* (a32 * b32) output have to be 32bit int */
+#define silk_MUL(a32, b32) ((a32) * (b32))
+
+/* (a32 * b32) output have to be 32bit uint */
+#define silk_MUL_uint(a32, b32) silk_MUL(a32, b32)
+
+/* a32 + (b32 * c32) output have to be 32bit int */
+#define silk_MLA(a32, b32, c32) silk_ADD32((a32),((b32) * (c32)))
+
+/* a32 + (b32 * c32) output have to be 32bit uint */
+#define silk_MLA_uint(a32, b32, c32) silk_MLA(a32, b32, c32)
+
+/* ((a32 >> 16) * (b32 >> 16)) output have to be 32bit int */
+#define silk_SMULTT(a32, b32) (((a32) >> 16) * ((b32) >> 16))
+
+/* a32 + ((b32 >> 16) * (c32 >> 16)) output has to be 32bit int */
+#define silk_SMLATT(a32, b32, c32) silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16))
+
+#define silk_SMLALBB(a64, b16, c16) silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * (opus_int32)(c16)))
+
+/* (a32 * b32) */
+#define silk_SMULL(a32, b32) ((opus_int64)(a32) * /*(opus_int64)*/(b32))
+
+/* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
+ (just standard two's complement implementation-specific behaviour) */
+#define silk_ADD32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b)))
+/* Subtracts two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
+ (just standard two's complement implementation-specific behaviour) */
+#define silk_SUB32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b)))
+
+/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
+#define silk_MLA_ovflw(a32, b32, c32) silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32))
+#define silk_SMLABB_ovflw(a32, b32, c32) (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))))
+
+#define silk_DIV32_16(a32, b16) ((opus_int32)((a32) / (b16)))
+#define silk_DIV32(a32, b32) ((opus_int32)((a32) / (b32)))
+
+/* These macros enable checking for overflow in silk_API_Debug.h */
+#define silk_ADD16(a, b) ((a) + (b))
+#define silk_ADD32(a, b) ((a) + (b))
+#define silk_ADD64(a, b) ((a) + (b))
+
+#define silk_SUB16(a, b) ((a) - (b))
+#define silk_SUB32(a, b) ((a) - (b))
+#define silk_SUB64(a, b) ((a) - (b))
+
+#define silk_SAT8(a) ((a) > silk_int8_MAX ? silk_int8_MAX : \
+ ((a) < silk_int8_MIN ? silk_int8_MIN : (a)))
+#define silk_SAT16(a) ((a) > silk_int16_MAX ? silk_int16_MAX : \
+ ((a) < silk_int16_MIN ? silk_int16_MIN : (a)))
+#define silk_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : \
+ ((a) < silk_int32_MIN ? silk_int32_MIN : (a)))
+
+#define silk_CHECK_FIT8(a) (a)
+#define silk_CHECK_FIT16(a) (a)
+#define silk_CHECK_FIT32(a) (a)
+
+#define silk_ADD_SAT16(a, b) (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) )
+#define silk_ADD_SAT64(a, b) ((((a) + (b)) & 0x8000000000000000LL) == 0 ? \
+ ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \
+ ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) )
+
+#define silk_SUB_SAT16(a, b) (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) )
+#define silk_SUB_SAT64(a, b) ((((a)-(b)) & 0x8000000000000000LL) == 0 ? \
+ (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \
+ ((((a)^0x8000000000000000LL) & (b) & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) )
+
+/* Saturation for positive input values */
+#define silk_POS_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : (a))
+
+/* Add with saturation for positive input values */
+#define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b)))
+#define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)))
+#define silk_ADD_POS_SAT32(a, b) ((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
+#define silk_ADD_POS_SAT64(a, b) ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)))
+
+#define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */
+#define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */
+#define silk_LSHIFT32(a, shift) ((opus_int32)((opus_uint32)(a)<<(shift))) /* shift >= 0, shift < 32 */
+#define silk_LSHIFT64(a, shift) ((opus_int64)((opus_uint64)(a)<<(shift))) /* shift >= 0, shift < 64 */
+#define silk_LSHIFT(a, shift) silk_LSHIFT32(a, shift) /* shift >= 0, shift < 32 */
+
+#define silk_RSHIFT8(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 8 */
+#define silk_RSHIFT16(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 16 */
+#define silk_RSHIFT32(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 32 */
+#define silk_RSHIFT64(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 64 */
+#define silk_RSHIFT(a, shift) silk_RSHIFT32(a, shift) /* shift >= 0, shift < 32 */
+
+/* saturates before shifting */
+#define silk_LSHIFT_SAT32(a, shift) (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \
+ silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) ))
+
+#define silk_LSHIFT_ovflw(a, shift) ((opus_int32)((opus_uint32)(a) << (shift))) /* shift >= 0, allowed to overflow */
+#define silk_LSHIFT_uint(a, shift) ((a) << (shift)) /* shift >= 0 */
+#define silk_RSHIFT_uint(a, shift) ((a) >> (shift)) /* shift >= 0 */
+
+#define silk_ADD_LSHIFT(a, b, shift) ((a) + silk_LSHIFT((b), (shift))) /* shift >= 0 */
+#define silk_ADD_LSHIFT32(a, b, shift) silk_ADD32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */
+#define silk_ADD_LSHIFT_uint(a, b, shift) ((a) + silk_LSHIFT_uint((b), (shift))) /* shift >= 0 */
+#define silk_ADD_RSHIFT(a, b, shift) ((a) + silk_RSHIFT((b), (shift))) /* shift >= 0 */
+#define silk_ADD_RSHIFT32(a, b, shift) silk_ADD32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */
+#define silk_ADD_RSHIFT_uint(a, b, shift) ((a) + silk_RSHIFT_uint((b), (shift))) /* shift >= 0 */
+#define silk_SUB_LSHIFT32(a, b, shift) silk_SUB32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */
+#define silk_SUB_RSHIFT32(a, b, shift) silk_SUB32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */
+
+/* Requires that shift > 0 */
+#define silk_RSHIFT_ROUND(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
+#define silk_RSHIFT_ROUND64(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
+
+/* Number of rightshift required to fit the multiplication */
+#define silk_NSHIFT_MUL_32_32(a, b) ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) )
+#define silk_NSHIFT_MUL_16_16(a, b) ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) )
+
+
+#define silk_min(a, b) (((a) < (b)) ? (a) : (b))
+#define silk_max(a, b) (((a) > (b)) ? (a) : (b))
+
+/* Macro to convert floating-point constants to fixed-point */
+#define SILK_FIX_CONST( C, Q ) ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5))
+
+/* Function versions of silk_min(): arguments are converted to the parameter type at the call */
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
+{
+ return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+{
+ return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+{
+ return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+{
+ return (((a) < (b)) ? (a) : (b));
+}
+
+/* Function versions of silk_max(): arguments are converted to the parameter type at the call */
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
+{
+ return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+{
+ return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+{
+ return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+{
+ return (((a) > (b)) ? (a) : (b));
+}
+
+#define silk_LIMIT( a, limit1, limit2) ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+ : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))))
+
+#define silk_LIMIT_int silk_LIMIT
+#define silk_LIMIT_16 silk_LIMIT
+#define silk_LIMIT_32 silk_LIMIT
+
+#define silk_abs(a) (((a) > 0) ? (a) : -(a)) /* Be careful, silk_abs returns a wrong result when the input equals silk_intXX_MIN */
+#define silk_abs_int(a) (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1)))
+#define silk_abs_int32(a) (((a) ^ ((a) >> 31)) - ((a) >> 31))
+#define silk_abs_int64(a) (((a) > 0) ? (a) : -(a))
+
+#define silk_sign(a) ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ))
+
+/* PSEUDO-RANDOM GENERATOR */
+/* Make sure to store the result as the seed for the next call (also in between */
+/* frames), otherwise result won't be random at all. When only using some of the */
+/* bits, take the most significant bits by right-shifting. */
+#define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165))
+
+/* Add some multiplication functions that can be easily mapped to ARM. */
+
+/* silk_SMMUL: Signed top word multiply.
+ ARMv6 2 instruction cycles.
+ ARMv3M+ 3 instruction cycles. use SMULL and ignore LSB registers.(except xM)*/
+/*#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/
+/* the following seems faster on x86 */
+#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
+
+#include "Inlines.h"
+#include "MacroCount.h"
+#include "MacroDebug.h"
+
+#ifdef OPUS_ARM_INLINE_ASM
+#include "arm/SigProc_FIX_armv4.h"
+#endif
+
+#ifdef OPUS_ARM_INLINE_EDSP
+#include "arm/SigProc_FIX_armv5e.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_SIGPROC_FIX_H */
diff --git a/drivers/opus/silk/VAD.c b/drivers/opus/silk/VAD.c
new file mode 100644
index 0000000000..3a5c566627
--- /dev/null
+++ b/drivers/opus/silk/VAD.c
@@ -0,0 +1,357 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/* Silk VAD noise level estimation */
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
+ const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */
+ silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
+);
+
+/**********************************/
+/* Initialization of the Silk VAD */
+/**********************************/
+opus_int silk_VAD_Init( /* O Return value, 0 if success (ret is never changed from 0 here) */
+ silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
+)
+{
+ opus_int b, ret = 0;
+
+ /* Zero the whole state; the non-zero fields are filled in below */
+ silk_memset( psSilk_VAD, 0, sizeof( silk_VAD_state ) );
+
+ /* Init noise level bias per band: VAD_NOISE_LEVELS_BIAS / ( b + 1 ), floored at 1 */
+ /* Initialize array with approx pink noise levels (psd proportional to inverse of frequency) */
+ for( b = 0; b < VAD_N_BANDS; b++ ) {
+ psSilk_VAD->NoiseLevelBias[ b ] = silk_max_32( silk_DIV32_16( VAD_NOISE_LEVELS_BIAS, b + 1 ), 1 );
+ }
+
+ /* Initialize state: noise level starts at 100x the bias, inv_NL is silk_int32_MAX / NL */
+ for( b = 0; b < VAD_N_BANDS; b++ ) {
+ psSilk_VAD->NL[ b ] = silk_MUL( 100, psSilk_VAD->NoiseLevelBias[ b ] );
+ psSilk_VAD->inv_NL[ b ] = silk_DIV32( silk_int32_MAX, psSilk_VAD->NL[ b ] );
+ }
+ psSilk_VAD->counter = 15; /* frame counter read and incremented by silk_VAD_GetNoiseLevels() */
+
+ /* Init smoothed energy-to-noise ratio */
+ for( b = 0; b < VAD_N_BANDS; b++ ) {
+ psSilk_VAD->NrgRatioSmth_Q8[ b ] = 100 * 256; /* 100 * 256 --> 20 dB SNR */
+ }
+
+ return( ret );
+}
+
+/* Weighting factors for tilt measure, one per band; applied to the per-band SNR in silk_VAD_GetSA_Q8() */
+static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 };
+
+/***************************************/
+/* Get the speech activity level in Q8 */
+/***************************************/
+opus_int silk_VAD_GetSA_Q8( /* O Return value, 0 if success */
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ const opus_int16 pIn[] /* I PCM input */
+)
+{
+ opus_int SA_Q15, pSNR_dB_Q7, input_tilt;
+ opus_int decimated_framelength1, decimated_framelength2;
+ opus_int decimated_framelength;
+ opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
+ opus_int32 sumSquared, smooth_coef_Q16;
+ opus_int16 HPstateTmp;
+ VARDECL( opus_int16, X );
+ opus_int32 Xnrg[ VAD_N_BANDS ];
+ opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
+ opus_int32 speech_nrg, x_tmp;
+ opus_int X_offset[ VAD_N_BANDS ];
+ opus_int ret = 0;
+ silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
+ SAVE_STACK;
+
+ /* Safety checks */
+ silk_assert( VAD_N_BANDS == 4 );
+ silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
+ silk_assert( psEncC->frame_length <= 512 );
+ silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
+
+ /***********************/
+ /* Filter and Decimate */
+ /***********************/
+ decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
+ decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
+ decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
+ /* Decimate into 4 bands:
+ 0 L 3L L 3L 5L
+ - -- - -- --
+ 8 8 2 4 4
+
+ [0-1 kHz| temp. |1-2 kHz| 2-4 kHz | 4-8 kHz |
+
+ They're arranged to allow the minimal ( frame_length / 4 ) extra
+ scratch space during the downsampling process */
+ X_offset[ 0 ] = 0;
+ X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
+ X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
+ X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
+ ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );
+
+ /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
+ silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ],
+ X, &X[ X_offset[ 3 ] ], psEncC->frame_length );
+
+ /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
+ silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
+ X, &X[ X_offset[ 2 ] ], decimated_framelength1 );
+
+ /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
+ silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
+ X, &X[ X_offset[ 1 ] ], decimated_framelength2 );
+
+ /*********************************************/
+ /* HP filter on lowest band (differentiator) */
+ /*********************************************/
+ X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
+ HPstateTmp = X[ decimated_framelength - 1 ]; /* halved last sample, saved before it is overwritten below */
+ for( i = decimated_framelength - 1; i > 0; i-- ) {
+ X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 );
+ X[ i ] -= X[ i - 1 ];
+ }
+ X[ 0 ] -= psSilk_VAD->HPstate; /* first sample uses the value saved by the previous call */
+ psSilk_VAD->HPstate = HPstateTmp;
+
+ /*************************************/
+ /* Calculate the energy in each band */
+ /*************************************/
+ for( b = 0; b < VAD_N_BANDS; b++ ) {
+ /* Find the decimated framelength in the non-uniformly divided bands */
+ decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );
+
+ /* Split length into subframe lengths */
+ dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 );
+ dec_subframe_offset = 0;
+
+ /* Compute energy per sub-frame */
+ /* initialize with summed energy of last subframe */
+ Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];
+ for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {
+ sumSquared = 0;
+ for( i = 0; i < dec_subframe_length; i++ ) {
+ /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */
+ /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */
+ x_tmp = silk_RSHIFT(
+ X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
+ sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );
+
+ /* Safety check */
+ silk_assert( sumSquared >= 0 );
+ }
+
+ /* Add/saturate summed energy of current subframe */
+ if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {
+ Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );
+ } else {
+ /* Look-ahead subframe: only half of its energy is added */
+ Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) );
+ }
+
+ dec_subframe_offset += dec_subframe_length;
+ }
+ psSilk_VAD->XnrgSubfr[ b ] = sumSquared; /* energy of the final subframe; seeds Xnrg[ b ] on the next call */
+ }
+
+ /********************/
+ /* Noise estimation */
+ /********************/
+ silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );
+
+ /***********************************************/
+ /* Signal-plus-noise to noise ratio estimation */
+ /***********************************************/
+ sumSquared = 0;
+ input_tilt = 0;
+ for( b = 0; b < VAD_N_BANDS; b++ ) {
+ speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];
+ if( speech_nrg > 0 ) {
+ /* Divide, with sufficient resolution */
+ if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { /* top 9 bits clear: the numerator can be shifted up by 8 */
+ NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );
+ } else {
+ NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 );
+ }
+
+ /* Convert to log domain */
+ SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;
+
+ /* Sum-of-squares */
+ sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */
+
+ /* Tilt measure */
+ if( speech_nrg < ( (opus_int32)1 << 20 ) ) {
+ /* Scale down SNR value for small subband speech energies */
+ SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );
+ }
+ input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );
+ } else {
+ NrgToNoiseRatio_Q8[ b ] = 256; /* 256 = ratio of 1.0 in Q8 */
+ }
+ }
+
+ /* Mean-of-squares */
+ sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */
+
+ /* Root-mean-square approximation, scaled to dB, stored in the local pSNR_dB_Q7 */
+ pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */
+
+ /*********************************/
+ /* Speech Probability Estimation */
+ /*********************************/
+ SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );
+
+ /**************************/
+ /* Frequency Tilt Measure */
+ /**************************/
+ psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );
+
+ /**************************************************/
+ /* Scale the sigmoid output based on power levels */
+ /**************************************************/
+ speech_nrg = 0;
+ for( b = 0; b < VAD_N_BANDS; b++ ) {
+ /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
+ speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
+ }
+
+ /* Power scaling */
+ if( speech_nrg <= 0 ) {
+ SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
+ } else if( speech_nrg < 32768 ) {
+ if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+ speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 );
+ } else {
+ speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 );
+ }
+
+ /* square-root */
+ speech_nrg = silk_SQRT_APPROX( speech_nrg );
+ SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
+ }
+
+ /* Store the resulting speech activity in Q8, capped at silk_uint8_MAX */
+ psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );
+
+ /***********************************/
+ /* Energy Level and SNR estimation */
+ /***********************************/
+ /* Smoothing coefficient */
+ smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) );
+
+ if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+ smooth_coef_Q16 >>= 1; /* halve the coefficient when the frame holds 10 * fs_kHz samples */
+ }
+
+ for( b = 0; b < VAD_N_BANDS; b++ ) {
+ /* compute smoothed energy-to-noise ratio per band */
+ psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ],
+ NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 );
+
+ /* signal to noise ratio in dB per band */
+ SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );
+ /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
+ psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) );
+ }
+
+ RESTORE_STACK;
+ return( ret );
+}
+
+/**************************/
+/* Noise level estimation */
+/**************************/
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
+ const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */
+ silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
+)
+{
+ opus_int k;
+ opus_int32 nl, nrg, inv_nrg;
+ opus_int coef, min_coef;
+
+ /* Initially faster smoothing: min_coef is a floor on coef that shrinks as the frame counter grows */
+ if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */
+ min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->counter, 4 ) + 1 );
+ } else {
+ min_coef = 0;
+ }
+
+ for( k = 0; k < VAD_N_BANDS; k++ ) {
+ /* Get old noise level estimate for current band */
+ nl = psSilk_VAD->NL[ k ];
+ silk_assert( nl >= 0 );
+
+ /* Add bias (saturating) */
+ nrg = silk_ADD_POS_SAT32( pX[ k ], psSilk_VAD->NoiseLevelBias[ k ] );
+ silk_assert( nrg > 0 );
+
+ /* Invert energies */
+ inv_nrg = silk_DIV32( silk_int32_MAX, nrg );
+ silk_assert( inv_nrg >= 0 );
+
+ /* Less update when subband energy is high */
+ if( nrg > silk_LSHIFT( nl, 3 ) ) { /* energy above 8x the old noise level */
+ coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 >> 3;
+ } else if( nrg < nl ) { /* energy below the old noise level */
+ coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16;
+ } else {
+ coef = silk_SMULWB( silk_SMULWW( inv_nrg, nl ), VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 << 1 );
+ }
+
+ /* Initially faster smoothing: never go below min_coef */
+ coef = silk_max_int( coef, min_coef );
+
+ /* Smooth inverse energies */
+ psSilk_VAD->inv_NL[ k ] = silk_SMLAWB( psSilk_VAD->inv_NL[ k ], inv_nrg - psSilk_VAD->inv_NL[ k ], coef );
+ silk_assert( psSilk_VAD->inv_NL[ k ] >= 0 );
+
+ /* Compute noise level by inverting again */
+ nl = silk_DIV32( silk_int32_MAX, psSilk_VAD->inv_NL[ k ] );
+ silk_assert( nl >= 0 );
+
+ /* Limit noise levels (guarantee 7 bits of head room) */
+ nl = silk_min( nl, 0x00FFFFFF );
+
+ /* Store as part of state */
+ psSilk_VAD->NL[ k ] = nl;
+ }
+
+ /* Increment frame counter */
+ psSilk_VAD->counter++;
+}
diff --git a/drivers/opus/silk/VQ_WMat_EC.c b/drivers/opus/silk/VQ_WMat_EC.c
new file mode 100644
index 0000000000..28c5fc7e6f
--- /dev/null
+++ b/drivers/opus/silk/VQ_WMat_EC.c
@@ -0,0 +1,120 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
+void silk_VQ_WMat_EC(
+ opus_int8 *ind, /* O index of best codebook vector */
+ opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */
+ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */
+ const opus_int16 *in_Q14, /* I input vector to be quantized */
+ const opus_int32 *W_Q18, /* I weighting matrix */
+ const opus_int8 *cb_Q7, /* I codebook */
+ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */
+ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */
+ const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */
+ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */
+ opus_int L /* I number of vectors in codebook */
+)
+{
+ opus_int k, gain_tmp_Q7;
+ const opus_int8 *cb_row_Q7;
+ opus_int16 diff_Q14[ 5 ];
+ opus_int32 sum1_Q14, sum2_Q16;
+
+ /* Loop over codebook, keeping the entry with the smallest cost */
+ *rate_dist_Q14 = silk_int32_MAX;
+ cb_row_Q7 = cb_Q7;
+ for( k = 0; k < L; k++ ) {
+ gain_tmp_Q7 = cb_gain_Q7[k];
+
+ diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
+ diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
+ diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );
+ diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 );
+ diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 );
+
+ /* Weighted rate */
+ sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
+
+ /* Penalty for too large gain: the excess over max_gain_Q7, if any, shifted left by 10 */
+ sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
+
+ silk_assert( sum1_Q14 >= 0 );
+
+ /* first row of W_Q18 (off-diagonal terms are doubled; only the upper triangle is read, which presumes W is symmetric) */
+ sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] );
+ sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] );
+ sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] );
+
+ /* second row of W_Q18, from the diagonal element onwards */
+ sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] );
+ sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] );
+ sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] );
+
+ /* third row of W_Q18, from the diagonal element onwards */
+ sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );
+ sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );
+ sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] );
+
+ /* fourth row of W_Q18, from the diagonal element onwards */
+ sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] );
+ sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+ sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );
+ sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] );
+
+ /* last row of W_Q18: diagonal element only */
+ sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] );
+ sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] );
+
+ silk_assert( sum1_Q14 >= 0 );
+
+ /* find best: outputs are only written when this entry beats the best so far */
+ if( sum1_Q14 < *rate_dist_Q14 ) {
+ *rate_dist_Q14 = sum1_Q14;
+ *ind = (opus_int8)k;
+ *gain_Q7 = gain_tmp_Q7;
+ }
+
+ /* Go to next cbk vector */
+ cb_row_Q7 += LTP_ORDER;
+ }
+}
diff --git a/drivers/opus/silk/ana_filt_bank_1.c b/drivers/opus/silk/ana_filt_bank_1.c
new file mode 100644
index 0000000000..387dcd87e7
--- /dev/null
+++ b/drivers/opus/silk/ana_filt_bank_1.c
@@ -0,0 +1,74 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+ /* Coefficients for 2-band filter bank based on first-order allpass filters. */
+ /* They are only read (as the 16-bit operand of silk_SMULWB / silk_SMLAWB in  */
+ /* this file), so they are declared const to keep them out of writable data.  */
+ static const opus_int16 A_fb1_20 = 5394 << 1;
+ static const opus_int16 A_fb1_21 = -24290; /* (opus_int16)(20623 << 1) */
+
+ /* Split signal into two decimated bands using first-order allpass filters. */
+ /* Even input samples run through the A_fb1_21 section (state S[ 0 ]), odd   */
+ /* samples through the A_fb1_20 section (state S[ 1 ]); the sum of the two   */
+ /* section outputs forms the low band and their difference the high band.    */
+ void silk_ana_filt_bank_1(
+     const opus_int16 *in, /* I Input signal [N] */
+     opus_int32 *S, /* I/O State vector [2] */
+     opus_int16 *outL, /* O Low band [N/2] */
+     opus_int16 *outH, /* O High band [N/2] */
+     const opus_int32 N /* I Number of input samples */
+ )
+ {
+     const opus_int half_len = silk_RSHIFT( N, 1 );
+     opus_int       n;
+     opus_int32     sample_Q10, delta, step, ap_even, ap_odd;
+
+     /* Samples, filter states and section outputs are all kept in Q10 */
+     for( n = 0; n < half_len; n++ ) {
+         /* Even sample: scale up to Q10 and run the A_fb1_21 all-pass section */
+         sample_Q10 = silk_LSHIFT( (opus_int32)in[ 2 * n ], 10 );
+         delta      = silk_SUB32( sample_Q10, S[ 0 ] );
+         step       = silk_SMLAWB( delta, delta, A_fb1_21 );
+         ap_even    = silk_ADD32( S[ 0 ], step );
+         S[ 0 ]     = silk_ADD32( sample_Q10, step );
+
+         /* Odd sample: scale up to Q10 and run the A_fb1_20 all-pass section */
+         sample_Q10 = silk_LSHIFT( (opus_int32)in[ 2 * n + 1 ], 10 );
+         delta      = silk_SUB32( sample_Q10, S[ 1 ] );
+         step       = silk_SMULWB( delta, A_fb1_20 );
+         ap_odd     = silk_ADD32( S[ 1 ], step );
+         S[ 1 ]     = silk_ADD32( sample_Q10, step );
+
+         /* Sum -> low band, difference -> high band; round, saturate and store as int16 */
+         outL[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_ADD32( ap_odd, ap_even ), 11 ) );
+         outH[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SUB32( ap_odd, ap_even ), 11 ) );
+     }
+ }
diff --git a/drivers/opus/silk/arm/SigProc_FIX_armv4.h b/drivers/opus/silk/arm/SigProc_FIX_armv4.h
new file mode 100644
index 0000000000..ff62b1e5d6
--- /dev/null
+++ b/drivers/opus/silk/arm/SigProc_FIX_armv4.h
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors
+Copyright (c) 2013 Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv4_H
+#define SILK_SIGPROC_FIX_ARMv4_H
+
+ /* a32 + b32 * c32 using the ARM "mla" instruction. */
+ /* The #undef drops any earlier silk_MLA definition so the macro below takes over. */
+ #undef silk_MLA
+ static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+ {
+ opus_int32 res;
+ __asm__(
+ "#silk_MLA\n\t"
+ "mla %0, %1, %2, %3\n\t"
+ /* "=&r" is an early-clobber output: res never shares a register with an input. */
+ /* NOTE(review): presumably needed because older ARM cores restrict mla's Rd/Rm overlap - confirm. */
+ : "=&r"(res)
+ /* %1 = b and %2 = c are the factors, %3 = a is the accumulator */
+ : "r"(b), "r"(c), "r"(a)
+ );
+ return res;
+ }
+ #define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c))
+
+#endif
diff --git a/drivers/opus/silk/arm/SigProc_FIX_armv5e.h b/drivers/opus/silk/arm/SigProc_FIX_armv5e.h
new file mode 100644
index 0000000000..617a09cab1
--- /dev/null
+++ b/drivers/opus/silk/arm/SigProc_FIX_armv5e.h
@@ -0,0 +1,61 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013 Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv5E_H
+#define SILK_SIGPROC_FIX_ARMv5E_H
+
+ /* (a32 >> 16) * (b32 >> 16): product of the two top halfwords, using the ARM "smultt" instruction. */
+ #undef silk_SMULTT
+ static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
+ {
+ opus_int32 res;
+ __asm__(
+ "#silk_SMULTT\n\t"
+ "smultt %0, %1, %2\n\t"
+ : "=r"(res)
+ /* "%r" marks a as interchangeable with b, since both are used the same way */
+ : "%r"(a), "r"(b)
+ );
+ return res;
+ }
+ #define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b))
+
+ /* a32 + (b32 >> 16) * (c32 >> 16): top-halfword multiply-accumulate, using the ARM "smlatt" instruction. */
+ #undef silk_SMLATT
+ static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+ {
+ opus_int32 res;
+ __asm__(
+ "#silk_SMLATT\n\t"
+ "smlatt %0, %1, %2, %3\n\t"
+ : "=r"(res)
+ /* %1 = b and %2 = c are the (interchangeable) factors, %3 = a is the accumulator */
+ : "%r"(b), "r"(c), "r"(a)
+ );
+ return res;
+ }
+ #define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c))
+
+#endif
diff --git a/drivers/opus/silk/arm/macros_armv4.h b/drivers/opus/silk/arm/macros_armv4.h
new file mode 100644
index 0000000000..3f30e97288
--- /dev/null
+++ b/drivers/opus/silk/arm/macros_armv4.h
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv4_H
+#define SILK_MACROS_ARMv4_H
+
+ /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16; the output has to be a 32-bit int. */
+ /* Implemented with smull: b is moved into the top halfword of a 32-bit word and */
+ /* the high word of the 64-bit product a * (b << 16) is returned.                */
+ #undef silk_SMULWB
+ static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
+ {
+     unsigned rd_lo;
+     int rd_hi;
+     /* Shift on an unsigned value: left-shifting a negative int is undefined behaviour in C. */
+     const opus_int32 b_top = (opus_int32)((unsigned)b << 16);
+     __asm__(
+         "#silk_SMULWB\n\t"
+         "smull %0, %1, %2, %3\n\t"
+         /* %0 = low word (unused), %1 = high word of the 64-bit product */
+         : "=&r"(rd_lo), "=&r"(rd_hi)
+         : "%r"(a), "r"(b_top)
+     );
+     return rd_hi;
+ }
+ #define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b))
+
+ /* a32 + ((b32 * (opus_int32)((opus_int16)(c32))) >> 16); the output has to be a 32-bit int. */
+ /* Built from silk_SMULWB plus an ordinary C addition. */
+ #undef silk_SMLAWB
+ #define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c))
+
+ /* (a32 * (b32 >> 16)) >> 16 */
+ /* Implemented with smull: the low 16 bits of b are cleared and the high word */
+ /* of the resulting 64-bit product is returned.                              */
+ #undef silk_SMULWT
+ static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
+ {
+ unsigned rd_lo;
+ int rd_hi;
+ __asm__(
+ "#silk_SMULWT\n\t"
+ "smull %0, %1, %2, %3\n\t"
+ /* %0 = low word (unused), %1 = high word of the 64-bit product */
+ : "=&r"(rd_lo), "=&r"(rd_hi)
+ : "%r"(a), "r"(b&~0xFFFF)
+ );
+ return rd_hi;
+ }
+ #define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b))
+
+ /* a32 + ((b32 * (c32 >> 16)) >> 16) */
+ /* Built from silk_SMULWT plus an ordinary C addition. */
+ #undef silk_SMLAWT
+ #define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c))
+
+ /* (a32 * b32) >> 16 */
+ /* smull produces the full 64-bit product; bits 16..47 of it are returned. */
+ #undef silk_SMULWW
+ static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
+ {
+     unsigned rd_lo;
+     int rd_hi;
+     __asm__(
+         "#silk_SMULWW\n\t"
+         "smull %0, %1, %2, %3\n\t"
+         /* %0 = low word, %1 = high word of the 64-bit product */
+         : "=&r"(rd_lo), "=&r"(rd_hi)
+         : "%r"(a), "r"(b)
+     );
+     /* Recombine in unsigned arithmetic: left-shifting a negative rd_hi as a */
+     /* signed int would be undefined behaviour in C.                         */
+     return (opus_int32)(((unsigned)rd_hi << 16) + (rd_lo >> 16));
+ }
+ #define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
+
+ /* a32 + ((b32 * c32) >> 16) */
+ /* smull produces the full 64-bit product of b and c; bits 16..47 of it are added to a. */
+ #undef silk_SMLAWW
+ static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
+     opus_int32 c)
+ {
+     unsigned rd_lo;
+     int rd_hi;
+     __asm__(
+         "#silk_SMLAWW\n\t"
+         "smull %0, %1, %2, %3\n\t"
+         /* %0 = low word, %1 = high word of the 64-bit product */
+         : "=&r"(rd_lo), "=&r"(rd_hi)
+         : "%r"(b), "r"(c)
+     );
+     /* Shift and accumulate in unsigned arithmetic: a left shift of a negative */
+     /* int and a signed overflow in the addition are both undefined in C,      */
+     /* whereas unsigned arithmetic wraps and yields the same result bits.      */
+     return (opus_int32)((unsigned)a + ((unsigned)rd_hi << 16) + (rd_lo >> 16));
+ }
+ #define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
+
+#endif /* SILK_MACROS_ARMv4_H */
diff --git a/drivers/opus/silk/arm/macros_armv5e.h b/drivers/opus/silk/arm/macros_armv5e.h
new file mode 100644
index 0000000000..aad4117e46
--- /dev/null
+++ b/drivers/opus/silk/arm/macros_armv5e.h
@@ -0,0 +1,213 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013 Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv5E_H
+#define SILK_MACROS_ARMv5E_H
+
+ /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16; the output has to be a 32-bit int. */
+ /* Uses the ARM "smulwb" instruction. */
+ #undef silk_SMULWB
+ static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
+ {
+ int res;
+ __asm__(
+ "#silk_SMULWB\n\t"
+ "smulwb %0, %1, %2\n\t"
+ : "=r"(res)
+ /* no "%" modifier: a is used as a full word and b as a halfword, so they cannot be swapped */
+ : "r"(a), "r"(b)
+ );
+ return res;
+ }
+ #define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b))
+
+ /* a32 + ((b32 * (opus_int32)((opus_int16)(c32))) >> 16); the output has to be a 32-bit int. */
+ /* Uses the ARM "smlawb" instruction. */
+ #undef silk_SMLAWB
+ static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
+ opus_int16 c)
+ {
+ int res;
+ __asm__(
+ "#silk_SMLAWB\n\t"
+ "smlawb %0, %1, %2, %3\n\t"
+ : "=r"(res)
+ /* %1 = b (word factor), %2 = c (halfword factor), %3 = a (accumulator) */
+ : "r"(b), "r"(c), "r"(a)
+ );
+ return res;
+ }
+ #define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c))
+
+ /* (a32 * (b32 >> 16)) >> 16 */
+ /* Uses the ARM "smulwt" instruction. */
+ #undef silk_SMULWT
+ static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
+ {
+ int res;
+ __asm__(
+ "#silk_SMULWT\n\t"
+ "smulwt %0, %1, %2\n\t"
+ : "=r"(res)
+ /* no "%" modifier: a is used as a full word and b only through its top halfword */
+ : "r"(a), "r"(b)
+ );
+ return res;
+ }
+ #define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b))
+
+ /* a32 + ((b32 * (c32 >> 16)) >> 16) */
+ /* Uses the ARM "smlawt" instruction. */
+ #undef silk_SMLAWT
+ static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+ {
+ int res;
+ __asm__(
+ "#silk_SMLAWT\n\t"
+ "smlawt %0, %1, %2, %3\n\t"
+ : "=r"(res)
+ /* %1 = b (word factor), %2 = c (top halfword used), %3 = a (accumulator) */
+ : "r"(b), "r"(c), "r"(a)
+ );
+ return res;
+ }
+ #define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c))
+
+ /* (opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)); the output has to be a 32-bit int. */
+ /* Uses the ARM "smulbb" instruction. */
+ #undef silk_SMULBB
+ static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
+ {
+ int res;
+ __asm__(
+ "#silk_SMULBB\n\t"
+ "smulbb %0, %1, %2\n\t"
+ : "=r"(res)
+ /* "%r" marks a as interchangeable with b, since both are used the same way */
+ : "%r"(a), "r"(b)
+ );
+ return res;
+ }
+ #define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b))
+
+ /* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)); the output has to be a 32-bit int. */
+ /* Uses the ARM "smlabb" instruction. */
+ #undef silk_SMLABB
+ static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+ {
+ int res;
+ __asm__(
+ "#silk_SMLABB\n\t"
+ "smlabb %0, %1, %2, %3\n\t"
+ : "=r"(res)
+ /* %1 = b and %2 = c are the (interchangeable) factors, %3 = a is the accumulator */
+ : "%r"(b), "r"(c), "r"(a)
+ );
+ return res;
+ }
+ #define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c))
+
+ /* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
+ /* Uses the ARM "smulbt" instruction. */
+ #undef silk_SMULBT
+ static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
+ {
+ int res;
+ __asm__(
+ "#silk_SMULBT\n\t"
+ "smulbt %0, %1, %2\n\t"
+ : "=r"(res)
+ /* no "%" modifier: a contributes its bottom halfword and b its top halfword */
+ : "r"(a), "r"(b)
+ );
+ return res;
+ }
+ #define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b))
+
+ /* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
+ /* Uses the ARM "smlabt" instruction. */
+ #undef silk_SMLABT
+ static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+ {
+ int res;
+ __asm__(
+ "#silk_SMLABT\n\t"
+ "smlabt %0, %1, %2, %3\n\t"
+ : "=r"(res)
+ /* %1 = b (bottom halfword used), %2 = c (top halfword used), %3 = a (accumulator) */
+ : "r"(b), "r"(c), "r"(a)
+ );
+ return res;
+ }
+ #define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c))
+
+ /* add/subtract with output saturated */
+ /* a32 + b32 using the ARM "qadd" instruction. */
+ #undef silk_ADD_SAT32
+ static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
+ {
+ int res;
+ __asm__(
+ "#silk_ADD_SAT32\n\t"
+ "qadd %0, %1, %2\n\t"
+ : "=r"(res)
+ /* "%r" marks a as interchangeable with b (addition is commutative) */
+ : "%r"(a), "r"(b)
+ );
+ return res;
+ }
+ #define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b))
+
+ /* a32 - b32 with the output saturated, using the ARM "qsub" instruction. */
+ #undef silk_SUB_SAT32
+ static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
+ {
+ int res;
+ __asm__(
+ "#silk_SUB_SAT32\n\t"
+ "qsub %0, %1, %2\n\t"
+ : "=r"(res)
+ /* no "%" modifier: subtraction is not commutative, so a and b must keep their order */
+ : "r"(a), "r"(b)
+ );
+ return res;
+ }
+ #define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b))
+
+ /* Count leading zeros of a 16-bit value, using the 32-bit ARM "clz" instruction. */
+ /* in16 is moved into the top halfword, and bit 15 is set as a sentinel so that  */
+ /* an all-zero input yields 16 instead of 32.                                   */
+ #undef silk_CLZ16
+ static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16)
+ {
+     int res;
+     /* Shift on an unsigned value: left-shifting a negative int is undefined behaviour in C. */
+     const unsigned shifted = ((unsigned)in16 << 16) | 0x8000;
+     __asm__(
+         "#silk_CLZ16\n\t"
+         "clz %0, %1;\n"
+         : "=r"(res)
+         : "r"(shifted)
+     );
+     return res;
+ }
+ #define silk_CLZ16(in16) (silk_CLZ16_armv5(in16))
+
+ /* Count leading zeros of a 32-bit value, using the ARM "clz" instruction. */
+ #undef silk_CLZ32
+ static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32)
+ {
+ int res;
+ __asm__(
+ "#silk_CLZ32\n\t"
+ "clz %0, %1\n\t"
+ : "=r"(res)
+ : "r"(in32)
+ );
+ return res;
+ }
+ #define silk_CLZ32(in32) (silk_CLZ32_armv5(in32))
+
+#endif /* SILK_MACROS_ARMv5E_H */
diff --git a/drivers/opus/silk/biquad_alt.c b/drivers/opus/silk/biquad_alt.c
new file mode 100644
index 0000000000..5f1d6d25c3
--- /dev/null
+++ b/drivers/opus/silk/biquad_alt.c
@@ -0,0 +1,78 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/* *
+ * silk_biquad_alt.c *
+ * *
+ * Second order ARMA filter *
+ * Can handle slowly varying filter coefficients *
+ * */
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+ /* Second order ARMA filter, alternative implementation. */
+ /* Direct form II transposed structure with a two-element state vector. The */
+ /* negated AR coefficients are split into a 14-bit lower part and an upper  */
+ /* part so that each half can be applied with a 32x16-bit multiply.         */
+ void silk_biquad_alt(
+     const opus_int16 *in, /* I input signal */
+     const opus_int32 *B_Q28, /* I MA coefficients [3] */
+     const opus_int32 *A_Q28, /* I AR coefficients [2] */
+     opus_int32 *S, /* I/O State vector [2] */
+     opus_int16 *out, /* O output signal */
+     const opus_int32 len, /* I signal length (must be even) */
+     opus_int stride /* I Operate on interleaved signal if > 1 */
+ )
+ {
+     /* Negated AR coefficients, each split into lower 14 bits and the remaining upper bits */
+     const opus_int32 negA0_Q28 = -A_Q28[ 0 ];
+     const opus_int32 negA1_Q28 = -A_Q28[ 1 ];
+     const opus_int32 negA0_lo  = negA0_Q28 & 0x00003FFF;
+     const opus_int32 negA0_hi  = silk_RSHIFT( negA0_Q28, 14 );
+     const opus_int32 negA1_lo  = negA1_Q28 & 0x00003FFF;
+     const opus_int32 negA1_hi  = silk_RSHIFT( negA1_Q28, 14 );
+     opus_int   n, pos;
+     opus_int32 x, y_Q14;
+
+     for( n = 0; n < len; n++ ) {
+         /* Position of sample n inside the (possibly interleaved) buffers */
+         pos = n * stride;
+
+         /* Filter output in Q14; the states S[ 0 ] and S[ 1 ] are in Q12 */
+         x     = in[ pos ];
+         y_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], x ), 2 );
+
+         /* First state: previous second state plus AR and MA feedback terms */
+         S[ 0 ] = S[ 1 ] + silk_RSHIFT_ROUND( silk_SMULWB( y_Q14, negA0_lo ), 14 );
+         S[ 0 ] = silk_SMLAWB( S[ 0 ], y_Q14, negA0_hi );
+         S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], x );
+
+         /* Second state: AR and MA feedback terms only */
+         S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( y_Q14, negA1_lo ), 14 );
+         S[ 1 ] = silk_SMLAWB( S[ 1 ], y_Q14, negA1_hi );
+         S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], x );
+
+         /* Back to Q0 (rounding up) with saturation to 16 bits */
+         out[ pos ] = (opus_int16)silk_SAT16( silk_RSHIFT( y_Q14 + (1<<14) - 1, 14 ) );
+     }
+ }
diff --git a/drivers/opus/silk/bwexpander.c b/drivers/opus/silk/bwexpander.c
new file mode 100644
index 0000000000..d757483872
--- /dev/null
+++ b/drivers/opus/silk/bwexpander.c
@@ -0,0 +1,51 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+ /* Chirp (bandwidth expand) LP AR filter. */
+ /* Scales the coefficients in place: ar[ 0 ] by chirp_Q16, and each following */
+ /* coefficient by the previous factor multiplied once more by the original   */
+ /* chirp_Q16 (all in Q16, with rounding).                                    */
+ void silk_bwexpander(
+     opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */
+     const opus_int d, /* I Length of ar */
+     opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */
+ )
+ {
+     opus_int i;
+     opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536;
+
+     /* Nothing to expand; without this guard the final store below would write ar[ -1 ] when d == 0 */
+     if( d < 1 ) {
+         return;
+     }
+
+     /* NB: Don't use silk_SMULWB instead of silk_RSHIFT_ROUND( silk_MUL(), 16 ) below. */
+     /* Bias in silk_SMULWB can lead to unstable filters */
+     for( i = 0; i < d - 1; i++ ) {
+         ar[ i ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ i ] ), 16 );
+         /* chirp += chirp * (chirp0 - 1), i.e. chirp *= chirp0 */
+         chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 );
+     }
+     /* Last coefficient is handled outside the loop so the factor is not updated once more than needed */
+     ar[ d - 1 ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ d - 1 ] ), 16 );
+ }
diff --git a/drivers/opus/silk/bwexpander_32.c b/drivers/opus/silk/bwexpander_32.c
new file mode 100644
index 0000000000..8a60767c0d
--- /dev/null
+++ b/drivers/opus/silk/bwexpander_32.c
@@ -0,0 +1,50 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+ /* Chirp (bandwidth expand) LP AR filter, 32-bit coefficient version. */
+ /* Scales the coefficients in place: ar[ 0 ] by chirp_Q16, and each following */
+ /* coefficient by the previous factor multiplied once more by the original   */
+ /* chirp_Q16 (all in Q16).                                                   */
+ void silk_bwexpander_32(
+     opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */
+     const opus_int d, /* I Length of ar */
+     opus_int32 chirp_Q16 /* I Chirp factor in Q16 */
+ )
+ {
+     opus_int i;
+     opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536;
+
+     /* Nothing to expand; without this guard the final store below would write ar[ -1 ] when d == 0 */
+     if( d < 1 ) {
+         return;
+     }
+
+     for( i = 0; i < d - 1; i++ ) {
+         ar[ i ] = silk_SMULWW( chirp_Q16, ar[ i ] );
+         /* chirp += chirp * (chirp0 - 1), i.e. chirp *= chirp0 */
+         chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 );
+     }
+     /* Last coefficient is handled outside the loop so the factor is not updated once more than needed */
+     ar[ d - 1 ] = silk_SMULWW( chirp_Q16, ar[ d - 1 ] );
+ }
+
diff --git a/drivers/opus/silk/check_control_input.c b/drivers/opus/silk/check_control_input.c
new file mode 100644
index 0000000000..0e02fff22d
--- /dev/null
+++ b/drivers/opus/silk/check_control_input.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "control.h"
+#include "errors.h"
+
+ /* Check encoder control struct. */
+ /* Returns SILK_NO_ERROR when every field is acceptable; otherwise asserts and */
+ /* returns the error code of the first failing check, tested in this order:    */
+ /* sample rates, payload size, loss rate, DTX, CBR, in-band FEC, channel       */
+ /* counts, complexity.                                                         */
+ opus_int check_control_input(
+     silk_EncControlStruct *encControl /* I Control structure */
+ )
+ {
+     opus_int apiRateOk, desiredRateOk, maxRateOk, minRateOk, rateOrderOk, payloadOk;
+
+     silk_assert( encControl != NULL );
+
+     /* Sample rates: each must be one of the listed values ... */
+     apiRateOk     = ( encControl->API_sampleRate == 8000 ) ||
+                     ( encControl->API_sampleRate == 12000 ) ||
+                     ( encControl->API_sampleRate == 16000 ) ||
+                     ( encControl->API_sampleRate == 24000 ) ||
+                     ( encControl->API_sampleRate == 32000 ) ||
+                     ( encControl->API_sampleRate == 44100 ) ||
+                     ( encControl->API_sampleRate == 48000 );
+     desiredRateOk = ( encControl->desiredInternalSampleRate == 8000 ) ||
+                     ( encControl->desiredInternalSampleRate == 12000 ) ||
+                     ( encControl->desiredInternalSampleRate == 16000 );
+     maxRateOk     = ( encControl->maxInternalSampleRate == 8000 ) ||
+                     ( encControl->maxInternalSampleRate == 12000 ) ||
+                     ( encControl->maxInternalSampleRate == 16000 );
+     minRateOk     = ( encControl->minInternalSampleRate == 8000 ) ||
+                     ( encControl->minInternalSampleRate == 12000 ) ||
+                     ( encControl->minInternalSampleRate == 16000 );
+     /* ... and the internal rates must satisfy min <= desired <= max */
+     rateOrderOk   = ( encControl->minInternalSampleRate <= encControl->desiredInternalSampleRate ) &&
+                     ( encControl->maxInternalSampleRate >= encControl->desiredInternalSampleRate ) &&
+                     ( encControl->minInternalSampleRate <= encControl->maxInternalSampleRate );
+     if( !( apiRateOk && desiredRateOk && maxRateOk && minRateOk && rateOrderOk ) ) {
+         silk_assert( 0 );
+         return SILK_ENC_FS_NOT_SUPPORTED;
+     }
+
+     /* Payload size must be 10, 20, 40 or 60 ms */
+     payloadOk = ( encControl->payloadSize_ms == 10 ) ||
+                 ( encControl->payloadSize_ms == 20 ) ||
+                 ( encControl->payloadSize_ms == 40 ) ||
+                 ( encControl->payloadSize_ms == 60 );
+     if( !payloadOk ) {
+         silk_assert( 0 );
+         return SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
+     }
+
+     /* Packet loss is a percentage */
+     if( !( encControl->packetLossPercentage >= 0 && encControl->packetLossPercentage <= 100 ) ) {
+         silk_assert( 0 );
+         return SILK_ENC_INVALID_LOSS_RATE;
+     }
+
+     /* The three on/off switches must be 0 or 1 */
+     if( !( encControl->useDTX >= 0 && encControl->useDTX <= 1 ) ) {
+         silk_assert( 0 );
+         return SILK_ENC_INVALID_DTX_SETTING;
+     }
+     if( !( encControl->useCBR >= 0 && encControl->useCBR <= 1 ) ) {
+         silk_assert( 0 );
+         return SILK_ENC_INVALID_CBR_SETTING;
+     }
+     if( !( encControl->useInBandFEC >= 0 && encControl->useInBandFEC <= 1 ) ) {
+         silk_assert( 0 );
+         return SILK_ENC_INVALID_INBAND_FEC_SETTING;
+     }
+
+     /* Channel counts: both within 1..ENCODER_NUM_CHANNELS, internal not above API */
+     if( encControl->nChannelsAPI < 1 || encControl->nChannelsAPI > ENCODER_NUM_CHANNELS ||
+         encControl->nChannelsInternal < 1 || encControl->nChannelsInternal > ENCODER_NUM_CHANNELS ||
+         encControl->nChannelsInternal > encControl->nChannelsAPI ) {
+         silk_assert( 0 );
+         return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
+     }
+
+     /* Complexity must lie in 0..10 */
+     if( !( encControl->complexity >= 0 && encControl->complexity <= 10 ) ) {
+         silk_assert( 0 );
+         return SILK_ENC_INVALID_COMPLEXITY_SETTING;
+     }
+
+     return SILK_NO_ERROR;
+ }
diff --git a/drivers/opus/silk/code_signs.c b/drivers/opus/silk/code_signs.c
new file mode 100644
index 0000000000..8bcc6ecde1
--- /dev/null
+++ b/drivers/opus/silk/code_signs.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/*#define silk_enc_map(a) ((a) > 0 ? 1 : 0)*/
+/*#define silk_dec_map(a) ((a) > 0 ? 1 : -1)*/
+/* Shift-based replacements for the two conditional forms commented out above; shifting avoids if-statement */
+#define silk_enc_map(a) ( silk_RSHIFT( (a), 15 ) + 1 )
+#define silk_dec_map(a) ( silk_LSHIFT( (a), 1 ) - 1 )
+
+/* Encodes signs of excitation: one symbol per non-zero pulse, visited block by block */
+void silk_encode_signs(
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ const opus_int8 pulses[], /* I pulse signal */
+ opus_int length, /* I length of input */
+ const opus_int signalType, /* I Signal type */
+ const opus_int quantOffsetType, /* I Quantization offset type */
+ const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */
+)
+{
+ opus_int i, j, p;
+ opus_uint8 icdf[ 2 ]; /* two-entry table handed to ec_enc_icdf() */
+ const opus_int8 *q_ptr; /* start of the block currently being visited */
+ const opus_uint8 *icdf_ptr;
+
+ icdf[ 1 ] = 0; /* second entry is always 0; only icdf[ 0 ] changes per block */
+ q_ptr = pulses;
+ i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) ); /* table offset derived from signalType and quantOffsetType */
+ icdf_ptr = &silk_sign_iCDF[ i ];
+ length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH ); /* length is reused as the number of blocks to visit */
+ for( i = 0; i < length; i++ ) {
+ p = sum_pulses[ i ];
+ if( p > 0 ) { /* nothing is coded for a block whose sum is not positive */
+ icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ]; /* index is capped at 6 */
+ for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) {
+ if( q_ptr[ j ] != 0 ) { /* zero pulses have no sign to code */
+ ec_enc_icdf( psRangeEnc, silk_enc_map( q_ptr[ j ]), icdf, 8 );
+ }
+ }
+ }
+ q_ptr += SHELL_CODEC_FRAME_LENGTH; /* advance to the next block even when this one was skipped */
+ }
+}
+
+/* Decodes signs of excitation: reads one symbol per positive entry and applies it in place, block by block */
+void silk_decode_signs(
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int pulses[], /* I/O pulse signal */
+ opus_int length, /* I length of input */
+ const opus_int signalType, /* I Signal type */
+ const opus_int quantOffsetType, /* I Quantization offset type */
+ const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */
+)
+{
+ opus_int i, j, p;
+ opus_uint8 icdf[ 2 ]; /* two-entry table handed to ec_dec_icdf() */
+ opus_int *q_ptr; /* start of the block currently being visited */
+ const opus_uint8 *icdf_ptr;
+
+ icdf[ 1 ] = 0; /* second entry is always 0; only icdf[ 0 ] changes per block */
+ q_ptr = pulses;
+ i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) ); /* table offset derived from signalType and quantOffsetType */
+ icdf_ptr = &silk_sign_iCDF[ i ];
+ length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH ); /* length is reused as the number of blocks to visit */
+ for( i = 0; i < length; i++ ) {
+ p = sum_pulses[ i ];
+ if( p > 0 ) { /* nothing is read for a block whose sum is not positive */
+ icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ]; /* index is capped at 6 */
+ for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) {
+ if( q_ptr[ j ] > 0 ) { /* only entries greater than zero consume a symbol */
+ /* attach sign */
+#if 0
+ /* conditional implementation (compiled out, kept for reference) */
+ if( ec_dec_icdf( psRangeDec, icdf, 8 ) == 0 ) {
+ q_ptr[ j ] = -q_ptr[ j ];
+ }
+#else
+ /* implementation with shift, subtraction, multiplication */
+ q_ptr[ j ] *= silk_dec_map( ec_dec_icdf( psRangeDec, icdf, 8 ) );
+#endif
+ }
+ }
+ }
+ q_ptr += SHELL_CODEC_FRAME_LENGTH; /* advance to the next block even when this one was skipped */
+ }
+}
diff --git a/drivers/opus/silk/control.h b/drivers/opus/silk/control.h
new file mode 100644
index 0000000000..747e5426a0
--- /dev/null
+++ b/drivers/opus/silk/control.h
@@ -0,0 +1,142 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_CONTROL_H
+#define SILK_CONTROL_H
+
+#include "typedef.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Decoder API flags */
+#define FLAG_DECODE_NORMAL 0
+#define FLAG_PACKET_LOST 1
+#define FLAG_DECODE_LBRR 2
+
+/***********************************************/
+/* Structure for controlling encoder operation */
+/***********************************************/
+typedef struct {
+ /* I: Number of channels at the API; 1/2 */
+ opus_int32 nChannelsAPI;
+
+ /* I: Number of channels; 1/2 (the input check rejects nChannelsInternal > nChannelsAPI) */
+ opus_int32 nChannelsInternal;
+
+ /* I: Input signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000 */
+ opus_int32 API_sampleRate;
+
+ /* I: Maximum internal sampling rate in Hertz; 8000/12000/16000 */
+ opus_int32 maxInternalSampleRate;
+
+ /* I: Minimum internal sampling rate in Hertz; 8000/12000/16000 */
+ opus_int32 minInternalSampleRate;
+
+ /* I: Soft request for internal sampling rate in Hertz; 8000/12000/16000 */
+ opus_int32 desiredInternalSampleRate;
+
+ /* I: Packet duration in milliseconds; 10/20/40/60 */
+ opus_int payloadSize_ms;
+
+ /* I: Bitrate during active speech in bits/second; internally limited */
+ opus_int32 bitRate;
+
+ /* I: Uplink packet loss in percent (0-100) */
+ opus_int packetLossPercentage;
+
+ /* I: Complexity mode; 0 is lowest, 10 is highest complexity */
+ opus_int complexity;
+
+ /* I: Flag to enable in-band Forward Error Correction (FEC); 0/1 */
+ opus_int useInBandFEC;
+
+ /* I: Flag to enable discontinuous transmission (DTX); 0/1 */
+ opus_int useDTX;
+
+ /* I: Flag to use constant bitrate; 0/1 */
+ opus_int useCBR;
+
+ /* I/O: Maximum number of bits allowed for the frame (silk_control_audio_bandwidth may lower it to make room for redundancy) */
+ opus_int maxBits;
+
+ /* I: Causes a smooth downmix to mono */
+ opus_int toMono;
+
+ /* I: Opus encoder is allowing us to switch bandwidth */
+ opus_int opusCanSwitch;
+
+ /* I: Make frames as independent as possible (but still use LPC) */
+ opus_int reducedDependency;
+
+ /* O: Internal sampling rate used, in Hertz; 8000/12000/16000 */
+ opus_int32 internalSampleRate;
+
+ /* O: Flag that bandwidth switching is allowed (because low voice activity) */
+ opus_int allowBandwidthSwitch;
+
+ /* O: Flag that SILK runs in WB mode without variable LP filter (use for switching between WB/SWB/FB) */
+ opus_int inWBmodeWithoutVariableLP;
+
+ /* O: Stereo width */
+ opus_int stereoWidth_Q14;
+
+ /* O: Tells the Opus encoder we're ready to switch */
+ opus_int switchReady;
+
+} silk_EncControlStruct;
+
+/**************************************************************************/
+/* Structure for controlling decoder operation and reading decoder status */
+/**************************************************************************/
+typedef struct {
+ /* I: Number of channels at the API; 1/2 */
+ opus_int32 nChannelsAPI;
+
+ /* I: Number of channels; 1/2 (presumably the channel count coded in the stream - confirm against the decoder API) */
+ opus_int32 nChannelsInternal;
+
+ /* I: Output signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000 */
+ opus_int32 API_sampleRate;
+
+ /* I: Internal sampling rate used, in Hertz; 8000/12000/16000 */
+ opus_int32 internalSampleRate;
+
+ /* I: Packet duration in milliseconds; 10/20/40/60 */
+ opus_int payloadSize_ms;
+
+ /* O: Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz */
+ opus_int prevPitchLag;
+} silk_DecControlStruct;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/control_SNR.c b/drivers/opus/silk/control_SNR.c
new file mode 100644
index 0000000000..ae6351798b
--- /dev/null
+++ b/drivers/opus/silk/control_SNR.c
@@ -0,0 +1,81 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "tuning_parameters.h"
+
+/* Control SNR of residual quantizer: translate a target bitrate into psEncC->SNR_dB_Q7 */
+opus_int silk_control_SNR(
+ silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */
+ opus_int32 TargetRate_bps /* I Target max bitrate (bps) */
+)
+{
+ opus_int k, ret = SILK_NO_ERROR; /* ret is never changed below, so SILK_NO_ERROR is always returned */
+ opus_int32 frac_Q6;
+ const opus_int32 *rateTable;
+
+ /* Set bitrate/coding quality */
+ TargetRate_bps = silk_LIMIT( TargetRate_bps, MIN_TARGET_RATE_BPS, MAX_TARGET_RATE_BPS );
+ if( TargetRate_bps != psEncC->TargetRate_bps ) { /* nothing is recomputed when the limited rate is unchanged */
+ psEncC->TargetRate_bps = TargetRate_bps;
+
+ /* If new TargetRate_bps, translate to SNR_dB value */
+ if( psEncC->fs_kHz == 8 ) {
+ rateTable = silk_TargetRate_table_NB;
+ } else if( psEncC->fs_kHz == 12 ) {
+ rateTable = silk_TargetRate_table_MB;
+ } else {
+ rateTable = silk_TargetRate_table_WB;
+ }
+
+ /* Reduce bitrate for 10 ms modes in these calculations (the rate stored in the state above is not reduced) */
+ if( psEncC->nb_subfr == 2 ) {
+ TargetRate_bps -= REDUCE_BITRATE_10_MS_BPS;
+ }
+
+ /* Find bitrate interval in table and interpolate; SNR_dB_Q7 keeps its old value if no entry is >= TargetRate_bps */
+ for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) {
+ if( TargetRate_bps <= rateTable[ k ] ) {
+ frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ),
+ rateTable[ k ] - rateTable[ k - 1 ] );
+ psEncC->SNR_dB_Q7 = silk_LSHIFT( silk_SNR_table_Q1[ k - 1 ], 6 ) + silk_MUL( frac_Q6, silk_SNR_table_Q1[ k ] - silk_SNR_table_Q1[ k - 1 ] );
+ break;
+ }
+ }
+
+ /* Reduce coding quality whenever LBRR is enabled, to free up some bits */
+ if( psEncC->LBRR_enabled ) {
+ psEncC->SNR_dB_Q7 = silk_SMLABB( psEncC->SNR_dB_Q7, 12 - psEncC->LBRR_GainIncreases, SILK_FIX_CONST( -0.25, 7 ) );
+ }
+ }
+
+ return ret;
+}
diff --git a/drivers/opus/silk/control_audio_bandwidth.c b/drivers/opus/silk/control_audio_bandwidth.c
new file mode 100644
index 0000000000..6f060bbd29
--- /dev/null
+++ b/drivers/opus/silk/control_audio_bandwidth.c
@@ -0,0 +1,126 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "tuning_parameters.h"
+
+/* Control internal sampling rate; returns the internal rate to use, in kHz */
+opus_int silk_control_audio_bandwidth(
+ silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */
+ silk_EncControlStruct *encControl /* I/O Control structure (switchReady and maxBits may be written) */
+)
+{
+ opus_int fs_kHz;
+ opus_int32 fs_Hz;
+
+ fs_kHz = psEncC->fs_kHz;
+ fs_Hz = silk_SMULBB( fs_kHz, 1000 );
+ if( fs_Hz == 0 ) {
+ /* Encoder has just been initialized */
+ fs_Hz = silk_min( psEncC->desiredInternal_fs_Hz, psEncC->API_fs_Hz );
+ fs_kHz = silk_DIV32_16( fs_Hz, 1000 );
+ } else if( fs_Hz > psEncC->API_fs_Hz || fs_Hz > psEncC->maxInternal_fs_Hz || fs_Hz < psEncC->minInternal_fs_Hz ) {
+ /* Make sure internal rate is not higher than external rate or maximum allowed, or lower than minimum allowed */
+ fs_Hz = psEncC->API_fs_Hz;
+ fs_Hz = silk_min( fs_Hz, psEncC->maxInternal_fs_Hz );
+ fs_Hz = silk_max( fs_Hz, psEncC->minInternal_fs_Hz );
+ fs_kHz = silk_DIV32_16( fs_Hz, 1000 );
+ } else {
+ /* State machine for the internal sampling rate switching */
+ if( psEncC->sLP.transition_frame_no >= TRANSITION_FRAMES ) {
+ /* Stop transition phase */
+ psEncC->sLP.mode = 0;
+ }
+ if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) {
+ /* Check if we should switch down */
+ if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz )
+ {
+ /* Switch down */
+ if( psEncC->sLP.mode == 0 ) {
+ /* New transition */
+ psEncC->sLP.transition_frame_no = TRANSITION_FRAMES;
+
+ /* Reset transition filter state */
+ silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
+ }
+ if( encControl->opusCanSwitch ) {
+ /* Stop transition phase */
+ psEncC->sLP.mode = 0;
+
+ /* Switch to a lower sample frequency (one step: 16 -> 12, anything else -> 8) */
+ fs_kHz = psEncC->fs_kHz == 16 ? 12 : 8;
+ } else {
+ if( psEncC->sLP.transition_frame_no <= 0 ) {
+ encControl->switchReady = 1; /* report readiness through the control structure; fs_kHz itself is not changed here */
+ /* Make room for redundancy */
+ encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 );
+ } else {
+ /* Direction: down (at double speed) */
+ psEncC->sLP.mode = -2;
+ }
+ }
+ }
+ else
+ /* Check if we should switch up */
+ if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz )
+ {
+ /* Switch up */
+ if( encControl->opusCanSwitch ) {
+ /* Switch to a higher sample frequency (one step: 8 -> 12, anything else -> 16) */
+ fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16;
+
+ /* New transition */
+ psEncC->sLP.transition_frame_no = 0;
+
+ /* Reset transition filter state */
+ silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
+
+ /* Direction: up */
+ psEncC->sLP.mode = 1;
+ } else {
+ if( psEncC->sLP.mode == 0 ) {
+ encControl->switchReady = 1; /* report readiness through the control structure; fs_kHz itself is not changed here */
+ /* Make room for redundancy */
+ encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 );
+ } else {
+ /* Direction: up */
+ psEncC->sLP.mode = 1;
+ }
+ }
+ } else {
+ if (psEncC->sLP.mode<0) /* current rate equals the desired rate: a transition marked "down" is flipped */
+ psEncC->sLP.mode = 1; /* Direction: up */
+ }
+ }
+ }
+
+ return fs_kHz;
+}
diff --git a/drivers/opus/silk/control_codec.c b/drivers/opus/silk/control_codec.c
new file mode 100644
index 0000000000..2d7b10e9b7
--- /dev/null
+++ b/drivers/opus/silk/control_codec.c
@@ -0,0 +1,422 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#ifdef OPUS_FIXED_POINT
+#include "main_FIX.h"
+#define silk_encoder_state_Fxx silk_encoder_state_FIX
+#else
+#include "main_FLP.h"
+#define silk_encoder_state_Fxx silk_encoder_state_FLP
+#endif
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+#include "pitch_est_defines.h"
+
+static opus_int silk_setup_resamplers(
+ silk_encoder_state_Fxx *psEnc, /* I/O Encoder state */
+ opus_int fs_kHz /* I Internal sampling rate to prepare for (kHz) */
+);
+
+static opus_int silk_setup_fs(
+ silk_encoder_state_Fxx *psEnc, /* I/O Encoder state */
+ opus_int fs_kHz, /* I Internal sampling rate (kHz) */
+ opus_int PacketSize_ms /* I Packet size (ms) */
+);
+
+static opus_int silk_setup_complexity(
+ silk_encoder_state *psEncC, /* I/O Common encoder state */
+ opus_int Complexity /* I Complexity setting; asserted to be 0..10 */
+);
+
+static OPUS_INLINE opus_int silk_setup_LBRR(
+ silk_encoder_state *psEncC, /* I/O Common encoder state */
+ const opus_int32 TargetRate_bps /* I Target bitrate compared against the LBRR threshold (bps) */
+);
+
+
+/* Control encoder: copy the control settings into the state and run the setup steps; returns the sum of their return codes */
+opus_int silk_control_encoder(
+ silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */
+ silk_EncControlStruct *encControl, /* I/O Control structure (handed on to silk_control_audio_bandwidth) */
+ const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */
+ const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */
+ const opus_int channelNb, /* I Channel number */
+ const opus_int force_fs_kHz /* I If non-zero, overrides the internal rate chosen below (kHz) */
+)
+{
+ opus_int fs_kHz, ret = 0;
+
+ psEnc->sCmn.useDTX = encControl->useDTX;
+ psEnc->sCmn.useCBR = encControl->useCBR;
+ psEnc->sCmn.API_fs_Hz = encControl->API_sampleRate;
+ psEnc->sCmn.maxInternal_fs_Hz = encControl->maxInternalSampleRate;
+ psEnc->sCmn.minInternal_fs_Hz = encControl->minInternalSampleRate;
+ psEnc->sCmn.desiredInternal_fs_Hz = encControl->desiredInternalSampleRate;
+ psEnc->sCmn.useInBandFEC = encControl->useInBandFEC;
+ psEnc->sCmn.nChannelsAPI = encControl->nChannelsAPI;
+ psEnc->sCmn.nChannelsInternal = encControl->nChannelsInternal;
+ psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch;
+ psEnc->sCmn.channelNb = channelNb;
+
+ if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) {
+ if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) {
+ /* Change in API sampling rate in the middle of encoding a packet */
+ ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz );
+ }
+ return ret; /* the setup steps below are skipped on this path */
+ }
+
+ /* Beyond this point we know that there are no previously coded frames in the payload buffer */
+
+ /********************************************/
+ /* Determine internal sampling rate */
+ /********************************************/
+ fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl );
+ if( force_fs_kHz ) {
+ fs_kHz = force_fs_kHz;
+ }
+ /********************************************/
+ /* Prepare resampler and buffered data */
+ /********************************************/
+ ret += silk_setup_resamplers( psEnc, fs_kHz );
+
+ /********************************************/
+ /* Set internal sampling frequency */
+ /********************************************/
+ ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms );
+
+ /********************************************/
+ /* Set encoding complexity */
+ /********************************************/
+ ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity );
+
+ /********************************************/
+ /* Set packet loss rate measured by farend */
+ /********************************************/
+ psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage;
+
+ /********************************************/
+ /* Set LBRR usage */
+ /********************************************/
+ ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps );
+
+ psEnc->sCmn.controlled_since_last_payload = 1;
+
+ return ret;
+}
+
+static opus_int silk_setup_resamplers( /* (Re)initializes the API-rate to internal-rate resampler when either rate has changed */
+ silk_encoder_state_Fxx *psEnc, /* I/O Encoder state */
+ opus_int fs_kHz /* I New internal sampling rate (kHz) */
+)
+{
+ opus_int ret = SILK_NO_ERROR; /* return codes of the resampler calls are added to this */
+ SAVE_STACK;
+
+ if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz )
+ {
+ if( psEnc->sCmn.fs_kHz == 0 ) {
+ /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
+ ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );
+ } else {
+ VARDECL( opus_int16, x_buf_API_fs_Hz );
+ VARDECL( silk_resampler_state_struct, temp_resampler_state );
+#ifdef OPUS_FIXED_POINT
+ opus_int16 *x_bufFIX = psEnc->x_buf; /* fixed-point build: work directly on the state buffer */
+#else
+ VARDECL( opus_int16, x_bufFIX ); /* float build: 16-bit scratch copy of x_buf */
+ opus_int32 new_buf_samples;
+#endif
+ opus_int32 api_buf_samples;
+ opus_int32 old_buf_samples;
+ opus_int32 buf_length_ms;
+
+ buf_length_ms = silk_LSHIFT( psEnc->sCmn.nb_subfr * 5, 1 ) + LA_SHAPE_MS;
+ old_buf_samples = buf_length_ms * psEnc->sCmn.fs_kHz; /* sample count at the current internal rate */
+
+#ifndef OPUS_FIXED_POINT
+ new_buf_samples = buf_length_ms * fs_kHz; /* sample count at the new internal rate */
+ ALLOC( x_bufFIX, silk_max( old_buf_samples, new_buf_samples ),
+ opus_int16 );
+ silk_float2short_array( x_bufFIX, psEnc->x_buf, old_buf_samples );
+#endif
+
+ /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
+ ALLOC( temp_resampler_state, 1, silk_resampler_state_struct );
+ ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );
+
+ /* Calculate number of samples to temporarily upsample */
+ api_buf_samples = buf_length_ms * silk_DIV32_16( psEnc->sCmn.API_fs_Hz, 1000 );
+
+ /* Temporary resampling of x_buf data to API_fs_Hz */
+ ALLOC( x_buf_API_fs_Hz, api_buf_samples, opus_int16 );
+ ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, old_buf_samples );
+
+ /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
+ ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );
+
+ /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
+ ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, api_buf_samples );
+
+#ifndef OPUS_FIXED_POINT
+ silk_short2float_array( psEnc->x_buf, x_bufFIX, new_buf_samples);
+#endif
+ }
+ }
+
+ psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz; /* remember the API rate so the next call can detect a change */
+
+ RESTORE_STACK;
+ return ret;
+}
+
+static opus_int silk_setup_fs( /* Applies a new packet size and/or internal sampling rate to the encoder state */
+ silk_encoder_state_Fxx *psEnc, /* I/O Encoder state */
+ opus_int fs_kHz, /* I Internal sampling rate (kHz); asserted to be 8, 12 or 16 */
+ opus_int PacketSize_ms /* I Packet size (ms) */
+)
+{
+ opus_int ret = SILK_NO_ERROR;
+
+ /* Set packet size */
+ if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) {
+ if( ( PacketSize_ms != 10 ) &&
+ ( PacketSize_ms != 20 ) &&
+ ( PacketSize_ms != 40 ) &&
+ ( PacketSize_ms != 60 ) ) {
+ ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED; /* the error is recorded, but the setup below still runs */
+ }
+ if( PacketSize_ms <= 10 ) {
+ psEnc->sCmn.nFramesPerPacket = 1;
+ psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 2 : 1;
+ psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz );
+ psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
+ if( psEnc->sCmn.fs_kHz == 8 ) { /* tests the rate stored in the state, not the fs_kHz argument; the contour is selected again below when the rate changes */
+ psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+ } else {
+ psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+ }
+ } else {
+ psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS );
+ psEnc->sCmn.nb_subfr = MAX_NB_SUBFR;
+ psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz );
+ psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
+ if( psEnc->sCmn.fs_kHz == 8 ) { /* tests the rate stored in the state, not the fs_kHz argument; the contour is selected again below when the rate changes */
+ psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+ } else {
+ psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
+ }
+ }
+ psEnc->sCmn.PacketSize_ms = PacketSize_ms;
+ psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */
+ }
+
+ /* Set internal sampling frequency */
+ silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
+ silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
+ if( psEnc->sCmn.fs_kHz != fs_kHz ) {
+ /* reset part of the state */
+ silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) );
+ silk_memset( &psEnc->sPrefilt, 0, sizeof( psEnc->sPrefilt ) );
+ silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) );
+ silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+ silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
+ psEnc->sCmn.inputBufIx = 0;
+ psEnc->sCmn.nFramesEncoded = 0;
+ psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */
+
+ /* Initialize non-zero parameters */
+ psEnc->sCmn.prevLag = 100;
+ psEnc->sCmn.first_frame_after_reset = 1;
+ psEnc->sPrefilt.lagPrev = 100;
+ psEnc->sShape.LastGainIndex = 10;
+ psEnc->sCmn.sNSQ.lagPrev = 100;
+ psEnc->sCmn.sNSQ.prev_gain_Q16 = 65536;
+ psEnc->sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+
+ psEnc->sCmn.fs_kHz = fs_kHz; /* from here on the stored rate is the new rate */
+ if( psEnc->sCmn.fs_kHz == 8 ) {
+ if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+ psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+ } else {
+ psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+ }
+ } else {
+ if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+ psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
+ } else {
+ psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+ }
+ }
+ if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) {
+ psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER;
+ psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_NB_MB;
+ } else {
+ psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER;
+ psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_WB;
+ }
+ psEnc->sCmn.subfr_length = SUB_FRAME_LENGTH_MS * fs_kHz;
+ psEnc->sCmn.frame_length = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );
+ psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
+ psEnc->sCmn.la_pitch = silk_SMULBB( LA_PITCH_MS, fs_kHz );
+ psEnc->sCmn.max_pitch_lag = silk_SMULBB( 18, fs_kHz );
+ if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+ psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
+ } else {
+ psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
+ }
+ if( psEnc->sCmn.fs_kHz == 16 ) {
+ psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 );
+ psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
+ } else if( psEnc->sCmn.fs_kHz == 12 ) {
+ psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 );
+ psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
+ } else {
+ psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 );
+ psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
+ }
+ }
+
+ /* Check that settings are valid */
+ silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length );
+
+ return ret;
+}
+
+static opus_int silk_setup_complexity( /* Maps the Complexity setting onto the individual encoder tuning fields */
+ silk_encoder_state *psEncC, /* I/O Common encoder state */
+ opus_int Complexity /* I Complexity setting; asserted to be 0..10 */
+)
+{
+ opus_int ret = 0; /* never changed below, so 0 is always returned */
+
+ /* Set encoding complexity (five tiers: 0-1, 2-3, 4-5, 6-7, 8 and above) */
+ silk_assert( Complexity >= 0 && Complexity <= 10 );
+ if( Complexity < 2 ) {
+ psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX;
+ psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 );
+ psEncC->pitchEstimationLPCOrder = 6;
+ psEncC->shapingLPCOrder = 8;
+ psEncC->la_shape = 3 * psEncC->fs_kHz;
+ psEncC->nStatesDelayedDecision = 1;
+ psEncC->useInterpolatedNLSFs = 0;
+ psEncC->LTPQuantLowComplexity = 1;
+ psEncC->NLSF_MSVQ_Survivors = 2;
+ psEncC->warping_Q16 = 0;
+ } else if( Complexity < 4 ) {
+ psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
+ psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 );
+ psEncC->pitchEstimationLPCOrder = 8;
+ psEncC->shapingLPCOrder = 10;
+ psEncC->la_shape = 5 * psEncC->fs_kHz;
+ psEncC->nStatesDelayedDecision = 1;
+ psEncC->useInterpolatedNLSFs = 0;
+ psEncC->LTPQuantLowComplexity = 0;
+ psEncC->NLSF_MSVQ_Survivors = 4;
+ psEncC->warping_Q16 = 0;
+ } else if( Complexity < 6 ) {
+ psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
+ psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.74, 16 );
+ psEncC->pitchEstimationLPCOrder = 10;
+ psEncC->shapingLPCOrder = 12;
+ psEncC->la_shape = 5 * psEncC->fs_kHz;
+ psEncC->nStatesDelayedDecision = 2;
+ psEncC->useInterpolatedNLSFs = 1;
+ psEncC->LTPQuantLowComplexity = 0;
+ psEncC->NLSF_MSVQ_Survivors = 8;
+ psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+ } else if( Complexity < 8 ) {
+ psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
+ psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.72, 16 );
+ psEncC->pitchEstimationLPCOrder = 12;
+ psEncC->shapingLPCOrder = 14;
+ psEncC->la_shape = 5 * psEncC->fs_kHz;
+ psEncC->nStatesDelayedDecision = 3;
+ psEncC->useInterpolatedNLSFs = 1;
+ psEncC->LTPQuantLowComplexity = 0;
+ psEncC->NLSF_MSVQ_Survivors = 16;
+ psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+ } else {
+ psEncC->pitchEstimationComplexity = SILK_PE_MAX_COMPLEX;
+ psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.7, 16 );
+ psEncC->pitchEstimationLPCOrder = 16;
+ psEncC->shapingLPCOrder = 16;
+ psEncC->la_shape = 5 * psEncC->fs_kHz;
+ psEncC->nStatesDelayedDecision = MAX_DEL_DEC_STATES;
+ psEncC->useInterpolatedNLSFs = 1;
+ psEncC->LTPQuantLowComplexity = 0;
+ psEncC->NLSF_MSVQ_Survivors = 32;
+ psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+ }
+
+ /* Do not allow higher pitch estimation LPC order than predict LPC order */
+ psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder );
+ psEncC->shapeWinLength = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape;
+ psEncC->Complexity = Complexity;
+
+ silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER );
+ silk_assert( psEncC->shapingLPCOrder <= MAX_SHAPE_LPC_ORDER );
+ silk_assert( psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES );
+ silk_assert( psEncC->warping_Q16 <= 32767 );
+ silk_assert( psEncC->la_shape <= LA_SHAPE_MAX );
+ silk_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX );
+ silk_assert( psEncC->NLSF_MSVQ_Survivors <= NLSF_VQ_MAX_SURVIVORS );
+
+ return ret;
+}
+
+static OPUS_INLINE opus_int silk_setup_LBRR( /* Decides whether LBRR is enabled and, if so, sets LBRR_GainIncreases */
+ silk_encoder_state *psEncC, /* I/O Common encoder state */
+ const opus_int32 TargetRate_bps /* I Target bitrate compared against the LBRR threshold (bps) */
+)
+{
+ opus_int ret = SILK_NO_ERROR; /* never changed below, so SILK_NO_ERROR is always returned */
+ opus_int32 LBRR_rate_thres_bps;
+
+ psEncC->LBRR_enabled = 0; /* stays 0 unless every condition below holds */
+ if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) {
+ if( psEncC->fs_kHz == 8 ) {
+ LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS;
+ } else if( psEncC->fs_kHz == 12 ) {
+ LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS;
+ } else {
+ LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS;
+ }
+ LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) ); /* threshold is scaled down as the loss percentage rises; loss is capped at 25 here */
+
+ if( TargetRate_bps > LBRR_rate_thres_bps ) {
+ /* Set gain increase for coding LBRR excitation (never below 2) */
+ psEncC->LBRR_enabled = 1;
+ psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
+ }
+ }
+
+ return ret;
+}
diff --git a/drivers/opus/silk/debug.c b/drivers/opus/silk/debug.c
new file mode 100644
index 0000000000..2230813fae
--- /dev/null
+++ b/drivers/opus/silk/debug.c
@@ -0,0 +1,170 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "debug.h"
+#include "SigProc_FIX.h"
+
+#if SILK_TIC_TOC
+
+#ifdef _WIN32
+#include <windows.h> /* QueryPerformanceCounter, QueryPerformanceFrequency */
+
+/* Returns a time counter in microseconds; the resolution is platform dependent. */
+/* NOTE(review): debug.h declares GetHighResolutionTime() without the silk_ prefix - confirm the intended name. */
+unsigned long silk_GetHighResolutionTime(void) /* O time in usec*/
+{
+ LARGE_INTEGER lpPerformanceCount;
+ LARGE_INTEGER lpFrequency;
+ QueryPerformanceCounter(&lpPerformanceCount);
+ QueryPerformanceFrequency(&lpFrequency);
+ /* Scale whole seconds and the remainder separately so 1000000 * QuadPart cannot overflow 64 bits */
+ return (unsigned long)((lpPerformanceCount.QuadPart / lpFrequency.QuadPart) * 1000000 +
+ ((lpPerformanceCount.QuadPart % lpFrequency.QuadPart) * 1000000) / lpFrequency.QuadPart);
+}
+#else /* Linux or Mac*/
+#include <sys/time.h> /* gettimeofday, struct timeval */
+
+/* Returns the gettimeofday() time in microseconds, reduced modulo the range of unsigned long */
+unsigned long GetHighResolutionTime(void) /* O time in usec*/
+{
+ struct timeval tv;
+ gettimeofday(&tv, 0);
+ /* Unsigned arithmetic: tv_sec * 1000000 overflows a signed 32-bit long, which is undefined */
+ return (unsigned long)tv.tv_sec * 1000000UL + (unsigned long)tv.tv_usec;
+}
+#endif
+
+int silk_Timer_nTimers = 0; /* number of timer slots registered by TIC */
+int silk_Timer_depth_ctr = 0; /* incremented by TIC, decremented by TOC */
+char silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN]; /* tag name of each timer */
+#ifdef _WIN32 /* same macro as the extern declaration in debug.h, so both agree on the element type */
+LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX];
+#else
+unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX];
+#endif
+unsigned int silk_Timer_cnt[silk_NUM_TIMERS_MAX]; /* number of intervals accepted by TOC */
+opus_int64 silk_Timer_min[silk_NUM_TIMERS_MAX]; /* shortest accepted interval */
+opus_int64 silk_Timer_sum[silk_NUM_TIMERS_MAX]; /* sum of accepted intervals */
+opus_int64 silk_Timer_max[silk_NUM_TIMERS_MAX]; /* longest accepted interval */
+opus_int64 silk_Timer_depth[silk_NUM_TIMERS_MAX]; /* value of silk_Timer_depth_ctr when the tag was registered */
+
+#ifdef _WIN32
+/* Writes min/avg/max in microseconds, share of the depth-0 total and call count of every timer to file_name */
+void silk_TimerSave(char *file_name)
+{
+ int k, del = 0x7FFFFFFF;
+ FILE *fp;
+ LARGE_INTEGER lpFrequency;
+ LARGE_INTEGER lpPerformanceCount1, lpPerformanceCount2;
+ double avg, sum_avg = 0.0;
+
+ if( silk_Timer_nTimers <= 0 ) return;
+ /* estimate overhead of calling performance counters (smallest of 1000 back-to-back readings) */
+ for( k = 0; k < 1000; k++ ) {
+ QueryPerformanceCounter(&lpPerformanceCount1);
+ QueryPerformanceCounter(&lpPerformanceCount2);
+ lpPerformanceCount2.QuadPart -= lpPerformanceCount1.QuadPart;
+ if( (int)lpPerformanceCount2.LowPart < del )
+ del = lpPerformanceCount2.LowPart;
+ }
+ QueryPerformanceFrequency(&lpFrequency);
+ /* total time of the depth-0 timers; del is in counter ticks, so subtract it before scaling to usec */
+ for( k = 0; k < silk_Timer_nTimers; k++ ) {
+ if (silk_Timer_depth[k] == 0) {
+ sum_avg += 1e6 * ((double)silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart * silk_Timer_cnt[k];
+ }
+ }
+ /* print results to file */
+ fp = fopen(file_name, "w");
+ if( fp == NULL ) return; /* report cannot be created; nothing else to release */
+ fprintf(fp, " min avg %% max count\n");
+ for( k = 0; k < silk_Timer_nTimers; k++ ) {
+ if (silk_Timer_depth[k] == 0) {
+ fprintf(fp, "%-28s", silk_Timer_tags[k]);
+ } else if (silk_Timer_depth[k] == 1) {
+ fprintf(fp, " %-27s", silk_Timer_tags[k]);
+ } else if (silk_Timer_depth[k] == 2) {
+ fprintf(fp, " %-26s", silk_Timer_tags[k]);
+ } else if (silk_Timer_depth[k] == 3) {
+ fprintf(fp, " %-25s", silk_Timer_tags[k]);
+ } else {
+ fprintf(fp, " %-24s", silk_Timer_tags[k]);
+ }
+ avg = 1e6 * ((double)silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart;
+ fprintf(fp, "%8.2f", (1e6 * (silk_max_64(silk_Timer_min[k] - del, 0))) / lpFrequency.QuadPart);
+ fprintf(fp, "%12.2f %6.2f", avg, 100.0 * avg / sum_avg * silk_Timer_cnt[k]);
+ fprintf(fp, "%12.2f", (1e6 * (silk_max_64(silk_Timer_max[k] - del, 0))) / lpFrequency.QuadPart);
+ fprintf(fp, "%10u\n", silk_Timer_cnt[k]);
+ }
+ fprintf(fp, " microseconds\n");
+ fclose(fp);
+}
+#else
+/* Writes min/avg/max in microseconds and call count of every timer to file_name */
+void silk_TimerSave(char *file_name)
+{
+ int k;
+ FILE *fp;
+
+ if( silk_Timer_nTimers <= 0 ) return;
+ /* print results to file */
+ fp = fopen(file_name, "w");
+ if( fp == NULL ) return; /* report cannot be created; nothing else to release */
+ fprintf(fp, " min avg max count\n");
+ for( k = 0; k < silk_Timer_nTimers; k++ ) {
+ if (silk_Timer_depth[k] == 0) {
+ fprintf(fp, "%-28s", silk_Timer_tags[k]);
+ } else if (silk_Timer_depth[k] == 1) {
+ fprintf(fp, " %-27s", silk_Timer_tags[k]);
+ } else if (silk_Timer_depth[k] == 2) {
+ fprintf(fp, " %-26s", silk_Timer_tags[k]);
+ } else if (silk_Timer_depth[k] == 3) {
+ fprintf(fp, " %-25s", silk_Timer_tags[k]);
+ } else {
+ fprintf(fp, " %-24s", silk_Timer_tags[k]);
+ }
+ fprintf(fp, "%lld ", (long long)silk_Timer_min[k]); /* opus_int64 must not be passed for %d */
+ fprintf(fp, "%f ", (double)silk_Timer_sum[k] / (double)silk_Timer_cnt[k]);
+ fprintf(fp, "%lld ", (long long)silk_Timer_max[k]);
+ fprintf(fp, "%10u\n", silk_Timer_cnt[k]);
+ }
+ fprintf(fp, " microseconds\n");
+ fclose(fp);
+}
+#endif
+
+#endif /* SILK_TIC_TOC */
+
+#if SILK_DEBUG
+FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ]; /* one stream per DEBUG_STORE_DATA call site */
+int silk_debug_store_count = 0; /* number of silk_debug_store_fp slots handed out so far */
+#endif /* SILK_DEBUG */
+
diff --git a/drivers/opus/silk/debug.h b/drivers/opus/silk/debug.h
new file mode 100644
index 0000000000..efb6d3e99e
--- /dev/null
+++ b/drivers/opus/silk/debug.h
@@ -0,0 +1,279 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_DEBUG_H
+#define SILK_DEBUG_H
+
+#include "typedef.h"
+#include <stdio.h> /* file writing */
+#include <string.h> /* strcpy, strcmp */
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+unsigned long GetHighResolutionTime(void); /* O time in usec*/
+
+/* SILK_DEBUG follows the compiler's _DEBUG on _WIN32 builds and is 0 on every other platform */
+#if defined _WIN32
+ #ifdef _DEBUG
+ #define SILK_DEBUG 1
+ #else
+ #define SILK_DEBUG 0
+ #endif
+
+ /* change this '#if 0' to '#if 1' to force SILK_DEBUG to 1 regardless of _DEBUG */
+ #if 0
+ /* #define NO_ASSERTS*/
+ #undef SILK_DEBUG
+ #define SILK_DEBUG 1
+ #endif
+#else
+ #define SILK_DEBUG 0
+#endif
+
+/* Set to 1 to compile in the TIC/TOC timers and silk_TimerSave(); 0 turns them into empty macros */
+#define SILK_TIC_TOC 0
+
+
+#if SILK_TIC_TOC
+
+#if (defined(_WIN32) || defined(_WINCE))
+#include <windows.h> /* timer */
+#else /* Linux or Mac*/
+#include <sys/time.h>
+#endif
+
+/*********************************/
+/* timer functions for profiling */
+/*********************************/
+/* example: */
+/* */
+/* TIC(LPC) */
+/* do_LPC(in_vec, order, acoef); // do LPC analysis */
+/* TOC(LPC) */
+/* */
+/* and call the following just before exiting (from main) */
+/* */
+/* silk_TimerSave("silk_TimingData.txt"); */
+/* */
+/* results are now in silk_TimingData.txt */
+
+void silk_TimerSave(char *file_name); /* writes the collected timer statistics to file_name */
+
+/* max number of timers (in different locations) */
+#define silk_NUM_TIMERS_MAX 50
+/* size of one tag buffer for TIC(..), TOC(..) names, terminating NUL included */
+#define silk_NUM_TIMERS_MAX_TAG_LEN 30
+
+extern int silk_Timer_nTimers; /* number of timer slots registered by TIC */
+extern int silk_Timer_depth_ctr; /* incremented by TIC, decremented by TOC */
+extern char silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN]; /* tag name of each timer */
+#ifdef _WIN32
+extern LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX]; /* counter value stored by the last TIC */
+#else
+extern unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX]; /* GetHighResolutionTime() value stored by the last TIC */
+#endif
+extern unsigned int silk_Timer_cnt[silk_NUM_TIMERS_MAX]; /* number of intervals accepted by TOC */
+extern opus_int64 silk_Timer_sum[silk_NUM_TIMERS_MAX]; /* sum of accepted intervals */
+extern opus_int64 silk_Timer_max[silk_NUM_TIMERS_MAX]; /* longest accepted interval */
+extern opus_int64 silk_Timer_min[silk_NUM_TIMERS_MAX]; /* shortest accepted interval */
+extern opus_int64 silk_Timer_depth[silk_NUM_TIMERS_MAX]; /* silk_Timer_depth_ctr at the time the tag was registered */
+
+/* WARNING: TOC discards any interval of 100000000 counter units or more (microseconds on non-Windows builds) */
+#ifdef _WIN32
+#define TIC(TAG_NAME) { \
+ static int init = 0; \
+ static int ID = -1; \
+ if( init == 0 ) { \
+ int k; \
+ init = 1; \
+ for( k = 0; k < silk_Timer_nTimers; k++ ) { \
+ if( strncmp(silk_Timer_tags[k], #TAG_NAME, silk_NUM_TIMERS_MAX_TAG_LEN - 1) == 0 ) { \
+ ID = k; \
+ break; \
+ } \
+ } \
+ /* register a new tag only while a slot is free; otherwise ID stays -1 and nothing is timed */ \
+ if( ID == -1 && silk_Timer_nTimers < silk_NUM_TIMERS_MAX ) { \
+ ID = silk_Timer_nTimers++; \
+ silk_Timer_depth[ID] = silk_Timer_depth_ctr; \
+ strncpy(silk_Timer_tags[ID], #TAG_NAME, silk_NUM_TIMERS_MAX_TAG_LEN - 1); \
+ silk_Timer_tags[ID][silk_NUM_TIMERS_MAX_TAG_LEN - 1] = '\0'; \
+ silk_Timer_cnt[ID] = 0; \
+ silk_Timer_sum[ID] = 0; \
+ silk_Timer_min[ID] = 0xFFFFFFFF; \
+ silk_Timer_max[ID] = 0; \
+ } \
+ } \
+ silk_Timer_depth_ctr++; \
+ if( ID >= 0 ) QueryPerformanceCounter(&silk_Timer_start[ID]); \
+}
+#else
+#define TIC(TAG_NAME) { \
+ static int init = 0; \
+ static int ID = -1; \
+ if( init == 0 ) { \
+ int k; \
+ init = 1; \
+ for( k = 0; k < silk_Timer_nTimers; k++ ) { \
+ if( strncmp(silk_Timer_tags[k], #TAG_NAME, silk_NUM_TIMERS_MAX_TAG_LEN - 1) == 0 ) { \
+ ID = k; \
+ break; \
+ } \
+ } \
+ /* register a new tag only while a slot is free; otherwise ID stays -1 and nothing is timed */ \
+ if( ID == -1 && silk_Timer_nTimers < silk_NUM_TIMERS_MAX ) { \
+ ID = silk_Timer_nTimers++; \
+ silk_Timer_depth[ID] = silk_Timer_depth_ctr; \
+ strncpy(silk_Timer_tags[ID], #TAG_NAME, silk_NUM_TIMERS_MAX_TAG_LEN - 1); \
+ silk_Timer_tags[ID][silk_NUM_TIMERS_MAX_TAG_LEN - 1] = '\0'; \
+ silk_Timer_cnt[ID] = 0; \
+ silk_Timer_sum[ID] = 0; \
+ silk_Timer_min[ID] = 0xFFFFFFFF; \
+ silk_Timer_max[ID] = 0; \
+ } \
+ } \
+ silk_Timer_depth_ctr++; \
+ if( ID >= 0 ) silk_Timer_start[ID] = GetHighResolutionTime(); \
+}
+#endif
+
+#ifdef _WIN32
+#define TOC(TAG_NAME) { \
+ LARGE_INTEGER lpPerformanceCount; \
+ static int init = 0; \
+ static int ID = -1; /* -1: tag not registered, so no other timer's statistics get touched */ \
+ if( init == 0 ) { \
+ int k; \
+ init = 1; \
+ for( k = 0; k < silk_Timer_nTimers; k++ ) { \
+ if( strncmp(silk_Timer_tags[k], #TAG_NAME, silk_NUM_TIMERS_MAX_TAG_LEN - 1) == 0 ) { \
+ ID = k; \
+ break; \
+ } \
+ } \
+ } \
+ QueryPerformanceCounter(&lpPerformanceCount); \
+ if( ID >= 0 ) { \
+ lpPerformanceCount.QuadPart -= silk_Timer_start[ID].QuadPart; \
+ if( lpPerformanceCount.QuadPart >= 0 && lpPerformanceCount.QuadPart < 100000000 ) { \
+ silk_Timer_cnt[ID]++; \
+ silk_Timer_sum[ID] += lpPerformanceCount.QuadPart; \
+ if( lpPerformanceCount.QuadPart > silk_Timer_max[ID] ) \
+ silk_Timer_max[ID] = lpPerformanceCount.QuadPart; \
+ if( lpPerformanceCount.QuadPart < silk_Timer_min[ID] ) \
+ silk_Timer_min[ID] = lpPerformanceCount.QuadPart; \
+ } \
+ } \
+ silk_Timer_depth_ctr--; \
+}
+#else
+#define TOC(TAG_NAME) { \
+ unsigned long endTime; \
+ static int init = 0; \
+ static int ID = -1; /* -1: tag not registered, so no other timer's statistics get touched */ \
+ if( init == 0 ) { \
+ int k; \
+ init = 1; \
+ for( k = 0; k < silk_Timer_nTimers; k++ ) { \
+ if( strncmp(silk_Timer_tags[k], #TAG_NAME, silk_NUM_TIMERS_MAX_TAG_LEN - 1) == 0 ) { \
+ ID = k; \
+ break; \
+ } \
+ } \
+ } \
+ endTime = GetHighResolutionTime(); \
+ if( ID >= 0 ) { \
+ endTime -= silk_Timer_start[ID]; \
+ if( endTime < 100000000 ) { /* unsigned: a backwards clock step wraps to a huge value and is rejected too */ \
+ silk_Timer_cnt[ID]++; \
+ silk_Timer_sum[ID] += endTime; \
+ if( endTime > silk_Timer_max[ID] ) \
+ silk_Timer_max[ID] = endTime; \
+ if( endTime < silk_Timer_min[ID] ) \
+ silk_Timer_min[ID] = endTime; \
+ } \
+ } \
+ silk_Timer_depth_ctr--; \
+}
+#endif
+
+#else /* SILK_TIC_TOC */
+
+/* Timers disabled: TIC, TOC and silk_TimerSave expand to nothing, so call sites compile away */
+#define TIC(TAG_NAME)
+#define TOC(TAG_NAME)
+#define silk_TimerSave(FILE_NAME)
+
+#endif /* SILK_TIC_TOC */
+
+
+#if SILK_DEBUG
+/************************************/
+/* write data to file for debugging */
+/************************************/
+/* Example: DEBUG_STORE_DATA(testfile.pcm, &RIN[0], 160*sizeof(opus_int16)); */
+
+#define silk_NUM_STORES_MAX 100 /* capacity of silk_debug_store_fp */
+extern FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ]; /* streams opened by DEBUG_STORE_DATA */
+extern int silk_debug_store_count; /* number of silk_debug_store_fp slots handed out so far */
+
+/* Writes N_BYTES from DATA_PTR to the file named FILE_NAME, which is opened on the first pass through a call site. */
+/* Nothing is written if all silk_NUM_STORES_MAX slots are taken or the file could not be opened. */
+#define DEBUG_STORE_DATA( FILE_NAME, DATA_PTR, N_BYTES ) { \
+ static opus_int init = 0, cnt = -1; \
+ if( init == 0 && silk_debug_store_count < silk_NUM_STORES_MAX ) { \
+ init = 1; \
+ silk_debug_store_fp[ silk_debug_store_count ] = fopen(#FILE_NAME, "wb"); \
+ if( silk_debug_store_fp[ silk_debug_store_count ] != NULL ) cnt = silk_debug_store_count++; \
+ } \
+ if( cnt >= 0 ) fwrite((DATA_PTR), (N_BYTES), 1, silk_debug_store_fp[ cnt ]); \
+}
+
+/* Call this at the end of main(): closes every stream that DEBUG_STORE_DATA managed to open */
+#define SILK_DEBUG_STORE_CLOSE_FILES { \
+ opus_int i; \
+ for( i = 0; i < silk_debug_store_count; i++ ) { \
+ if( silk_debug_store_fp[ i ] != NULL ) fclose( silk_debug_store_fp[ i ] ); /* a slot is NULL when its fopen failed */ \
+ } \
+}
+
+#else /* SILK_DEBUG */
+
+/* SILK_DEBUG is 0: both debug-store macros expand to nothing, so call sites compile away */
+#define DEBUG_STORE_DATA(FILE_NAME, DATA_PTR, N_BYTES)
+#define SILK_DEBUG_STORE_CLOSE_FILES
+
+#endif /* SILK_DEBUG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_DEBUG_H */
diff --git a/drivers/opus/silk/dec_API.c b/drivers/opus/silk/dec_API.c
new file mode 100644
index 0000000000..cd72115a20
--- /dev/null
+++ b/drivers/opus/silk/dec_API.c
@@ -0,0 +1,397 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#include "API.h"
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/************************/
+/* Decoder Super Struct */
+/************************/
+typedef struct {
+ silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; /* one decoder state per internal channel */
+ stereo_dec_state sStereo; /* state handed to silk_stereo_MS_to_LR(); sMid also buffers 2 samples in the non-stereo path */
+ opus_int nChannelsAPI; /* decControl->nChannelsAPI seen by the previous silk_Decode() call */
+ opus_int nChannelsInternal; /* decControl->nChannelsInternal seen by the previous silk_Decode() call */
+ opus_int prev_decode_only_middle; /* decode_only_middle of the last call whose lostFlag was not FLAG_PACKET_LOST */
+} silk_decoder;
+
+/*********************/
+/* Decoder functions */
+/*********************/
+
+opus_int silk_Get_Decoder_Size( /* O Returns error code */
+ opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */
+)
+{
+ /* The complete decoder state is a single silk_decoder super struct, so report its size */
+ const opus_int stateSize = (opus_int)sizeof( silk_decoder );
+
+ *decSizeBytes = stateSize;
+ return SILK_NO_ERROR;
+}
+
+/* Reset decoder state: re-initialises every channel decoder and clears the stereo state */
+opus_int silk_InitDecoder( /* O Returns error code */
+ void *decState /* I/O State */
+)
+{
+ opus_int n, ret = SILK_NO_ERROR;
+ silk_decoder *psDec = (silk_decoder *)decState;
+
+ for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
+ /* Accumulate, as silk_Decode() does, so an error from an earlier channel is not overwritten */
+ ret += silk_init_decoder( &psDec->channel_state[ n ] );
+ }
+ silk_memset( &psDec->sStereo, 0, sizeof( psDec->sStereo ) );
+ /* Not strictly needed, but it's cleaner that way */
+ psDec->prev_decode_only_middle = 0;
+ return ret;
+}
+
+/* Decode one frame per call: range-decode it, convert mid/side to left/right, resample to API_sampleRate and interleave */
+opus_int silk_Decode( /* O Returns error code */
+ void* decState, /* I/O State */
+ silk_DecControlStruct* decControl, /* I/O Control Structure */
+ opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */
+ opus_int newPacketFlag, /* I Indicates first decoder call for this packet */
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int16 *samplesOut, /* O Decoded output speech vector */
+ opus_int32 *nSamplesOut /* O Number of samples decoded */
+)
+{
+ opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
+ opus_int32 nSamplesOutDec, LBRR_symbol;
+ opus_int16 *samplesOut1_tmp[ 2 ];
+ VARDECL( opus_int16, samplesOut1_tmp_storage );
+ VARDECL( opus_int16, samplesOut2_tmp );
+ opus_int32 MS_pred_Q13[ 2 ] = { 0 }; /* filled by silk_stereo_decode_pred() or copied from sStereo.pred_prev_Q13 */
+ opus_int16 *resample_out_ptr;
+ silk_decoder *psDec = ( silk_decoder * )decState;
+ silk_decoder_state *channel_state = psDec->channel_state;
+ opus_int has_side;
+ opus_int stereo_to_mono;
+ SAVE_STACK;
+
+ silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
+
+ /**********************************/
+ /* Test if first frame in payload */
+ /**********************************/
+ if( newPacketFlag ) {
+ for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+ channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */
+ }
+ }
+
+ /* If Mono -> Stereo transition in bitstream: init state of second channel */
+ if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
+ ret += silk_init_decoder( &channel_state[ 1 ] );
+ }
+
+ stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
+ ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); /* 2 -> 1 internal channels at an unchanged internal rate */
+
+ if( channel_state[ 0 ].nFramesDecoded == 0 ) { /* no frame of this packet decoded yet: set frame layout and sampling rate */
+ for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+ opus_int fs_kHz_dec;
+ if( decControl->payloadSize_ms == 0 ) {
+ /* Assuming packet loss, use 10 ms */
+ channel_state[ n ].nFramesPerPacket = 1;
+ channel_state[ n ].nb_subfr = 2;
+ } else if( decControl->payloadSize_ms == 10 ) {
+ channel_state[ n ].nFramesPerPacket = 1;
+ channel_state[ n ].nb_subfr = 2;
+ } else if( decControl->payloadSize_ms == 20 ) {
+ channel_state[ n ].nFramesPerPacket = 1;
+ channel_state[ n ].nb_subfr = 4;
+ } else if( decControl->payloadSize_ms == 40 ) {
+ channel_state[ n ].nFramesPerPacket = 2;
+ channel_state[ n ].nb_subfr = 4;
+ } else if( decControl->payloadSize_ms == 60 ) {
+ channel_state[ n ].nFramesPerPacket = 3;
+ channel_state[ n ].nb_subfr = 4;
+ } else {
+ silk_assert( 0 );
+ RESTORE_STACK;
+ return SILK_DEC_INVALID_FRAME_SIZE;
+ }
+ fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; /* maps 8000 / 12000 / 16000 Hz to 8 / 12 / 16 */
+ if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
+ silk_assert( 0 );
+ RESTORE_STACK;
+ return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
+ }
+ ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
+ }
+ }
+
+ if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { /* stereo in and out, but the previous call was mono on either side */
+ silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
+ silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
+ silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
+ }
+ psDec->nChannelsAPI = decControl->nChannelsAPI;
+ psDec->nChannelsInternal = decControl->nChannelsInternal;
+
+ if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
+ ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
+ RESTORE_STACK;
+ return( ret );
+ }
+
+ if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
+ /* First decoder call for this payload */
+ /* Decode VAD flags and LBRR flag */
+ for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+ for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
+ channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
+ }
+ channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
+ }
+ /* Decode LBRR flags */
+ for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+ silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
+ if( channel_state[ n ].LBRR_flag ) {
+ if( channel_state[ n ].nFramesPerPacket == 1 ) {
+ channel_state[ n ].LBRR_flags[ 0 ] = 1;
+ } else {
+ LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
+ for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
+ channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; /* bit i of LBRR_symbol is the flag of frame i */
+ }
+ }
+ }
+ }
+
+ if( lostFlag == FLAG_DECODE_NORMAL ) {
+ /* Regular decoding: skip all LBRR data */
+ for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
+ for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+ if( channel_state[ n ].LBRR_flags[ i ] ) {
+ opus_int pulses[ MAX_FRAME_LENGTH ]; /* scratch output; not read again in this function */
+ opus_int condCoding;
+
+ if( decControl->nChannelsInternal == 2 && n == 0 ) {
+ silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
+ if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
+ silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
+ }
+ }
+ /* Use conditional coding if previous frame available */
+ if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
+ condCoding = CODE_CONDITIONALLY;
+ } else {
+ condCoding = CODE_INDEPENDENTLY;
+ }
+ silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
+ silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
+ channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
+ }
+ }
+ }
+ }
+ }
+
+ /* Get MS predictor index */
+ if( decControl->nChannelsInternal == 2 ) {
+ if( lostFlag == FLAG_DECODE_NORMAL ||
+ ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
+ {
+ silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
+ /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
+ if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
+ ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
+ {
+ silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
+ } else {
+ decode_only_middle = 0;
+ }
+ } else {
+ for( n = 0; n < 2; n++ ) {
+ MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; /* nothing decoded for this frame: reuse the stored predictors */
+ }
+ }
+ }
+
+ /* Reset side channel decoder prediction memory for first frame with side coding */
+ if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
+ silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
+ silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
+ psDec->channel_state[ 1 ].lagPrev = 100;
+ psDec->channel_state[ 1 ].LastGainIndex = 10;
+ psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+ psDec->channel_state[ 1 ].first_frame_after_reset = 1;
+ }
+
+ ALLOC( samplesOut1_tmp_storage,
+ decControl->nChannelsInternal*(
+ channel_state[ 0 ].frame_length + 2 ),
+ opus_int16 ); /* per channel: 2 samples ahead of the frame, which is decoded to index 2 onwards */
+ samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
+ samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+ + channel_state[ 0 ].frame_length + 2;
+
+ if( lostFlag == FLAG_DECODE_NORMAL ) {
+ has_side = !decode_only_middle;
+ } else {
+ has_side = !psDec->prev_decode_only_middle
+ || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
+ }
+ /* Call decoder for one frame */
+ for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+ if( n == 0 || has_side ) {
+ opus_int FrameIndex;
+ opus_int condCoding;
+
+ FrameIndex = channel_state[ 0 ].nFramesDecoded - n; /* for n == 1, channel 0's counter was already incremented below */
+ /* Use independent coding if no previous frame available */
+ if( FrameIndex <= 0 ) {
+ condCoding = CODE_INDEPENDENTLY;
+ } else if( lostFlag == FLAG_DECODE_LBRR ) {
+ condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
+ } else if( n > 0 && psDec->prev_decode_only_middle ) {
+ /* If we skipped a side frame in this packet, we don't
+ need LTP scaling; the LTP state is well-defined. */
+ condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
+ } else {
+ condCoding = CODE_CONDITIONALLY;
+ }
+ ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
+ } else {
+ silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); /* side skipped: zero it; nSamplesOutDec comes from the n == 0 pass */
+ }
+ channel_state[ n ].nFramesDecoded++;
+ }
+
+ if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
+ /* Convert Mid/Side to Left/Right */
+ silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
+ } else {
+ /* Buffering: prepend the 2 samples saved by the previous call, then save the last 2 of this frame */
+ silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
+ silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
+ }
+
+ /* Number of output samples */
+ *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
+
+ /* Set up pointers to temp buffers */
+ ALLOC( samplesOut2_tmp,
+ decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
+ if( decControl->nChannelsAPI == 2 ) {
+ resample_out_ptr = samplesOut2_tmp;
+ } else {
+ resample_out_ptr = samplesOut;
+ }
+
+ for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
+
+ /* Resample decoded signal to API_sampleRate; the input starts one sample before the decoded frame */
+ ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
+
+ /* Interleave if stereo output and stereo stream */
+ if( decControl->nChannelsAPI == 2 ) {
+ for( i = 0; i < *nSamplesOut; i++ ) {
+ samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
+ }
+ }
+ }
+
+ /* Create two channel output from mono stream */
+ if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
+ if ( stereo_to_mono ){
+ /* Resample right channel for newly collapsed stereo just in case
+ we weren't doing collapsing when switching to mono */
+ ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
+
+ for( i = 0; i < *nSamplesOut; i++ ) {
+ samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
+ }
+ } else {
+ for( i = 0; i < *nSamplesOut; i++ ) {
+ samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; /* duplicate the left sample into the right slot */
+ }
+ }
+ }
+
+ /* Export pitch lag, measured at 48 kHz sampling rate */
+ if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
+ int mult_tab[ 3 ] = { 6, 4, 3 }; /* 48 / fs_kHz for fs_kHz = 8, 12, 16 */
+ decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
+ } else {
+ decControl->prevPitchLag = 0;
+ }
+
+ if( lostFlag == FLAG_PACKET_LOST ) {
+ /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
+ if we lose packets when the energy is going down */
+ for ( i = 0; i < psDec->nChannelsInternal; i++ )
+ psDec->channel_state[ i ].LastGainIndex = 10;
+ } else {
+ psDec->prev_decode_only_middle = decode_only_middle;
+ }
+ RESTORE_STACK;
+ return ret;
+}
+
+#if 0
+/* Getting table of contents for a packet */
+opus_int silk_get_TOC(
+ const opus_uint8 *payload, /* I Payload data */
+ const opus_int nBytesIn, /* I Number of input bytes */
+ const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */
+ silk_TOC_struct *Silk_TOC /* O Type of content */
+)
+{
+ opus_int frameIdx, bitPos, headerBits;
+
+ /* Reject an empty payload or a frame count outside 0..3 */
+ if( nBytesIn < 1 || nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
+ return -1;
+ }
+
+ silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );
+
+ /* For stereo, extract the flags for the mid channel: */
+ /* the top nFramesPerPayload + 1 bits of the first payload byte */
+ headerBits = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
+
+ Silk_TOC->inbandFECFlag = headerBits & 1;
+ /* Bit 1 holds the VAD flag of the last frame, bit nFramesPerPayload that of frame 0 */
+ for( frameIdx = nFramesPerPayload - 1, bitPos = 1; frameIdx >= 0; frameIdx--, bitPos++ ) {
+ const opus_int vad = silk_RSHIFT( headerBits, bitPos ) & 1;
+ Silk_TOC->VADFlags[ frameIdx ] = vad;
+ Silk_TOC->VADFlag |= vad;
+ }
+
+ return SILK_NO_ERROR;
+}
+#endif
diff --git a/drivers/opus/silk/decode_core.c b/drivers/opus/silk/decode_core.c
new file mode 100644
index 0000000000..8f801ea7ad
--- /dev/null
+++ b/drivers/opus/silk/decode_core.c
@@ -0,0 +1,238 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/**********************************************************/
+/* Core decoder. Performs inverse NSQ operation LTP + LPC */
+/**********************************************************/
+/* Reconstructs one frame of 16-bit output in xq[] from the decoded pulse signal. */
+/* The excitation is rebuilt from pulses[] and stored in psDec->exc_Q14; then, for */
+/* each of psDec->nb_subfr subframes, long-term prediction is applied (only when */
+/* the subframe is treated as voiced), followed by short-term (LPC) synthesis and */
+/* gain scaling with saturation to 16 bits. The LPC filter state is read from and */
+/* written back to psDec->sLPC_Q14_buf, and psDec->prev_gain_Q16 is updated to the */
+/* gain of the last processed subframe. */
+void silk_decode_core(
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I/O Decoder control (pitchL[] and LTPCoef_Q14[] may be overwritten after a loss) */
+ opus_int16 xq[], /* O Decoded speech */
+ const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */
+)
+{
+ opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
+ opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ];
+ VARDECL( opus_int16, sLTP );
+ VARDECL( opus_int32, sLTP_Q15 );
+ opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10;
+ opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14;
+ VARDECL( opus_int32, res_Q14 );
+ VARDECL( opus_int32, sLPC_Q14 );
+ SAVE_STACK;
+
+ /* The previous gain is used as a numerator in the gain adjustment below */
+ silk_assert( psDec->prev_gain_Q16 != 0 );
+
+ /* Scratch buffers sized from the current decoder configuration */
+ ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+ ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+ ALLOC( res_Q14, psDec->subfr_length, opus_int32 );
+ ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 );
+
+ /* Quantization offset selected by signal type and quantizer offset type */
+ offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ];
+
+ /* An interpolation factor below 4 ( 1 << 2 ) sets the flag that enables re-whitening at subframe 2 */
+ if( psDec->indices.NLSFInterpCoef_Q2 < 1 << 2 ) {
+ NLSF_interpolation_flag = 1;
+ } else {
+ NLSF_interpolation_flag = 0;
+ }
+
+ /* Decode excitation */
+ rand_seed = psDec->indices.Seed;
+ for( i = 0; i < psDec->frame_length; i++ ) {
+ rand_seed = silk_RAND( rand_seed );
+ psDec->exc_Q14[ i ] = silk_LSHIFT( (opus_int32)pulses[ i ], 14 );
+ /* Pull non-zero pulses toward zero by the level adjustment; zero pulses are left as is */
+ if( psDec->exc_Q14[ i ] > 0 ) {
+ psDec->exc_Q14[ i ] -= QUANT_LEVEL_ADJUST_Q10 << 4;
+ } else
+ if( psDec->exc_Q14[ i ] < 0 ) {
+ psDec->exc_Q14[ i ] += QUANT_LEVEL_ADJUST_Q10 << 4;
+ }
+ psDec->exc_Q14[ i ] += offset_Q10 << 4;
+ /* Sign of the pseudo-random state decides whether the sample is negated */
+ if( rand_seed < 0 ) {
+ psDec->exc_Q14[ i ] = -psDec->exc_Q14[ i ];
+ }
+
+ /* Mix the pulse value into the random state (wrap-around add) */
+ rand_seed = silk_ADD32_ovflw( rand_seed, pulses[ i ] );
+ }
+
+ /* Copy LPC state */
+ silk_memcpy( sLPC_Q14, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
+ pexc_Q14 = psDec->exc_Q14;
+ pxq = xq;
+ sLTP_buf_idx = psDec->ltp_mem_length;
+ /* Loop over subframes */
+ for( k = 0; k < psDec->nb_subfr; k++ ) {
+ pres_Q14 = res_Q14;
+ /* Subframes 0-1 use coefficient set 0, subframes 2-3 use set 1 */
+ A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ];
+
+ /* Preload LPC coefficients to array on stack. Gives small performance gain */
+ silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
+ B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
+ signalType = psDec->indices.signalType;
+
+ Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 );
+ inv_gain_Q31 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 47 );
+
+ /* Calculate gain adjustment factor */
+ if( psDecCtrl->Gains_Q16[ k ] != psDec->prev_gain_Q16 ) {
+ gain_adj_Q16 = silk_DIV32_varQ( psDec->prev_gain_Q16, psDecCtrl->Gains_Q16[ k ], 16 );
+
+ /* Scale short term state */
+ for( i = 0; i < MAX_LPC_ORDER; i++ ) {
+ sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, sLPC_Q14[ i ] );
+ }
+ } else {
+ /* Unchanged gain: adjustment factor is 1.0 in Q16 */
+ gain_adj_Q16 = (opus_int32)1 << 16;
+ }
+
+ /* Save inv_gain */
+ silk_assert( inv_gain_Q31 != 0 );
+ psDec->prev_gain_Q16 = psDecCtrl->Gains_Q16[ k ];
+
+ /* Avoid abrupt transition from voiced PLC to unvoiced normal decoding */
+ /* Applies only to the first MAX_NB_SUBFR/2 subframes: the LTP filter is replaced by a */
+ /* single centre tap of 0.25 and the previous lag is reused. Note that this writes into */
+ /* psDecCtrl (LTPCoef_Q14 through B_Q14, and pitchL). */
+ if( psDec->lossCnt && psDec->prevSignalType == TYPE_VOICED &&
+ psDec->indices.signalType != TYPE_VOICED && k < MAX_NB_SUBFR/2 ) {
+
+ silk_memset( B_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) );
+ B_Q14[ LTP_ORDER/2 ] = SILK_FIX_CONST( 0.25, 14 );
+
+ signalType = TYPE_VOICED;
+ psDecCtrl->pitchL[ k ] = psDec->lagPrev;
+ }
+
+ if( signalType == TYPE_VOICED ) {
+ /* Voiced */
+ lag = psDecCtrl->pitchL[ k ];
+
+ /* Re-whitening */
+ if( k == 0 || ( k == 2 && NLSF_interpolation_flag ) ) {
+ /* Rewhiten with new A coefs */
+ start_idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
+ silk_assert( start_idx > 0 );
+
+ /* At subframe 2 the two subframes already decoded into xq are appended to outBuf */
+ /* so that the analysis filter input below covers them */
+ if( k == 2 ) {
+ silk_memcpy( &psDec->outBuf[ psDec->ltp_mem_length ], xq, 2 * psDec->subfr_length * sizeof( opus_int16 ) );
+ }
+
+ silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ],
+ A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order );
+
+ /* After rewhitening the LTP state is unscaled */
+ if( k == 0 ) {
+ /* Do LTP downscaling to reduce inter-packet dependency */
+ inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, psDecCtrl->LTP_scale_Q14 ), 2 );
+ }
+ /* Fill the most recent lag + LTP_ORDER/2 entries of the LTP state, scaled by the inverse gain */
+ for( i = 0; i < lag + LTP_ORDER/2; i++ ) {
+ sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWB( inv_gain_Q31, sLTP[ psDec->ltp_mem_length - i - 1 ] );
+ }
+ } else {
+ /* Update LTP state when Gain changes */
+ if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+ for( i = 0; i < lag + LTP_ORDER/2; i++ ) {
+ sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ sLTP_buf_idx - i - 1 ] );
+ }
+ }
+ }
+ }
+
+ /* Long-term prediction */
+ if( signalType == TYPE_VOICED ) {
+ /* Set up pointer */
+ pred_lag_ptr = &sLTP_Q15[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+ for( i = 0; i < psDec->subfr_length; i++ ) {
+ /* Unrolled loop */
+ /* Five filter taps, B_Q14[ 0..4 ], applied to pred_lag_ptr[ 0 ] down to pred_lag_ptr[ -4 ] */
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ LTP_pred_Q13 = 2;
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ 0 ], B_Q14[ 0 ] );
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], B_Q14[ 1 ] );
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], B_Q14[ 2 ] );
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], B_Q14[ 3 ] );
+ LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], B_Q14[ 4 ] );
+ pred_lag_ptr++;
+
+ /* Generate LPC excitation */
+ pres_Q14[ i ] = silk_ADD_LSHIFT32( pexc_Q14[ i ], LTP_pred_Q13, 1 );
+
+ /* Update states */
+ sLTP_Q15[ sLTP_buf_idx ] = silk_LSHIFT( pres_Q14[ i ], 1 );
+ sLTP_buf_idx++;
+ }
+ } else {
+ /* No long-term prediction: the excitation is fed to the LPC synthesis directly */
+ pres_Q14 = pexc_Q14;
+ }
+
+ for( i = 0; i < psDec->subfr_length; i++ ) {
+ /* Short-term prediction */
+ silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
+ /* First ten taps are always applied */
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12_tmp[ 0 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12_tmp[ 1 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12_tmp[ 2 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12_tmp[ 3 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12_tmp[ 4 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12_tmp[ 5 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12_tmp[ 6 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12_tmp[ 7 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12_tmp[ 8 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12_tmp[ 9 ] );
+ /* Six more taps when the filter order is 16 */
+ if( psDec->LPC_order == 16 ) {
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12_tmp[ 10 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12_tmp[ 11 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12_tmp[ 12 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12_tmp[ 13 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12_tmp[ 14 ] );
+ LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12_tmp[ 15 ] );
+ }
+
+ /* Add prediction to LPC excitation */
+ sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 );
+
+ /* Scale with gain */
+ pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );
+ }
+
+ /* DEBUG_STORE_DATA( dec.pcm, pxq, psDec->subfr_length * sizeof( opus_int16 ) ) */
+
+ /* Update LPC filter state */
+ /* The last MAX_LPC_ORDER synthesized samples become the history for the next subframe */
+ silk_memcpy( sLPC_Q14, &sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+ pexc_Q14 += psDec->subfr_length;
+ pxq += psDec->subfr_length;
+ }
+
+ /* Save LPC state */
+ silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/decode_frame.c b/drivers/opus/silk/decode_frame.c
new file mode 100644
index 0000000000..38500227c2
--- /dev/null
+++ b/drivers/opus/silk/decode_frame.c
@@ -0,0 +1,128 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+#include "PLC.h"
+
+/*******************************************************************************/
+/* Decode frame                                                                */
+/*                                                                             */
+/* When lostFlag is FLAG_DECODE_NORMAL, or FLAG_DECODE_LBRR with the LBRR flag */
+/* set for the current frame, side information and excitation are read from    */
+/* psRangeDec and the frame is synthesized by silk_decode_core(). Otherwise    */
+/* the frame is extrapolated by silk_PLC(). In both cases psDec->outBuf is     */
+/* advanced by one frame, silk_PLC_glue_frames() and silk_CNG() post-process   */
+/* pOut, and psDec->lagPrev is refreshed from the last subframe's pitch lag.   */
+/* *pN receives the frame length; the return value is always 0.                */
+/*******************************************************************************/
+opus_int silk_decode_frame(
+    silk_decoder_state          *psDec,             /* I/O  Pointer to Silk decoder state               */
+    ec_dec                      *psRangeDec,        /* I/O  Compressor data structure                   */
+    opus_int16                  pOut[],             /* O    Pointer to output speech frame              */
+    opus_int32                  *pN,                /* O    Pointer to size of output frame             */
+    opus_int                    lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec            */
+    opus_int                    condCoding          /* I    The type of conditional coding to use       */
+)
+{
+    VARDECL( silk_decoder_control, psDecCtrl );
+    VARDECL( opus_int, pulses );
+    opus_int L, history_len, have_payload;
+    SAVE_STACK;
+
+    L = psDec->frame_length;
+    ALLOC( psDecCtrl, 1, silk_decoder_control );
+    /* Pulse buffer length is the frame length rounded up to a whole number of shell blocks */
+    ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
+        ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
+    psDecCtrl->LTP_scale_Q14 = 0;
+
+    /* Safety checks */
+    silk_assert( L > 0 && L <= MAX_FRAME_LENGTH );
+
+    /* Is there coded data for this frame: a regular frame, or an LBRR frame that is flagged present? */
+    have_payload = ( lostFlag == FLAG_DECODE_NORMAL ) ||
+        ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 );
+
+    if( !have_payload ) {
+        /* Handle packet loss by extrapolation */
+        silk_PLC( psDec, psDecCtrl, pOut, 1 );
+    } else {
+        /* Side information indices */
+        silk_decode_indices( psDec, psRangeDec, psDec->nFramesDecoded, lostFlag, condCoding );
+
+        /* Excitation indices */
+        silk_decode_pulses( psRangeDec, pulses, psDec->indices.signalType,
+            psDec->indices.quantOffsetType, psDec->frame_length );
+
+        /* Turn the indices into filter parameters */
+        silk_decode_parameters( psDec, psDecCtrl, condCoding );
+
+        /* Inverse NSQ: synthesize the frame */
+        silk_decode_core( psDec, psDecCtrl, pOut, pulses );
+
+        /* Let the PLC track the state of this good frame */
+        silk_PLC( psDec, psDecCtrl, pOut, 0 );
+
+        psDec->lossCnt = 0;
+        psDec->prevSignalType = psDec->indices.signalType;
+        silk_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 );
+
+        /* A frame has been decoded without errors */
+        psDec->first_frame_after_reset = 0;
+    }
+
+    /* Slide the output history left by one frame and append the new frame at its end */
+    silk_assert( psDec->ltp_mem_length >= psDec->frame_length );
+    history_len = psDec->ltp_mem_length - psDec->frame_length;
+    silk_memmove( psDec->outBuf, psDec->outBuf + psDec->frame_length, history_len * sizeof(opus_int16) );
+    silk_memcpy( psDec->outBuf + history_len, pOut, psDec->frame_length * sizeof( opus_int16 ) );
+
+    /* Ensure smooth connection of extrapolated and good frames */
+    silk_PLC_glue_frames( psDec, pOut, L );
+
+    /* Comfort noise generation / estimation */
+    silk_CNG( psDec, psDecCtrl, pOut, L );
+
+    /* Remember the pitch lag of the final subframe for the next call */
+    psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
+
+    /* Set output frame length */
+    *pN = L;
+
+    RESTORE_STACK;
+    return 0;
+}
diff --git a/drivers/opus/silk/decode_indices.c b/drivers/opus/silk/decode_indices.c
new file mode 100644
index 0000000000..c2aaad2606
--- /dev/null
+++ b/drivers/opus/silk/decode_indices.c
@@ -0,0 +1,151 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Decode side-information parameters from payload */
+/* Reads symbols from psRangeDec with ec_dec_icdf() in a fixed order and stores them */
+/* in psDec->indices: signal type and quantizer offset, gain indices, NLSF indices and */
+/* interpolation factor, then (voiced frames only) pitch lag, contour, LTP indices and */
+/* LTP scaling, and finally the seed. Also updates psDec->ec_prevSignalType and, for */
+/* voiced frames, psDec->ec_prevLagIndex. */
+void silk_decode_indices(
+ silk_decoder_state *psDec, /* I/O State */
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int FrameIndex, /* I Frame number */
+ opus_int decode_LBRR, /* I Flag indicating LBRR data is being decoded */
+ opus_int condCoding /* I The type of conditional coding to use */
+)
+{
+ opus_int i, k, Ix;
+ opus_int decode_absolute_lagIndex, delta_lagIndex;
+ opus_int16 ec_ix[ MAX_LPC_ORDER ];
+ opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
+
+ /*******************************************/
+ /* Decode signal type and quantizer offset */
+ /*******************************************/
+ /* With LBRR or an active VAD flag the decoded symbol is offset by 2, so the */
+ /* resulting signalType ( Ix >> 1 ) is at least 1 */
+ if( decode_LBRR || psDec->VAD_flags[ FrameIndex ] ) {
+ Ix = ec_dec_icdf( psRangeDec, silk_type_offset_VAD_iCDF, 8 ) + 2;
+ } else {
+ Ix = ec_dec_icdf( psRangeDec, silk_type_offset_no_VAD_iCDF, 8 );
+ }
+ /* Upper bits select the signal type, the lowest bit the quantizer offset type */
+ psDec->indices.signalType = (opus_int8)silk_RSHIFT( Ix, 1 );
+ psDec->indices.quantOffsetType = (opus_int8)( Ix & 1 );
+
+ /****************/
+ /* Decode gains */
+ /****************/
+ /* First subframe */
+ if( condCoding == CODE_CONDITIONALLY ) {
+ /* Conditional coding */
+ psDec->indices.GainsIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 );
+ } else {
+ /* Independent coding, in two stages: MSB bits followed by 3 LSBs */
+ psDec->indices.GainsIndices[ 0 ] = (opus_int8)silk_LSHIFT( ec_dec_icdf( psRangeDec, silk_gain_iCDF[ psDec->indices.signalType ], 8 ), 3 );
+ psDec->indices.GainsIndices[ 0 ] += (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform8_iCDF, 8 );
+ }
+
+ /* Remaining subframes */
+ /* These always use the delta-gain table */
+ for( i = 1; i < psDec->nb_subfr; i++ ) {
+ psDec->indices.GainsIndices[ i ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 );
+ }
+
+ /**********************/
+ /* Decode LSF Indices */
+ /**********************/
+ /* NLSFIndices[ 0 ] is decoded with a table section chosen by signalType >> 1 and is then */
+ /* passed to silk_NLSF_unpack(), which fills ec_ix[] (table offsets used below) and pred_Q8[] */
+ psDec->indices.NLSFIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->CB1_iCDF[ ( psDec->indices.signalType >> 1 ) * psDec->psNLSF_CB->nVectors ], 8 );
+ silk_NLSF_unpack( ec_ix, pred_Q8, psDec->psNLSF_CB, psDec->indices.NLSFIndices[ 0 ] );
+ silk_assert( psDec->psNLSF_CB->order == psDec->LPC_order );
+ for( i = 0; i < psDec->psNLSF_CB->order; i++ ) {
+ Ix = ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+ /* A symbol at either end of the range is followed by an extension symbol that */
+ /* pushes the value further out in the same direction */
+ if( Ix == 0 ) {
+ Ix -= ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 );
+ } else if( Ix == 2 * NLSF_QUANT_MAX_AMPLITUDE ) {
+ Ix += ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 );
+ }
+ /* Stored centred on zero */
+ psDec->indices.NLSFIndices[ i+1 ] = (opus_int8)( Ix - NLSF_QUANT_MAX_AMPLITUDE );
+ }
+
+ /* Decode LSF interpolation factor */
+ /* Only coded when the frame has MAX_NB_SUBFR subframes; otherwise fixed to 4 */
+ if( psDec->nb_subfr == MAX_NB_SUBFR ) {
+ psDec->indices.NLSFInterpCoef_Q2 = (opus_int8)ec_dec_icdf( psRangeDec, silk_NLSF_interpolation_factor_iCDF, 8 );
+ } else {
+ psDec->indices.NLSFInterpCoef_Q2 = 4;
+ }
+
+ if( psDec->indices.signalType == TYPE_VOICED )
+ {
+ /*********************/
+ /* Decode pitch lags */
+ /*********************/
+ /* Get lag index */
+ decode_absolute_lagIndex = 1;
+ if( condCoding == CODE_CONDITIONALLY && psDec->ec_prevSignalType == TYPE_VOICED ) {
+ /* Decode Delta index */
+ delta_lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_delta_iCDF, 8 );
+ /* A decoded value of 0 leaves decode_absolute_lagIndex set, so the absolute lag is read below */
+ if( delta_lagIndex > 0 ) {
+ delta_lagIndex = delta_lagIndex - 9;
+ psDec->indices.lagIndex = (opus_int16)( psDec->ec_prevLagIndex + delta_lagIndex );
+ decode_absolute_lagIndex = 0;
+ }
+ }
+ if( decode_absolute_lagIndex ) {
+ /* Absolute decoding */
+ /* High part scaled by fs_kHz / 2, then the low bits are added */
+ psDec->indices.lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_lag_iCDF, 8 ) * silk_RSHIFT( psDec->fs_kHz, 1 );
+ psDec->indices.lagIndex += (opus_int16)ec_dec_icdf( psRangeDec, psDec->pitch_lag_low_bits_iCDF, 8 );
+ }
+ psDec->ec_prevLagIndex = psDec->indices.lagIndex;
+
+ /* Get contour index */
+ psDec->indices.contourIndex = (opus_int8)ec_dec_icdf( psRangeDec, psDec->pitch_contour_iCDF, 8 );
+
+ /********************/
+ /* Decode LTP gains */
+ /********************/
+ /* Decode PERIndex value */
+ psDec->indices.PERIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_per_index_iCDF, 8 );
+
+ /* One LTP index per subframe, all decoded with the table selected by PERIndex */
+ for( k = 0; k < psDec->nb_subfr; k++ ) {
+ psDec->indices.LTPIndex[ k ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_gain_iCDF_ptrs[ psDec->indices.PERIndex ], 8 );
+ }
+
+ /**********************/
+ /* Decode LTP scaling */
+ /**********************/
+ /* Only coded for independently coded frames; otherwise index 0 is used */
+ if( condCoding == CODE_INDEPENDENTLY ) {
+ psDec->indices.LTP_scaleIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTPscale_iCDF, 8 );
+ } else {
+ psDec->indices.LTP_scaleIndex = 0;
+ }
+ }
+ /* Remembered for the conditional pitch-lag decoding of the next frame */
+ psDec->ec_prevSignalType = psDec->indices.signalType;
+
+ /***************/
+ /* Decode seed */
+ /***************/
+ psDec->indices.Seed = (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform4_iCDF, 8 );
+}
diff --git a/drivers/opus/silk/decode_parameters.c b/drivers/opus/silk/decode_parameters.c
new file mode 100644
index 0000000000..72df4fcdb2
--- /dev/null
+++ b/drivers/opus/silk/decode_parameters.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Decode parameters from payload                                                   */
+/* Converts the indices held in psDec->indices into decoder control values:         */
+/* subframe gains, two sets of LPC coefficients (PredCoef_Q12[ 0 ] and [ 1 ]) and,  */
+/* for voiced frames, pitch lags, LTP filter coefficients and the LTP scale.        */
+/* For other frames the pitch lags, LTP coefficients, PERIndex and LTP scale are    */
+/* cleared. Also stores the decoded NLSF vector in psDec->prevNLSF_Q15.             */
+void silk_decode_parameters(
+    silk_decoder_state          *psDec,             /* I/O  State                                       */
+    silk_decoder_control        *psDecCtrl,         /* I/O  Decoder control                             */
+    opus_int                    condCoding          /* I    The type of conditional coding to use       */
+)
+{
+    opus_int16 nlsf_cur_Q15[ MAX_LPC_ORDER ], nlsf_interp_Q15[ MAX_LPC_ORDER ];
+    const opus_int8 *ltp_cbk_Q7;
+    opus_int n, sf, tap;
+
+    /* Dequant Gains */
+    silk_gains_dequant( psDecCtrl->Gains_Q16, psDec->indices.GainsIndices,
+        &psDec->LastGainIndex, condCoding == CODE_CONDITIONALLY, psDec->nb_subfr );
+
+    /* Decode the NLSF vector and derive the second-half LPC coefficients from it */
+    silk_NLSF_decode( nlsf_cur_Q15, psDec->indices.NLSFIndices, psDec->psNLSF_CB );
+    silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], nlsf_cur_Q15, psDec->LPC_order );
+
+    /* If just reset, e.g., because internal Fs changed, do not allow interpolation */
+    /* improves the case of packet loss in the first frame after a switch           */
+    if( psDec->first_frame_after_reset == 1 ) {
+        psDec->indices.NLSFInterpCoef_Q2 = 4;
+    }
+
+    if( psDec->indices.NLSFInterpCoef_Q2 >= 4 ) {
+        /* No interpolation: first half reuses the second-half coefficients */
+        silk_memcpy( psDecCtrl->PredCoef_Q12[ 0 ], psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) );
+    } else {
+        /* First-half NLSFs: previous vector plus ( factor / 4 ) of the step to the current one */
+        for( n = 0; n < psDec->LPC_order; n++ ) {
+            nlsf_interp_Q15[ n ] = psDec->prevNLSF_Q15[ n ] + silk_RSHIFT( silk_MUL( psDec->indices.NLSFInterpCoef_Q2,
+                nlsf_cur_Q15[ n ] - psDec->prevNLSF_Q15[ n ] ), 2 );
+        }
+
+        /* Convert NLSF parameters to AR prediction filter coefficients */
+        silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], nlsf_interp_Q15, psDec->LPC_order );
+    }
+
+    /* Keep the current NLSFs as the reference for the next frame */
+    silk_memcpy( psDec->prevNLSF_Q15, nlsf_cur_Q15, psDec->LPC_order * sizeof( opus_int16 ) );
+
+    /* After a packet loss do BWE of LPC coefs */
+    if( psDec->lossCnt ) {
+        silk_bwexpander( psDecCtrl->PredCoef_Q12[ 0 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 );
+        silk_bwexpander( psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 );
+    }
+
+    if( psDec->indices.signalType != TYPE_VOICED ) {
+        /* Not voiced: clear all long-term prediction parameters and finish */
+        silk_memset( psDecCtrl->pitchL, 0, psDec->nb_subfr * sizeof( opus_int ) );
+        silk_memset( psDecCtrl->LTPCoef_Q14, 0, LTP_ORDER * psDec->nb_subfr * sizeof( opus_int16 ) );
+        psDec->indices.PERIndex = 0;
+        psDecCtrl->LTP_scale_Q14 = 0;
+        return;
+    }
+
+    /* Decode pitch values */
+    silk_decode_pitch( psDec->indices.lagIndex, psDec->indices.contourIndex, psDecCtrl->pitchL, psDec->fs_kHz, psDec->nb_subfr );
+
+    /* Codebook selected by the periodicity index */
+    ltp_cbk_Q7 = silk_LTP_vq_ptrs_Q7[ psDec->indices.PERIndex ];
+
+    /* Copy one codebook vector per subframe, shifting each entry left by 7 bits */
+    for( sf = 0; sf < psDec->nb_subfr; sf++ ) {
+        const opus_int8 *vec_Q7 = &ltp_cbk_Q7[ psDec->indices.LTPIndex[ sf ] * LTP_ORDER ];
+        opus_int16 *coef_Q14 = &psDecCtrl->LTPCoef_Q14[ sf * LTP_ORDER ];
+
+        for( tap = 0; tap < LTP_ORDER; tap++ ) {
+            coef_Q14[ tap ] = silk_LSHIFT( vec_Q7[ tap ], 7 );
+        }
+    }
+
+    /* Decode LTP scaling */
+    psDecCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ psDec->indices.LTP_scaleIndex ];
+}
diff --git a/drivers/opus/silk/decode_pitch.c b/drivers/opus/silk/decode_pitch.c
new file mode 100644
index 0000000000..3e1dd2d35b
--- /dev/null
+++ b/drivers/opus/silk/decode_pitch.c
@@ -0,0 +1,77 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/***********************************************************
+* Pitch analyser function
+********************************************************** */
+#include "SigProc_FIX.h"
+#include "pitch_est_defines.h"
+
+/* Turn a lag index and a contour index into one pitch lag per subframe.           */
+/* The base lag is the minimum lag plus lagIndex; a per-subframe offset looked up  */
+/* in a contour codebook is added, and each result is limited to the range         */
+/* [ PE_MIN_LAG_MS * Fs_kHz, PE_MAX_LAG_MS * Fs_kHz ]. The codebook is chosen by   */
+/* whether Fs_kHz is 8 and whether nb_subfr equals PE_MAX_NB_SUBFR.                */
+void silk_decode_pitch(
+    opus_int16                  lagIndex,           /* I    Lag index, added to the minimum lag         */
+    opus_int8                   contourIndex,       /* I    Contour codebook index                      */
+    opus_int                    pitch_lags[],       /* O    One pitch value per subframe                */
+    const opus_int              Fs_kHz,             /* I    sampling frequency (kHz)                    */
+    const opus_int              nb_subfr            /* I    number of sub frames                        */
+)
+{
+    const opus_int8 *contour_cb;
+    opus_int n_contours, base_lag, lag_lo, lag_hi, candidate, sf;
+
+    if( nb_subfr != PE_MAX_NB_SUBFR ) {
+        /* Half-length frame: use the 10 ms codebooks */
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 );
+        if( Fs_kHz == 8 ) {
+            contour_cb = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
+            n_contours = PE_NB_CBKS_STAGE2_10MS;
+        } else {
+            contour_cb = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+            n_contours = PE_NB_CBKS_STAGE3_10MS;
+        }
+    } else {
+        /* Full-length frame */
+        if( Fs_kHz == 8 ) {
+            contour_cb = &silk_CB_lags_stage2[ 0 ][ 0 ];
+            n_contours = PE_NB_CBKS_STAGE2_EXT;
+        } else {
+            contour_cb = &silk_CB_lags_stage3[ 0 ][ 0 ];
+            n_contours = PE_NB_CBKS_STAGE3_MAX;
+        }
+    }
+
+    /* Allowed lag range in samples at this sampling rate */
+    lag_lo = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz );
+    lag_hi = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz );
+    base_lag = lag_lo + lagIndex;
+
+    /* Base lag plus the codebook offset for each subframe, kept inside the allowed range */
+    for( sf = 0; sf < nb_subfr; sf++ ) {
+        candidate = base_lag + matrix_ptr( contour_cb, sf, contourIndex, n_contours );
+        pitch_lags[ sf ] = silk_LIMIT( candidate, lag_lo, lag_hi );
+    }
+}
diff --git a/drivers/opus/silk/decode_pulses.c b/drivers/opus/silk/decode_pulses.c
new file mode 100644
index 0000000000..13772f8a57
--- /dev/null
+++ b/drivers/opus/silk/decode_pulses.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/*********************************************/
+/* Decode quantization indices of excitation */
+/*********************************************/
+void silk_decode_pulses( /* Decode the excitation pulses of one frame from the range decoder */
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int pulses[], /* O Excitation signal; filled in whole blocks, so iter * SHELL_CODEC_FRAME_LENGTH entries are written */
+ const opus_int signalType, /* I Sigtype */
+ const opus_int quantOffsetType, /* I quantOffsetType */
+ const opus_int frame_length /* I Frame length */
+)
+{
+ opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex;
+ opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ];
+ opus_int *pulses_ptr;
+ const opus_uint8 *cdf_ptr;
+
+ /*********************/
+ /* Decode rate level */
+ /*********************/
+ RateLevelIndex = ec_dec_icdf( psRangeDec, silk_rate_levels_iCDF[ signalType >> 1 ], 8 ); /* table row chosen by signalType >> 1 */
+
+ /* Calculate number of shell blocks */
+ silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH );
+ iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH );
+ if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { /* frame is not a whole number of blocks: round up */
+ silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
+ iter++;
+ }
+
+ /***************************************************/
+ /* Sum-Weighted-Pulses Decoding */
+ /***************************************************/
+ cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ];
+ for( i = 0; i < iter; i++ ) {
+ nLshifts[ i ] = 0;
+ sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 );
+
+ /* LSB indication */
+ while( sum_pulses[ i ] == MAX_PULSES + 1 ) { /* MAX_PULSES + 1 is an escape symbol: one more LSB, then decode the sum again */
+ nLshifts[ i ]++;
+ /* When we've already got 10 LSBs, we shift the table to not allow (MAX_PULSES + 1) */
+ sum_pulses[ i ] = ec_dec_icdf( psRangeDec,
+ silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 );
+ }
+ }
+
+ /***************************************************/
+ /* Shell decoding */
+ /***************************************************/
+ for( i = 0; i < iter; i++ ) {
+ if( sum_pulses[ i ] > 0 ) {
+ silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] );
+ } else { /* no pulses in this block: zero it without reading the bitstream */
+ silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) );
+ }
+ }
+
+ /***************************************************/
+ /* LSB Decoding */
+ /***************************************************/
+ for( i = 0; i < iter; i++ ) {
+ if( nLshifts[ i ] > 0 ) {
+ nLS = nLshifts[ i ];
+ pulses_ptr = &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ];
+ for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) {
+ abs_q = pulses_ptr[ k ];
+ for( j = 0; j < nLS; j++ ) { /* append nLS decoded bits below the shell-decoded value */
+ abs_q = silk_LSHIFT( abs_q, 1 );
+ abs_q += ec_dec_icdf( psRangeDec, silk_lsb_iCDF, 8 );
+ }
+ pulses_ptr[ k ] = abs_q;
+ }
+ /* Mark the number of pulses non-zero for sign decoding. */
+ sum_pulses[ i ] |= nLS << 5; /* nLS > 0 here, so the result is non-zero even if the decoded sum was 0 */
+ }
+ }
+
+ /****************************************/
+ /* Decode and add signs to pulse signal */
+ /****************************************/
+ silk_decode_signs( psRangeDec, pulses, frame_length, signalType, quantOffsetType, sum_pulses );
+}
diff --git a/drivers/opus/silk/decoder_set_fs.c b/drivers/opus/silk/decoder_set_fs.c
new file mode 100644
index 0000000000..6d2de56647
--- /dev/null
+++ b/drivers/opus/silk/decoder_set_fs.c
@@ -0,0 +1,108 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Set decoder sampling rate; re-initialises resampler, tables and state as needed. Returns the resampler init result, 0 if not re-initialised */
+opus_int silk_decoder_set_fs(
+ silk_decoder_state *psDec, /* I/O Decoder state pointer */
+ opus_int fs_kHz, /* I Sampling frequency (kHz) */
+ opus_int32 fs_API_Hz /* I API Sampling frequency (Hz) */
+)
+{
+ opus_int frame_length, ret = 0;
+
+ silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
+ silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 );
+
+ /* New (sub)frame length */
+ psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz );
+ frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length ); /* psDec->frame_length itself is only updated further down */
+
+ /* Initialize resampler when switching internal or external sampling frequency */
+ if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) {
+ /* Initialize the resampler for dec_API.c preparing resampling from fs_kHz to API_fs_Hz */
+ ret += silk_resampler_init( &psDec->resampler_state, silk_SMULBB( fs_kHz, 1000 ), fs_API_Hz, 0 );
+
+ psDec->fs_API_hz = fs_API_Hz;
+ }
+
+ if( psDec->fs_kHz != fs_kHz || frame_length != psDec->frame_length ) { /* internal rate or frame length changed */
+ if( fs_kHz == 8 ) { /* pitch contour table depends on 8 kHz vs other rates, and on the subframe count */
+ if( psDec->nb_subfr == MAX_NB_SUBFR ) {
+ psDec->pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+ } else {
+ psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+ }
+ } else {
+ if( psDec->nb_subfr == MAX_NB_SUBFR ) {
+ psDec->pitch_contour_iCDF = silk_pitch_contour_iCDF;
+ } else {
+ psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+ }
+ }
+ if( psDec->fs_kHz != fs_kHz ) { /* internal rate changed: pick rate-dependent tables and reset decoder history */
+ psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
+ if( fs_kHz == 8 || fs_kHz == 12 ) {
+ psDec->LPC_order = MIN_LPC_ORDER;
+ psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB;
+ } else {
+ psDec->LPC_order = MAX_LPC_ORDER;
+ psDec->psNLSF_CB = &silk_NLSF_CB_WB;
+ }
+ if( fs_kHz == 16 ) {
+ psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
+ } else if( fs_kHz == 12 ) {
+ psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
+ } else if( fs_kHz == 8 ) {
+ psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
+ } else {
+ /* unsupported sampling rate */
+ silk_assert( 0 );
+ }
+ psDec->first_frame_after_reset = 1;
+ psDec->lagPrev = 100;
+ psDec->LastGainIndex = 10;
+ psDec->prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+ silk_memset( psDec->outBuf, 0, sizeof(psDec->outBuf));
+ silk_memset( psDec->sLPC_Q14_buf, 0, sizeof(psDec->sLPC_Q14_buf) );
+ }
+
+ psDec->fs_kHz = fs_kHz; /* commit the new configuration */
+ psDec->frame_length = frame_length;
+ }
+
+ /* Check that settings are valid */
+ silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH );
+
+ return ret;
+}
+
diff --git a/drivers/opus/silk/define.h b/drivers/opus/silk/define.h
new file mode 100644
index 0000000000..c47aca9f58
--- /dev/null
+++ b/drivers/opus/silk/define.h
@@ -0,0 +1,235 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_DEFINE_H
+#define SILK_DEFINE_H
+
+#include "errors.h"
+#include "typedef.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Max number of encoder channels (1/2); silk_InitEncoder initialises this many channel states */
+#define ENCODER_NUM_CHANNELS 2
+/* Number of decoder channels (1/2) */
+#define DECODER_NUM_CHANNELS 2
+
+#define MAX_FRAMES_PER_PACKET 3
+
+/* Limits on bitrate (bits per second) */
+#define MIN_TARGET_RATE_BPS 5000
+#define MAX_TARGET_RATE_BPS 80000
+#define TARGET_RATE_TAB_SZ 8
+
+/* LBRR thresholds (bits per second) */
+#define LBRR_NB_MIN_RATE_BPS 12000
+#define LBRR_MB_MIN_RATE_BPS 14000
+#define LBRR_WB_MIN_RATE_BPS 16000
+
+/* DTX settings (frame counts) */
+#define NB_SPEECH_FRAMES_BEFORE_DTX 10 /* eq 200 ms */
+#define MAX_CONSECUTIVE_DTX 20 /* eq 400 ms */
+
+/* Maximum sampling frequency (kHz) */
+#define MAX_FS_KHZ 16 /* scales the MAX_* buffer lengths defined below */
+#define MAX_API_FS_KHZ 48
+
+/* Signal types */
+#define TYPE_NO_VOICE_ACTIVITY 0 /* silk_decoder_set_fs stores this in prevSignalType on a rate change */
+#define TYPE_UNVOICED 1
+#define TYPE_VOICED 2
+
+/* Conditional coding types */
+#define CODE_INDEPENDENTLY 0 /* silk_Encode uses this for LBRR frames with no preceding LBRR frame */
+#define CODE_INDEPENDENTLY_NO_LTP_SCALING 1
+#define CODE_CONDITIONALLY 2 /* silk_Encode uses this when the previous frame also carries LBRR data */
+
+/* Settings for stereo processing */
+#define STEREO_QUANT_TAB_SIZE 16
+#define STEREO_QUANT_SUB_STEPS 5
+#define STEREO_INTERP_LEN_MS 8 /* must be even */
+#define STEREO_RATIO_SMOOTH_COEF 0.01 /* smoothing coef for signal norms and stereo width */
+
+/* Range of pitch lag estimates */
+#define PITCH_EST_MIN_LAG_MS 2 /* 2 ms -> 500 Hz */
+#define PITCH_EST_MAX_LAG_MS 18 /* 18 ms -> 56 Hz */
+
+/* Maximum number of subframes */
+#define MAX_NB_SUBFR 4
+
+/* Frame and subframe durations (ms) and their maximum lengths in samples at MAX_FS_KHZ */
+#define LTP_MEM_LENGTH_MS 20
+#define SUB_FRAME_LENGTH_MS 5
+#define MAX_SUB_FRAME_LENGTH ( SUB_FRAME_LENGTH_MS * MAX_FS_KHZ ) /* 5 * 16 = 80 samples */
+#define MAX_FRAME_LENGTH_MS ( SUB_FRAME_LENGTH_MS * MAX_NB_SUBFR ) /* 5 * 4 = 20 ms */
+#define MAX_FRAME_LENGTH ( MAX_FRAME_LENGTH_MS * MAX_FS_KHZ ) /* 20 * 16 = 320 samples */
+
+/* Milliseconds of lookahead for pitch analysis */
+#define LA_PITCH_MS 2
+#define LA_PITCH_MAX ( LA_PITCH_MS * MAX_FS_KHZ ) /* 2 * 16 = 32 samples */
+
+/* Order of LPC used in find pitch */
+#define MAX_FIND_PITCH_LPC_ORDER 16
+
+/* Length of LPC window used in find pitch */
+#define FIND_PITCH_LPC_WIN_MS ( 20 + (LA_PITCH_MS << 1) ) /* 20 + 4 = 24 ms */
+#define FIND_PITCH_LPC_WIN_MS_2_SF ( 10 + (LA_PITCH_MS << 1) ) /* 10 + 4 = 14 ms */
+#define FIND_PITCH_LPC_WIN_MAX ( FIND_PITCH_LPC_WIN_MS * MAX_FS_KHZ ) /* 24 * 16 = 384 samples */
+
+/* Milliseconds of lookahead for noise shape analysis */
+#define LA_SHAPE_MS 5
+#define LA_SHAPE_MAX ( LA_SHAPE_MS * MAX_FS_KHZ ) /* 5 * 16 = 80 samples */
+
+/* Maximum length of LPC window used in noise shape analysis */
+#define SHAPE_LPC_WIN_MAX ( 15 * MAX_FS_KHZ ) /* 15 * 16 = 240 samples */
+
+/* dB level of lowest gain quantization level */
+#define MIN_QGAIN_DB 2
+/* dB level of highest gain quantization level */
+#define MAX_QGAIN_DB 88
+/* Number of gain quantization levels */
+#define N_LEVELS_QGAIN 64
+/* Max increase in gain quantization index */
+#define MAX_DELTA_GAIN_QUANT 36
+/* Max decrease in gain quantization index */
+#define MIN_DELTA_GAIN_QUANT -4
+
+/* Quantization offsets (multiples of 4) */
+#define OFFSET_VL_Q10 32
+#define OFFSET_VH_Q10 100
+#define OFFSET_UVL_Q10 100
+#define OFFSET_UVH_Q10 240
+
+#define QUANT_LEVEL_ADJUST_Q10 80
+
+/* Maximum numbers of iterations used to stabilize an LPC vector */
+#define MAX_LPC_STABILIZE_ITERATIONS 16
+#define MAX_PREDICTION_POWER_GAIN 1e4f
+#define MAX_PREDICTION_POWER_GAIN_AFTER_RESET 1e2f
+
+#define MAX_LPC_ORDER 16 /* silk_decoder_set_fs selects this order at 16 kHz */
+#define MIN_LPC_ORDER 10 /* silk_decoder_set_fs selects this order at 8 and 12 kHz */
+
+/* Find Pred Coef defines */
+#define LTP_ORDER 5
+
+/* LTP quantization settings */
+#define NB_LTP_CBKS 3
+
+/* Flag to use harmonic noise shaping */
+#define USE_HARM_SHAPING 1
+
+/* Max LPC order of noise shaping filters */
+#define MAX_SHAPE_LPC_ORDER 16
+
+#define HARM_SHAPE_FIR_TAPS 3
+
+/* Maximum number of delayed decision states */
+#define MAX_DEL_DEC_STATES 4
+
+#define LTP_BUF_LENGTH 512
+#define LTP_MASK ( LTP_BUF_LENGTH - 1 ) /* 511 */
+
+#define DECISION_DELAY 32
+#define DECISION_DELAY_MASK ( DECISION_DELAY - 1 ) /* 31 */
+
+/* Block size (in samples) used for excitation entropy coding, and the resulting max number of blocks per frame */
+#define SHELL_CODEC_FRAME_LENGTH 16
+#define LOG2_SHELL_CODEC_FRAME_LENGTH 4 /* silk_decode_pulses asserts 1 << 4 == SHELL_CODEC_FRAME_LENGTH */
+#define MAX_NB_SHELL_BLOCKS ( MAX_FRAME_LENGTH / SHELL_CODEC_FRAME_LENGTH ) /* 320 / 16 = 20 */
+
+/* Number of rate levels, for entropy coding of excitation */
+#define N_RATE_LEVELS 10
+
+/* Maximum sum of pulses per shell coding frame */
+#define MAX_PULSES 16 /* silk_decode_pulses treats a decoded sum of MAX_PULSES + 1 as "one more LSB" */
+
+#define MAX_MATRIX_SIZE MAX_LPC_ORDER /* Max of LPC Order and LTP order */
+
+#if( MAX_LPC_ORDER > DECISION_DELAY ) /* 16 > 32 is false with the values above, so the #else branch applies */
+# define NSQ_LPC_BUF_LENGTH MAX_LPC_ORDER
+#else
+# define NSQ_LPC_BUF_LENGTH DECISION_DELAY
+#endif
+
+/***************************/
+/* Voice activity detector */
+/***************************/
+#define VAD_N_BANDS 4
+
+#define VAD_INTERNAL_SUBFRAMES_LOG2 2
+#define VAD_INTERNAL_SUBFRAMES ( 1 << VAD_INTERNAL_SUBFRAMES_LOG2 ) /* 1 << 2 = 4 */
+
+#define VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 1024 /* Must be < 4096 */
+#define VAD_NOISE_LEVELS_BIAS 50
+
+/* Sigmoid settings */
+#define VAD_NEGATIVE_OFFSET_Q5 128 /* sigmoid is 0 at -128 */
+#define VAD_SNR_FACTOR_Q16 45000
+
+/* smoothing for SNR measurement */
+#define VAD_SNR_SMOOTH_COEF_Q18 4096
+
+/* Size of the piecewise linear cosine approximation table for the LSFs */
+#define LSF_COS_TAB_SZ_FIX 128
+
+/******************/
+/* NLSF quantizer */
+/******************/
+#define NLSF_W_Q 2
+#define NLSF_VQ_MAX_VECTORS 32
+#define NLSF_VQ_MAX_SURVIVORS 32
+#define NLSF_QUANT_MAX_AMPLITUDE 4
+#define NLSF_QUANT_MAX_AMPLITUDE_EXT 10
+#define NLSF_QUANT_LEVEL_ADJ 0.1
+#define NLSF_QUANT_DEL_DEC_STATES_LOG2 2
+#define NLSF_QUANT_DEL_DEC_STATES ( 1 << NLSF_QUANT_DEL_DEC_STATES_LOG2 ) /* 1 << 2 = 4 */
+
+/* Transition filtering for mode switching */
+#define TRANSITION_TIME_MS 5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/
+#define TRANSITION_NB 3 /* Hardcoded in tables */
+#define TRANSITION_NA 2 /* Hardcoded in tables */
+#define TRANSITION_INT_NUM 5 /* Hardcoded in tables */
+#define TRANSITION_FRAMES ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS ) /* 5120 / 20 = 256 */
+#define TRANSITION_INT_STEPS ( TRANSITION_FRAMES / ( TRANSITION_INT_NUM - 1 ) ) /* 256 / 4 = 64 */
+
+/* BWE factors to apply after packet loss */
+#define BWE_AFTER_LOSS_Q16 63570
+
+/* Defines for CN generation */
+#define CNG_BUF_MASK_MAX 255 /* 2^floor(log2(MAX_FRAME_LENGTH))-1 */
+#define CNG_GAIN_SMTH_Q16 4634 /* nominally 0.25^(1/4); NOTE(review): 0.25^(1/4) ~= 0.7071 would be 46341 in Q16, while 4634 is ~0.0707 - value left unchanged, confirm intent */
+#define CNG_NLSF_SMTH_Q16 16348 /* nominally 0.25; NOTE(review): exact 0.25 in Q16 is 16384, 16348 is ~0.2495 - value left unchanged, confirm intent */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/enc_API.c b/drivers/opus/silk/enc_API.c
new file mode 100644
index 0000000000..66a9bb67de
--- /dev/null
+++ b/drivers/opus/silk/enc_API.c
@@ -0,0 +1,556 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#include "define.h"
+#include "API.h"
+#include "control.h"
+#include "typedef.h"
+#include "stack_alloc.h"
+#include "structs.h"
+#include "tuning_parameters.h"
+#ifdef OPUS_FIXED_POINT
+#include "main_FIX.h"
+#else
+#include "main_FLP.h"
+#endif
+
+/***************************************/
+/* Read control structure from encoder */
+/***************************************/
+static opus_int silk_QueryEncoder( /* O Returns error code */
+ const void *encState, /* I State (only read) */
+ silk_EncControlStruct *encStatus /* O Encoder Status */
+); /* forward declaration: silk_InitEncoder calls this before its definition further down */
+
+/****************************************/
+/* Encoder functions */
+/****************************************/
+
+opus_int silk_Get_Encoder_Size( /* O Returns error code (always SILK_NO_ERROR here) */
+ opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */
+)
+{
+ opus_int ret = SILK_NO_ERROR;
+
+ *encSizeBytes = sizeof( silk_encoder ); /* the encoder state is a single silk_encoder struct */
+
+ return ret;
+}
+
+/*************************/
+/* Init or Reset encoder */
+/*************************/
+opus_int silk_InitEncoder( /* O Returns error code (sum of the codes returned by the calls below) */
+ void *encState, /* I/O State; treated as a silk_encoder and fully overwritten */
+ int arch, /* I Run-time architecture */
+ silk_EncControlStruct *encStatus /* O Encoder Status */
+)
+{
+ silk_encoder *psEnc;
+ opus_int n, ret = SILK_NO_ERROR;
+
+ psEnc = (silk_encoder *)encState;
+
+ /* Reset encoder */
+ silk_memset( psEnc, 0, sizeof( silk_encoder ) );
+ for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) { /* every channel state is initialised, not only the active one */
+ if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
+ silk_assert( 0 );
+ }
+ }
+
+ psEnc->nChannelsAPI = 1; /* start out as mono on both the API and the internal side */
+ psEnc->nChannelsInternal = 1;
+
+ /* Read control structure */
+ if( ret += silk_QueryEncoder( encState, encStatus ) ) { /* fills encStatus from the freshly reset state */
+ silk_assert( 0 );
+ }
+
+ return ret;
+}
+
+/***************************************/
+/* Read control structure from encoder */
+/***************************************/
+static opus_int silk_QueryEncoder( /* O Returns error code */
+ const void *encState, /* I State */
+ silk_EncControlStruct *encStatus /* O Encoder Status */
+)
+{
+ opus_int ret = SILK_NO_ERROR;
+ silk_encoder_state_Fxx *state_Fxx;
+ silk_encoder *psEnc = (silk_encoder *)encState;
+
+ state_Fxx = psEnc->state_Fxx;
+
+ encStatus->nChannelsAPI = psEnc->nChannelsAPI;
+ encStatus->nChannelsInternal = psEnc->nChannelsInternal;
+ encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz;
+ encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz;
+ encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz;
+ encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz;
+ encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms;
+ encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps;
+ encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc;
+ encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity;
+ encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC;
+ encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX;
+ encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR;
+ encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
+ encStatus->allowBandwidthSwitch = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch;
+ encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0;
+
+ return ret;
+}
+
+
+/**************************/
+/* Encode frame with Silk */
+/**************************/
+/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */
+/* encControl->payloadSize_ms is set to */
+opus_int silk_Encode( /* O Returns error code */
+ void *encState, /* I/O State */
+ silk_EncControlStruct *encControl, /* I Control status */
+ const opus_int16 *samplesIn, /* I Speech sample input vector */
+ opus_int nSamplesIn, /* I Number of samples in input vector */
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
+ const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */
+)
+{
+ opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
+ opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
+ opus_int nSamplesFromInput = 0, nSamplesFromInputMax;
+ opus_int speech_act_thr_for_switch_Q8;
+ opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
+ silk_encoder *psEnc = ( silk_encoder * )encState;
+ VARDECL( opus_int16, buf );
+ opus_int transition, curr_block, tot_blocks;
+ SAVE_STACK;
+
+ if (encControl->reducedDependency)
+ {
+ psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
+ psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
+ }
+ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
+
+ /* Check values in encoder control structure */
+ if( ( ret = check_control_input( encControl ) != 0 ) ) {
+ silk_assert( 0 );
+ RESTORE_STACK;
+ return ret;
+ }
+
+ encControl->switchReady = 0;
+
+ if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
+ /* Mono -> Stereo transition: init state of second channel and stereo state */
+ ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
+ silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
+ silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
+ psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
+ psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
+ psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
+ psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
+ psEnc->sStereo.width_prev_Q14 = 0;
+ psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
+ if( psEnc->nChannelsAPI == 2 ) {
+ silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) );
+ silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) );
+ }
+ }
+
+ transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal);
+
+ psEnc->nChannelsAPI = encControl->nChannelsAPI;
+ psEnc->nChannelsInternal = encControl->nChannelsInternal;
+
+ nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
+ tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
+ curr_block = 0;
+ if( prefillFlag ) {
+ /* Only accept input length of 10 ms */
+ if( nBlocksOf10ms != 1 ) {
+ silk_assert( 0 );
+ RESTORE_STACK;
+ return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
+ }
+ /* Reset Encoder */
+ for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
+ silk_assert( !ret );
+ }
+ tmp_payloadSize_ms = encControl->payloadSize_ms;
+ encControl->payloadSize_ms = 10;
+ tmp_complexity = encControl->complexity;
+ encControl->complexity = 0;
+ for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
+ psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
+ }
+ } else {
+ /* Only accept input lengths that are a multiple of 10 ms */
+ if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
+ silk_assert( 0 );
+ RESTORE_STACK;
+ return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
+ }
+ /* Make sure no more than one packet can be produced */
+ if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
+ silk_assert( 0 );
+ RESTORE_STACK;
+ return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
+ }
+ }
+
+ TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 );
+ for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ /* Force the side channel to the same rate as the mid */
+ opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
+ if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
+ silk_assert( 0 );
+ RESTORE_STACK;
+ return ret;
+ }
+ if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
+ for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
+ psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
+ }
+ }
+ psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
+ }
+ silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
+
+ /* Input buffering/resampling and encoding */
+ nSamplesToBufferMax =
+ 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
+ nSamplesFromInputMax =
+ silk_DIV32_16( nSamplesToBufferMax *
+ psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,
+ psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
+ ALLOC( buf, nSamplesFromInputMax, opus_int16 );
+ while( 1 ) {
+ nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
+ nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax );
+ nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
+ /* Resample and write to buffer */
+ if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
+ opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
+ for( n = 0; n < nSamplesFromInput; n++ ) {
+ buf[ n ] = samplesIn[ 2 * n ];
+ }
+ /* Making sure to start both resamplers from the same state when switching from mono to stereo */
+ if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
+ silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
+ }
+
+ ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
+ &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
+
+ nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
+ nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
+ for( n = 0; n < nSamplesFromInput; n++ ) {
+ buf[ n ] = samplesIn[ 2 * n + 1 ];
+ }
+ ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
+ &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+
+ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
+ } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
+ /* Combine left and right channels before resampling */
+ for( n = 0; n < nSamplesFromInput; n++ ) {
+ sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
+ buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
+ }
+ ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
+ &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+ /* On the first mono frame, average the results for the two resampler states */
+ if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) {
+ ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
+ &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+ for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) {
+ psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] =
+ silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
+ + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
+ }
+ }
+ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
+ } else {
+ silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
+ silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
+ ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
+ &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
+ }
+
+ samplesIn += nSamplesFromInput * encControl->nChannelsAPI;
+ nSamplesIn -= nSamplesFromInput;
+
+ /* Default */
+ psEnc->allowBandwidthSwitch = 0;
+
+ /* Silk encoder */
+ if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) {
+ /* Enough data in input buffer, so encode */
+ silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+ silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );
+
+ /* Deal with LBRR data */
+ if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) {
+ /* Create space at start of payload for VAD and FEC flags */
+ opus_uint8 iCDF[ 2 ] = { 0, 0 };
+ iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
+ ec_enc_icdf( psRangeEnc, 0, iCDF, 8 );
+
+ /* Encode any LBRR data from previous packet */
+ /* Encode LBRR flags */
+ for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ LBRR_symbol = 0;
+ for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
+ LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i );
+ }
+ psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0;
+ if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) {
+ ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 );
+ }
+ }
+
+ /* Code LBRR indices and excitation signals */
+ for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
+ for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) {
+ opus_int condCoding;
+
+ if( encControl->nChannelsInternal == 2 && n == 0 ) {
+ silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] );
+ /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
+ if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) {
+ silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] );
+ }
+ }
+ /* Use conditional coding if previous frame available */
+ if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) {
+ condCoding = CODE_CONDITIONALLY;
+ } else {
+ condCoding = CODE_INDEPENDENTLY;
+ }
+ silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding );
+ silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType,
+ psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length );
+ }
+ }
+ }
+
+ /* Reset LBRR flags */
+ for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) );
+ }
+ }
+
+ silk_HP_variable_cutoff( psEnc->state_Fxx );
+
+ /* Total target bits for packet */
+ nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
+ /* Subtract half of the bits already used */
+ if( !prefillFlag ) {
+ nBits -= ec_tell( psRangeEnc ) >> 1;
+ }
+ /* Divide by number of uncoded frames left in packet */
+ nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket - psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded );
+ /* Convert to bits/second */
+ if( encControl->payloadSize_ms == 10 ) {
+ TargetRate_bps = silk_SMULBB( nBits, 100 );
+ } else {
+ TargetRate_bps = silk_SMULBB( nBits, 50 );
+ }
+ /* Subtract fraction of bits in excess of target in previous packets */
+ TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
+ /* Never exceed input bitrate */
+ TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 );
+
+ /* Convert Left/Right to Mid/Side */
+ if( encControl->nChannelsInternal == 2 ) {
+ silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
+ psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
+ MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
+ psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+ if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+ /* Reset side channel encoder memory for first frame with side coding */
+ if( psEnc->prev_decode_only_middle == 1 ) {
+ silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
+ silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
+ silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
+ psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
+ psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
+ psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
+ psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+ psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536;
+ psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
+ }
+ silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
+ } else {
+ psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
+ }
+ if( !prefillFlag ) {
+ silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+ if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+ silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+ }
+ }
+ } else {
+ /* Buffering */
+ silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
+ silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
+ }
+ silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
+
+ /* Encode */
+ for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ opus_int maxBits, useCBR;
+
+ /* Handling rate constraints */
+ maxBits = encControl->maxBits;
+ if( tot_blocks == 2 && curr_block == 0 ) {
+ maxBits = maxBits * 3 / 5;
+ } else if( tot_blocks == 3 ) {
+ if( curr_block == 0 ) {
+ maxBits = maxBits * 2 / 5;
+ } else if( curr_block == 1 ) {
+ maxBits = maxBits * 3 / 4;
+ }
+ }
+ useCBR = encControl->useCBR && curr_block == tot_blocks - 1;
+
+ if( encControl->nChannelsInternal == 1 ) {
+ channelRate_bps = TargetRate_bps;
+ } else {
+ channelRate_bps = MStargetRates_bps[ n ];
+ if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) {
+ useCBR = 0;
+ /* Give mid up to 1/2 of the max bits for that frame */
+ maxBits -= encControl->maxBits / ( tot_blocks * 2 );
+ }
+ }
+
+ if( channelRate_bps > 0 ) {
+ opus_int condCoding;
+
+ silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps );
+
+ /* Use independent coding if no previous frame available */
+ if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) {
+ condCoding = CODE_INDEPENDENTLY;
+ } else if( n > 0 && psEnc->prev_decode_only_middle ) {
+ /* If we skipped a side frame in this packet, we don't
+ need LTP scaling; the LTP state is well-defined. */
+ condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
+ } else {
+ condCoding = CODE_CONDITIONALLY;
+ }
+ if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) {
+ silk_assert( 0 );
+ }
+ }
+ psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
+ psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
+ psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
+ }
+ psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];
+
+ /* Insert VAD and FEC flags at beginning of bitstream */
+ if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) {
+ flags = 0;
+ for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
+ flags = silk_LSHIFT( flags, 1 );
+ flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ];
+ }
+ flags = silk_LSHIFT( flags, 1 );
+ flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
+ }
+ if( !prefillFlag ) {
+ ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
+ }
+
+ /* Return zero bytes if all channels DTXed */
+ if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
+ *nBytesOut = 0;
+ }
+
+ psEnc->nBitsExceeded += *nBytesOut * 8;
+ psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
+ psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 );
+
+ /* Update flag indicating if bandwidth switching is allowed */
+ speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ),
+ SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms );
+ if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) {
+ psEnc->allowBandwidthSwitch = 1;
+ psEnc->timeSinceSwitchAllowed_ms = 0;
+ } else {
+ psEnc->allowBandwidthSwitch = 0;
+ psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms;
+ }
+ }
+
+ if( nSamplesIn == 0 ) {
+ break;
+ }
+ } else {
+ break;
+ }
+ curr_block++;
+ }
+
+ psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
+
+ encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
+ encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
+ encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
+ encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14;
+ if( prefillFlag ) {
+ encControl->payloadSize_ms = tmp_payloadSize_ms;
+ encControl->complexity = tmp_complexity;
+ for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+ psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
+ psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0;
+ }
+ }
+
+ RESTORE_STACK;
+ return ret;
+}
+
diff --git a/drivers/opus/silk/encode_indices.c b/drivers/opus/silk/encode_indices.c
new file mode 100644
index 0000000000..c6679b34f6
--- /dev/null
+++ b/drivers/opus/silk/encode_indices.c
@@ -0,0 +1,181 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Encode side-information parameters to payload: type/offset, gains, NLSFs, then (voiced frames only) pitch lag, contour and LTP data, and finally the seed */
+void silk_encode_indices(
+ silk_encoder_state *psEncC, /* I/O Encoder state; ec_prevSignalType (and, for voiced frames, ec_prevLagIndex) are updated here */
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ opus_int FrameIndex, /* I Frame number; only used to select indices_LBRR[ FrameIndex ] when encode_LBRR is set */
+ opus_int encode_LBRR, /* I Flag indicating LBRR data is being encoded */
+ opus_int condCoding /* I The type of conditional coding to use */
+)
+{
+ opus_int i, k, typeOffset;
+ opus_int encode_absolute_lagIndex, delta_lagIndex;
+ opus_int16 ec_ix[ MAX_LPC_ORDER ]; /* written by silk_NLSF_unpack(), then used as offsets into psNLSF_CB->ec_iCDF */
+ opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; /* written by silk_NLSF_unpack(); not read in this function */
+ const SideInfoIndices *psIndices;
+
+ if( encode_LBRR ) { /* pick the index set to encode: per-frame LBRR indices or the regular frame indices */
+ psIndices = &psEncC->indices_LBRR[ FrameIndex ];
+ } else {
+ psIndices = &psEncC->indices;
+ }
+
+ /*******************************************/
+ /* Encode signal type and quantizer offset */
+ /*******************************************/
+ typeOffset = 2 * psIndices->signalType + psIndices->quantOffsetType;
+ silk_assert( typeOffset >= 0 && typeOffset < 6 );
+ silk_assert( encode_LBRR == 0 || typeOffset >= 2 );
+ if( encode_LBRR || typeOffset >= 2 ) { /* VAD table takes the symbol offset by 2; LBRR frames always use this table */
+ ec_enc_icdf( psRangeEnc, typeOffset - 2, silk_type_offset_VAD_iCDF, 8 );
+ } else {
+ ec_enc_icdf( psRangeEnc, typeOffset, silk_type_offset_no_VAD_iCDF, 8 );
+ }
+
+ /****************/
+ /* Encode gains */
+ /****************/
+ /* first subframe */
+ if( condCoding == CODE_CONDITIONALLY ) {
+ /* conditional coding */
+ silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 );
+ ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ], silk_delta_gain_iCDF, 8 );
+ } else {
+ /* independent coding, in two stages: MSB bits followed by 3 LSBs */
+ silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < N_LEVELS_QGAIN );
+ ec_enc_icdf( psRangeEnc, silk_RSHIFT( psIndices->GainsIndices[ 0 ], 3 ), silk_gain_iCDF[ psIndices->signalType ], 8 );
+ ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ] & 7, silk_uniform8_iCDF, 8 );
+ }
+
+ /* remaining subframes: always coded with the delta gain table */
+ for( i = 1; i < psEncC->nb_subfr; i++ ) {
+ silk_assert( psIndices->GainsIndices[ i ] >= 0 && psIndices->GainsIndices[ i ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 );
+ ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ i ], silk_delta_gain_iCDF, 8 );
+ }
+
+ /****************/
+ /* Encode NLSFs */
+ /****************/
+ ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ 0 ], &psEncC->psNLSF_CB->CB1_iCDF[ ( psIndices->signalType >> 1 ) * psEncC->psNLSF_CB->nVectors ], 8 );
+ silk_NLSF_unpack( ec_ix, pred_Q8, psEncC->psNLSF_CB, psIndices->NLSFIndices[ 0 ] );
+ silk_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder );
+ for( i = 0; i < psEncC->psNLSF_CB->order; i++ ) {
+ if( psIndices->NLSFIndices[ i+1 ] >= NLSF_QUANT_MAX_AMPLITUDE ) { /* at or above +max: code the top symbol, then the excess with the extension table */
+ ec_enc_icdf( psRangeEnc, 2 * NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+ ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
+ } else if( psIndices->NLSFIndices[ i+1 ] <= -NLSF_QUANT_MAX_AMPLITUDE ) { /* at or below -max: code symbol 0, then the excess magnitude */
+ ec_enc_icdf( psRangeEnc, 0, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+ ec_enc_icdf( psRangeEnc, -psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
+ } else { /* in range: shift by +max to get a non-negative symbol */
+ ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] + NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+ }
+ }
+
+ /* Encode NLSF interpolation factor (only when the frame has MAX_NB_SUBFR subframes) */
+ if( psEncC->nb_subfr == MAX_NB_SUBFR ) {
+ silk_assert( psIndices->NLSFInterpCoef_Q2 >= 0 && psIndices->NLSFInterpCoef_Q2 < 5 );
+ ec_enc_icdf( psRangeEnc, psIndices->NLSFInterpCoef_Q2, silk_NLSF_interpolation_factor_iCDF, 8 );
+ }
+
+ if( psIndices->signalType == TYPE_VOICED )
+ {
+ /*********************/
+ /* Encode pitch lags */
+ /*********************/
+ /* lag index */
+ encode_absolute_lagIndex = 1;
+ if( condCoding == CODE_CONDITIONALLY && psEncC->ec_prevSignalType == TYPE_VOICED ) {
+ /* Delta Encoding */
+ delta_lagIndex = psIndices->lagIndex - psEncC->ec_prevLagIndex;
+ if( delta_lagIndex < -8 || delta_lagIndex > 11 ) { /* outside the delta range: code symbol 0 and fall through to absolute coding below */
+ delta_lagIndex = 0;
+ } else {
+ delta_lagIndex = delta_lagIndex + 9;
+ encode_absolute_lagIndex = 0; /* Only use delta */
+ }
+ silk_assert( delta_lagIndex >= 0 && delta_lagIndex < 21 );
+ ec_enc_icdf( psRangeEnc, delta_lagIndex, silk_pitch_delta_iCDF, 8 );
+ }
+ if( encode_absolute_lagIndex ) {
+ /* Absolute encoding: lagIndex is split into quotient and remainder of a division by fs_kHz / 2 */
+ opus_int32 pitch_high_bits, pitch_low_bits;
+ pitch_high_bits = silk_DIV32_16( psIndices->lagIndex, silk_RSHIFT( psEncC->fs_kHz, 1 ) );
+ pitch_low_bits = psIndices->lagIndex - silk_SMULBB( pitch_high_bits, silk_RSHIFT( psEncC->fs_kHz, 1 ) );
+ silk_assert( pitch_low_bits < psEncC->fs_kHz / 2 );
+ silk_assert( pitch_high_bits < 32 );
+ ec_enc_icdf( psRangeEnc, pitch_high_bits, silk_pitch_lag_iCDF, 8 );
+ ec_enc_icdf( psRangeEnc, pitch_low_bits, psEncC->pitch_lag_low_bits_iCDF, 8 );
+ }
+ psEncC->ec_prevLagIndex = psIndices->lagIndex; /* reference for delta coding on a later call */
+
+ /* Contour index */
+ silk_assert( psIndices->contourIndex >= 0 );
+ silk_assert( ( psIndices->contourIndex < 34 && psEncC->fs_kHz > 8 && psEncC->nb_subfr == 4 ) ||
+ ( psIndices->contourIndex < 11 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 4 ) ||
+ ( psIndices->contourIndex < 12 && psEncC->fs_kHz > 8 && psEncC->nb_subfr == 2 ) ||
+ ( psIndices->contourIndex < 3 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 2 ) );
+ ec_enc_icdf( psRangeEnc, psIndices->contourIndex, psEncC->pitch_contour_iCDF, 8 );
+
+ /********************/
+ /* Encode LTP gains */
+ /********************/
+ /* PERIndex value; it also selects the LTP gain table used for the codebook indices below */
+ silk_assert( psIndices->PERIndex >= 0 && psIndices->PERIndex < 3 );
+ ec_enc_icdf( psRangeEnc, psIndices->PERIndex, silk_LTP_per_index_iCDF, 8 );
+
+ /* Codebook Indices, one per subframe */
+ for( k = 0; k < psEncC->nb_subfr; k++ ) {
+ silk_assert( psIndices->LTPIndex[ k ] >= 0 && psIndices->LTPIndex[ k ] < ( 8 << psIndices->PERIndex ) );
+ ec_enc_icdf( psRangeEnc, psIndices->LTPIndex[ k ], silk_LTP_gain_iCDF_ptrs[ psIndices->PERIndex ], 8 );
+ }
+
+ /**********************/
+ /* Encode LTP scaling */
+ /**********************/
+ if( condCoding == CODE_INDEPENDENTLY ) { /* the scale index is only written for independently coded frames */
+ silk_assert( psIndices->LTP_scaleIndex >= 0 && psIndices->LTP_scaleIndex < 3 );
+ ec_enc_icdf( psRangeEnc, psIndices->LTP_scaleIndex, silk_LTPscale_iCDF, 8 );
+ }
+ silk_assert( !condCoding || psIndices->LTP_scaleIndex == 0 );
+ }
+
+ psEncC->ec_prevSignalType = psIndices->signalType; /* checked on the next call when deciding whether delta lag coding is possible */
+
+ /***************/
+ /* Encode seed */
+ /***************/
+ silk_assert( psIndices->Seed >= 0 && psIndices->Seed < 4 );
+ ec_enc_icdf( psRangeEnc, psIndices->Seed, silk_uniform4_iCDF, 8 );
+}
diff --git a/drivers/opus/silk/encode_pulses.c b/drivers/opus/silk/encode_pulses.c
new file mode 100644
index 0000000000..d148b9d1e6
--- /dev/null
+++ b/drivers/opus/silk/encode_pulses.c
@@ -0,0 +1,206 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/*********************************************/
+/* Encode quantization indices of excitation */
+/*********************************************/
+
+static OPUS_INLINE opus_int combine_and_check( /* return 0 if every pair sum fits, 1 otherwise */
+    opus_int *pulses_comb, /* O pairwise sums (may be the same array as pulses_in) */
+    const opus_int *pulses_in, /* I 2 * len input values */
+    opus_int max_pulses, /* I max value for sum of pulses */
+    opus_int len /* I number of output values */
+)
+{
+    opus_int out_ix = 0, pair_sum;
+
+    while( out_ix < len ) {
+        pair_sum = pulses_in[ 2 * out_ix + 1 ];
+        pair_sum += pulses_in[ 2 * out_ix ];
+        if( pair_sum > max_pulses ) {
+            return 1; /* stop at the first overflow; outputs written so far are kept */
+        }
+        pulses_comb[ out_ix++ ] = pair_sum; /* stored only after both inputs were read */
+    }
+    return 0;
+}
+
+/* Encode quantization indices of excitation: rate level, per-block pulse sums, shell-coded magnitudes, LSBs of downscaled blocks, then signs */
+void silk_encode_pulses(
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ const opus_int signalType, /* I Signal type */
+ const opus_int quantOffsetType, /* I quantOffsetType */
+ opus_int8 pulses[], /* I quantization indices; zero-padded in place past frame_length when that is not a multiple of SHELL_CODEC_FRAME_LENGTH */
+ const opus_int frame_length /* I Frame length */
+)
+{
+ opus_int i, k, j, iter, bit, nLS, scale_down, RateLevelIndex = 0;
+ opus_int32 abs_q, minSumBits_Q5, sumBits_Q5;
+ VARDECL( opus_int, abs_pulses );
+ VARDECL( opus_int, sum_pulses );
+ VARDECL( opus_int, nRshifts );
+ opus_int pulses_comb[ 8 ];
+ opus_int *abs_pulses_ptr;
+ const opus_int8 *pulses_ptr;
+ const opus_uint8 *cdf_ptr;
+ const opus_uint8 *nBits_ptr;
+ SAVE_STACK;
+
+ silk_memset( pulses_comb, 0, 8 * sizeof( opus_int ) ); /* Fixing Valgrind reported problem*/
+
+ /****************************/
+ /* Prepare for shell coding */
+ /****************************/
+ /* Calculate number of shell blocks */
+ silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH );
+ iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH );
+ if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { /* partial last block: round the block count up and zero what follows the frame */
+ silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
+ iter++;
+ silk_memset( &pulses[ frame_length ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof(opus_int8)); /* writes SHELL_CODEC_FRAME_LENGTH entries starting at pulses[ frame_length ]; the caller's buffer must have room for them */
+ }
+
+ /* Take the absolute value of the pulses */
+ ALLOC( abs_pulses, iter * SHELL_CODEC_FRAME_LENGTH, opus_int );
+ silk_assert( !( SHELL_CODEC_FRAME_LENGTH & 3 ) ); /* the loop below is unrolled by 4 */
+ for( i = 0; i < iter * SHELL_CODEC_FRAME_LENGTH; i+=4 ) {
+ abs_pulses[i+0] = ( opus_int )silk_abs( pulses[ i + 0 ] );
+ abs_pulses[i+1] = ( opus_int )silk_abs( pulses[ i + 1 ] );
+ abs_pulses[i+2] = ( opus_int )silk_abs( pulses[ i + 2 ] );
+ abs_pulses[i+3] = ( opus_int )silk_abs( pulses[ i + 3 ] );
+ }
+
+ /* Calc sum pulses per shell code frame */
+ ALLOC( sum_pulses, iter, opus_int );
+ ALLOC( nRshifts, iter, opus_int );
+ abs_pulses_ptr = abs_pulses;
+ for( i = 0; i < iter; i++ ) {
+ nRshifts[ i ] = 0;
+
+ while( 1 ) { /* retry until every partial sum fits; each retry halves this block's magnitudes */
+ /* 1+1 -> 2 */
+ scale_down = combine_and_check( pulses_comb, abs_pulses_ptr, silk_max_pulses_table[ 0 ], 8 );
+ /* 2+2 -> 4 (in place: combine_and_check() writes element k only after reading elements 2k and 2k+1) */
+ scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 1 ], 4 );
+ /* 4+4 -> 8 */
+ scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 2 ], 2 );
+ /* 8+8 -> 16 */
+ scale_down += combine_and_check( &sum_pulses[ i ], pulses_comb, silk_max_pulses_table[ 3 ], 1 );
+
+ if( scale_down ) {
+ /* We need to downscale the quantization signal */
+ nRshifts[ i ]++;
+ for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) {
+ abs_pulses_ptr[ k ] = silk_RSHIFT( abs_pulses_ptr[ k ], 1 );
+ }
+ } else {
+ /* Jump out of while(1) loop and go to next shell coding frame */
+ break;
+ }
+ }
+ abs_pulses_ptr += SHELL_CODEC_FRAME_LENGTH;
+ }
+
+ /**************/
+ /* Rate level */
+ /**************/
+ /* find rate level that leads to fewest bits for coding of pulses per block info */
+ minSumBits_Q5 = silk_int32_MAX;
+ for( k = 0; k < N_RATE_LEVELS - 1; k++ ) { /* table N_RATE_LEVELS - 1 is not a candidate; it is used below for downscaled blocks */
+ nBits_ptr = silk_pulses_per_block_BITS_Q5[ k ];
+ sumBits_Q5 = silk_rate_levels_BITS_Q5[ signalType >> 1 ][ k ];
+ for( i = 0; i < iter; i++ ) {
+ if( nRshifts[ i ] > 0 ) { /* downscaled block: costed as symbol MAX_PULSES + 1 */
+ sumBits_Q5 += nBits_ptr[ MAX_PULSES + 1 ];
+ } else {
+ sumBits_Q5 += nBits_ptr[ sum_pulses[ i ] ];
+ }
+ }
+ if( sumBits_Q5 < minSumBits_Q5 ) {
+ minSumBits_Q5 = sumBits_Q5;
+ RateLevelIndex = k;
+ }
+ }
+ ec_enc_icdf( psRangeEnc, RateLevelIndex, silk_rate_levels_iCDF[ signalType >> 1 ], 8 );
+
+ /***************************************************/
+ /* Sum-Weighted-Pulses Encoding */
+ /***************************************************/
+ cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ];
+ for( i = 0; i < iter; i++ ) {
+ if( nRshifts[ i ] == 0 ) {
+ ec_enc_icdf( psRangeEnc, sum_pulses[ i ], cdf_ptr, 8 );
+ } else { /* symbol MAX_PULSES + 1 is sent once per right shift, then the downscaled sum, both from the last table */
+ ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, cdf_ptr, 8 );
+ for( k = 0; k < nRshifts[ i ] - 1; k++ ) {
+ ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 );
+ }
+ ec_enc_icdf( psRangeEnc, sum_pulses[ i ], silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 );
+ }
+ }
+
+ /******************/
+ /* Shell Encoding */
+ /******************/
+ for( i = 0; i < iter; i++ ) {
+ if( sum_pulses[ i ] > 0 ) { /* blocks whose (downscaled) sum is zero are skipped */
+ silk_shell_encoder( psRangeEnc, &abs_pulses[ i * SHELL_CODEC_FRAME_LENGTH ] );
+ }
+ }
+
+ /****************/
+ /* LSB Encoding */
+ /****************/
+ for( i = 0; i < iter; i++ ) {
+ if( nRshifts[ i ] > 0 ) {
+ pulses_ptr = &pulses[ i * SHELL_CODEC_FRAME_LENGTH ];
+ nLS = nRshifts[ i ] - 1; /* bits nLS..0 of each magnitude are sent, most significant first */
+ for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) {
+ abs_q = (opus_int8)silk_abs( pulses_ptr[ k ] ); /* NOTE(review): narrowed to opus_int8 before widening to abs_q - presumably |pulse| <= 127, confirm */
+ for( j = nLS; j > 0; j-- ) {
+ bit = silk_RSHIFT( abs_q, j ) & 1;
+ ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 );
+ }
+ bit = abs_q & 1;
+ ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 );
+ }
+ }
+ }
+
+ /****************/
+ /* Encode signs */
+ /****************/
+ silk_encode_signs( psRangeEnc, pulses, frame_length, signalType, quantOffsetType, sum_pulses );
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/errors.h b/drivers/opus/silk/errors.h
new file mode 100644
index 0000000000..45070800f2
--- /dev/null
+++ b/drivers/opus/silk/errors.h
@@ -0,0 +1,98 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_ERRORS_H
+#define SILK_ERRORS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/******************/
+/* Error messages */
+/******************/
+#define SILK_NO_ERROR 0 /* Success; every error code defined below is negative */
+
+/**************************/
+/* Encoder error messages */
+/**************************/
+
+/* Input length is not a multiple of 10 ms, or length is longer than the packet length */
+#define SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES -101
+
+/* Sampling frequency not 8000, 12000 or 16000 Hertz */
+#define SILK_ENC_FS_NOT_SUPPORTED -102
+
+/* Packet size not 10, 20, 40, or 60 ms */
+#define SILK_ENC_PACKET_SIZE_NOT_SUPPORTED -103
+
+/* Allocated payload buffer too short */
+#define SILK_ENC_PAYLOAD_BUF_TOO_SHORT -104
+
+/* Loss rate not between 0 and 100 percent */
+#define SILK_ENC_INVALID_LOSS_RATE -105
+
+/* Complexity setting not valid, use 0...10 */
+#define SILK_ENC_INVALID_COMPLEXITY_SETTING -106
+
+/* Inband FEC setting not valid, use 0 or 1 */
+#define SILK_ENC_INVALID_INBAND_FEC_SETTING -107
+
+/* DTX setting not valid, use 0 or 1 */
+#define SILK_ENC_INVALID_DTX_SETTING -108
+
+/* CBR setting not valid, use 0 or 1 */
+#define SILK_ENC_INVALID_CBR_SETTING -109
+
+/* Internal encoder error */
+#define SILK_ENC_INTERNAL_ERROR -110
+
+/* Number of channels not valid (NOTE(review): this comment used to repeat "Internal encoder error"; the accepted channel counts are not visible in this header) */
+#define SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR -111
+
+/**************************/
+/* Decoder error messages */
+/**************************/
+
+/* Output sampling frequency lower than internal decoded sampling frequency */
+#define SILK_DEC_INVALID_SAMPLING_FREQUENCY -200
+
+/* Payload size exceeded the maximum allowed 1024 bytes */
+#define SILK_DEC_PAYLOAD_TOO_LARGE -201
+
+/* Payload has bit errors */
+#define SILK_DEC_PAYLOAD_ERROR -202
+
+/* Frame size not valid (NOTE(review): this comment used to repeat "Payload has bit errors"; confirm the accepted frame sizes against the decoder) */
+#define SILK_DEC_INVALID_FRAME_SIZE -203
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_ERRORS_H */
diff --git a/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c b/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
new file mode 100644
index 0000000000..1df4b01d20
--- /dev/null
+++ b/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+void silk_LTP_analysis_filter_FIX(
+ opus_int16 *LTP_res, /* O LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length ) */
+ const opus_int16 *x, /* I Pointer to input signal with at least max( pitchL ) preceding samples */
+ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag, one for each subframe */
+ const opus_int32 invGains_Q16[ MAX_NB_SUBFR ], /* I Inverse quantization gains, one for each subframe */
+ const opus_int subfr_length, /* I Length of each subframe */
+ const opus_int nb_subfr, /* I Number of subframes */
+ const opus_int pre_length /* I Length of the preceding samples starting at &x[0] for each subframe */
+)
+{ /* Per subframe: LTP_res = invGain * sat16( x - long-term prediction ), over pre_length + subfr_length samples */
+ const opus_int16 *x_ptr, *x_lag_ptr;
+ opus_int16 Btmp_Q14[ LTP_ORDER ];
+ opus_int16 *LTP_res_ptr;
+ opus_int k, i, j;
+ opus_int32 LTP_est;
+
+ x_ptr = x;
+ LTP_res_ptr = LTP_res;
+ for( k = 0; k < nb_subfr; k++ ) {
+
+ x_lag_ptr = x_ptr - pitchL[ k ]; /* taps run from x_lag_ptr[ LTP_ORDER / 2 ] down to x_lag_ptr[ LTP_ORDER / 2 - LTP_ORDER + 1 ] */
+ for( i = 0; i < LTP_ORDER; i++ ) {
+ Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ]; /* local copy of this subframe's coefficients */
+ }
+
+ /* LTP analysis FIR filter */
+ for( i = 0; i < subfr_length + pre_length; i++ ) {
+ LTP_res_ptr[ i ] = x_ptr[ i ]; /* overwritten below by the saturated residual */
+
+ /* Long-term prediction */
+ LTP_est = silk_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] );
+ for( j = 1; j < LTP_ORDER; j++ ) {
+ LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] );
+ }
+ LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/
+
+ /* Subtract long-term prediction */
+ LTP_res_ptr[ i ] = (opus_int16)silk_SAT16( (opus_int32)x_ptr[ i ] - LTP_est );
+
+ /* Scale residual by this subframe's inverse gain (Q16) */
+ LTP_res_ptr[ i ] = silk_SMULWB( invGains_Q16[ k ], LTP_res_ptr[ i ] );
+
+ x_lag_ptr++;
+ }
+
+ /* Update pointers: output advances by a full block, input by subfr_length only */
+ LTP_res_ptr += subfr_length + pre_length;
+ x_ptr += subfr_length;
+ }
+}
+
diff --git a/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
new file mode 100644
index 0000000000..ab6923c5c9
--- /dev/null
+++ b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Calculation of LTP state scaling: selects LTP_scaleIndex (0..2) and the matching LTP_scale_Q14 table entry */
+void silk_LTP_scale_ctrl_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O encoder state */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */
+ opus_int condCoding /* I The type of conditional coding to use */
+)
+{
+ opus_int round_loss;
+
+ if( condCoding == CODE_INDEPENDENTLY ) {
+ /* Only scale if first frame in packet */
+ round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket; /* loss percentage plus frames per packet */
+ psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT(
+ silk_SMULWB( silk_SMULBB( round_loss, psEncCtrl->LTPredCodGain_Q7 ), SILK_FIX_CONST( 0.1, 9 ) ), 0, 2 ); /* presumably 0.1 * round_loss * LTPredCodGain in Q0 (Q7 * Q9 >> 16), limited to 0..2 */
+ } else {
+ /* Default is minimum scaling */
+ psEnc->sCmn.indices.LTP_scaleIndex = 0;
+ }
+ psEncCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ]; /* table lookup for the chosen index */
+}
diff --git a/drivers/opus/silk/fixed/apply_sine_window_FIX.c b/drivers/opus/silk/fixed/apply_sine_window_FIX.c
new file mode 100644
index 0000000000..0998b49eca
--- /dev/null
+++ b/drivers/opus/silk/fixed/apply_sine_window_FIX.c
@@ -0,0 +1,101 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Apply sine window to signal vector. */
+/* Window types: */
+/* 1 -> sine window from 0 to pi/2 */
+/* 2 -> sine window from pi/2 to pi */
+/* Every other sample is linearly interpolated, for speed. */
+/* Window length must be between 16 and 120 (incl) and a multiple of 4. */
+
+/* Matlab code for table (entry i is round(65536*pi / (L + 1)) for window length L = 16 + 4*i):
+ for k=16:9*4:16+2*9*4, fprintf(' %7.d,', round(65536*pi ./ ((k:4:k+8*4) + 1))); fprintf('\n'); end
+*/
+static const opus_int16 freq_table_Q16[ 27 ] = {
+ 12111, 9804, 8235, 7100, 6239, 5565, 5022, 4575, 4202,
+ 3885, 3612, 3375, 3167, 2984, 2820, 2674, 2542, 2422,
+ 2313, 2214, 2123, 2038, 1961, 1889, 1822, 1760, 1702,
+};
+
+void silk_apply_sine_window(
+ opus_int16 px_win[], /* O Pointer to windowed signal */
+ const opus_int16 px[], /* I Pointer to input signal */
+ const opus_int win_type, /* I Selects a window type */
+ const opus_int length /* I Window length, multiple of 4 */
+)
+{
+ opus_int k, f_Q16, c_Q16;
+ opus_int32 S0_Q16, S1_Q16;
+
+ silk_assert( win_type == 1 || win_type == 2 );
+
+ /* Length must be in a range from 16 to 120 and a multiple of 4 */
+ silk_assert( length >= 16 && length <= 120 );
+ silk_assert( ( length & 3 ) == 0 );
+
+ /* Frequency: table index 0 corresponds to length 16, one entry per 4 samples of length */
+ k = ( length >> 2 ) - 4;
+ silk_assert( k >= 0 && k <= 26 );
+ f_Q16 = (opus_int)freq_table_Q16[ k ];
+
+ /* Factor used for cosine approximation: c = -f^2, so that 2 + c ~ 2 * cos(f) in the recursion below */
+ c_Q16 = silk_SMULWB( (opus_int32)f_Q16, -f_Q16 );
+ silk_assert( c_Q16 >= -32768 );
+
+ /* initialize state */
+ if( win_type == 1 ) {
+ /* start from 0 */
+ S0_Q16 = 0;
+ /* approximation of sin(f) */
+ S1_Q16 = f_Q16 + silk_RSHIFT( length, 3 );
+ } else {
+ /* start from 1 */
+ S0_Q16 = ( (opus_int32)1 << 16 );
+ /* approximation of cos(f) */
+ S1_Q16 = ( (opus_int32)1 << 16 ) + silk_RSHIFT( c_Q16, 1 ) + silk_RSHIFT( length, 4 );
+ }
+
+ /* Uses the recursive equation: sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f) */
+ /* 4 samples at a time: two recursion steps, each followed by one interpolated sample */
+ for( k = 0; k < length; k += 4 ) {
+ px_win[ k ] = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k ] ); /* interpolated: mean of the two states */
+ px_win[ k + 1 ] = (opus_int16)silk_SMULWB( S1_Q16, px[ k + 1] );
+ S0_Q16 = silk_SMULWB( S1_Q16, c_Q16 ) + silk_LSHIFT( S1_Q16, 1 ) - S0_Q16 + 1;
+ S0_Q16 = silk_min( S0_Q16, ( (opus_int32)1 << 16 ) ); /* clamp to 1.0 in Q16 */
+
+ px_win[ k + 2 ] = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k + 2] ); /* interpolated: mean of the two states */
+ px_win[ k + 3 ] = (opus_int16)silk_SMULWB( S0_Q16, px[ k + 3 ] );
+ S1_Q16 = silk_SMULWB( S0_Q16, c_Q16 ) + silk_LSHIFT( S0_Q16, 1 ) - S1_Q16;
+ S1_Q16 = silk_min( S1_Q16, ( (opus_int32)1 << 16 ) ); /* clamp to 1.0 in Q16 */
+ }
+}
diff --git a/drivers/opus/silk/fixed/autocorr_FIX.c b/drivers/opus/silk/fixed/autocorr_FIX.c
new file mode 100644
index 0000000000..438b42f85b
--- /dev/null
+++ b/drivers/opus/silk/fixed/autocorr_FIX.c
@@ -0,0 +1,48 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "celt_lpc.h"
+
+/* Compute autocorrelation by delegating to _celt_autocorr(); the tap count is capped at inputDataSize */
+void silk_autocorr(
+ opus_int32 *results, /* O Result (length correlationCount) */
+ opus_int *scale, /* O Scaling of the correlation vector */
+ const opus_int16 *inputData, /* I Input data to correlate */
+ const opus_int inputDataSize, /* I Length of input */
+ const opus_int correlationCount, /* I Number of correlation taps to compute */
+ int arch /* I Run-time architecture */
+)
+{
+ opus_int corrCount;
+ corrCount = silk_min_int( inputDataSize, correlationCount ); /* never request more taps than there are input samples */
+ *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch); /* presumably (no window, no overlap, max lag corrCount-1) - confirm against celt_lpc.h */
+}
diff --git a/drivers/opus/silk/fixed/burg_modified_FIX.c b/drivers/opus/silk/fixed/burg_modified_FIX.c
new file mode 100644
index 0000000000..ce2a560e6d
--- /dev/null
+++ b/drivers/opus/silk/fixed/burg_modified_FIX.c
@@ -0,0 +1,279 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "tuning_parameters.h"
+#include "pitch.h"
+
+#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
+
+#define QA 25 /* Q-domain of the AR coefficients held in Af_QA[] */
+#define N_BITS_HEAD_ROOM 2 /* Head room (in bits) targeted when scaling C0 */
+#define MIN_RSHIFTS -16 /* Lower bound on rshifts; a negative value means a left shift */
+#define MAX_RSHIFTS (32 - QA) /* Upper bound on rshifts */
+
+/* Compute prediction coefficients (built up from reflection coefficients) and residual energy from input signal */
+void silk_burg_modified(
+ opus_int32 *res_nrg, /* O Residual energy */
+ opus_int *res_nrg_Q, /* O Residual energy Q value */
+ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
+ const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */
+ const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */
+ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */
+ const opus_int nb_subfr, /* I Number of subframes stacked in x */
+ const opus_int D, /* I Order */
+ int arch /* I Run-time architecture */
+)
+{
+ opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
+ opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
+ const opus_int16 *x_ptr;
+ opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ];
+ opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ];
+ opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ];
+ opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ];
+ opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ];
+ opus_int32 xcorr[ SILK_MAX_ORDER_LPC ];
+
+ silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+ /* Compute autocorrelations, added over subframes */
+ silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
+ if( rshifts > MAX_RSHIFTS ) {
+ C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
+ silk_assert( C0 > 0 );
+ rshifts = MAX_RSHIFTS;
+ } else {
+ lz = silk_CLZ32( C0 ) - 1;
+ rshifts_extra = N_BITS_HEAD_ROOM - lz;
+ if( rshifts_extra > 0 ) {
+ rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
+ C0 = silk_RSHIFT32( C0, rshifts_extra );
+ } else {
+ rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
+ C0 = silk_LSHIFT32( C0, -rshifts_extra );
+ }
+ rshifts += rshifts_extra;
+ }
+ CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */
+ silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+ if( rshifts > 0 ) {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ for( n = 1; n < D + 1; n++ ) {
+ C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
+ silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
+ }
+ }
+ } else {
+ for( s = 0; s < nb_subfr; s++ ) {
+ int i;
+ opus_int32 d;
+ x_ptr = x + s * subfr_length;
+ celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
+ for( n = 1; n < D + 1; n++ ) {
+ for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
+ d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
+ xcorr[ n - 1 ] += d;
+ }
+ for( n = 1; n < D + 1; n++ ) {
+ C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
+ }
+ }
+ }
+ silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+
+ /* Initialize (repeats the CAb[ 0 ] / CAf[ 0 ] assignment already made after C0 was scaled) */
+ CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */
+
+ invGain_Q30 = (opus_int32)1 << 30;
+ reached_max_gain = 0;
+ for( n = 0; n < D; n++ ) {
+ /* Update first row of correlation matrix (without first element) */
+ /* Update last row of correlation matrix (without last element, stored in reversed order) */
+ /* Update C * Af */
+ /* Update C * flipud(Af) (stored in reversed order) */
+ if( rshifts > -2 ) {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */
+ x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */
+ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */
+ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */
+ for( k = 0; k < n; k++ ) {
+ C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */
+ C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+ Atmp_QA = Af_QA[ k ];
+ tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */
+ tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */
+ }
+ tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */
+ tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */
+ for( k = 0; k <= n; k++ ) {
+ CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshifts ) */
+ CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshifts ) */
+ }
+ }
+ } else {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */
+ x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */
+ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */
+ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */
+ for( k = 0; k < n; k++ ) {
+ C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */
+ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */
+ tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */
+ tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */
+ }
+ tmp1 = -tmp1; /* Q17 */
+ tmp2 = -tmp2; /* Q17 */
+ for( k = 0; k <= n; k++ ) {
+ CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
+ silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshifts ) */
+ CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
+ silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshifts ) */
+ }
+ }
+ }
+
+ /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */
+ tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */
+ tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */
+ num = 0; /* Q( -rshifts ) */
+ nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */
+ for( k = 0; k < n; k++ ) {
+ Atmp_QA = Af_QA[ k ];
+ lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
+ lz = silk_min( 32 - QA, lz );
+ Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */
+
+ tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
+ tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
+ num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
+ nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
+ Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */
+ }
+ CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */
+ CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */
+ num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */
+ num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */
+
+ /* Calculate the next order reflection (parcor) coefficient; saturate when |num| >= nrg */
+ if( silk_abs( num ) < nrg ) {
+ rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
+ } else {
+ rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
+ }
+
+ /* Update inverse prediction gain */
+ tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+ tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
+ if( tmp1 <= minInvGain_Q30 ) {
+ /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+ tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */
+ rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */
+ /* Newton-Raphson iteration */
+ rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */
+ rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */
+ if( num < 0 ) {
+ /* Ensure the adjusted reflection coefficient has the original sign */
+ rc_Q31 = -rc_Q31;
+ }
+ invGain_Q30 = minInvGain_Q30;
+ reached_max_gain = 1;
+ } else {
+ invGain_Q30 = tmp1;
+ }
+
+ /* Update the AR coefficients */
+ for( k = 0; k < (n + 1) >> 1; k++ ) {
+ tmp1 = Af_QA[ k ]; /* QA */
+ tmp2 = Af_QA[ n - k - 1 ]; /* QA */
+ Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */
+ Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */
+ }
+ Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */
+
+ if( reached_max_gain ) {
+ /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+ for( k = n + 1; k < D; k++ ) {
+ Af_QA[ k ] = 0;
+ }
+ break;
+ }
+
+ /* Update C * Af and C * Ab */
+ for( k = 0; k <= n + 1; k++ ) {
+ tmp1 = CAf[ k ]; /* Q( -rshifts ) */
+ tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */
+ CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */
+ CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */
+ }
+ }
+
+ if( reached_max_gain ) {
+ for( k = 0; k < D; k++ ) {
+ /* Scale coefficients from QA to Q16 and flip the sign */
+ A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
+ }
+ /* Subtract energy of preceding samples from C0 */
+ if( rshifts > 0 ) {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
+ }
+ } else {
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
+ }
+ }
+ /* Approximate residual energy */
+ *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
+ *res_nrg_Q = -rshifts;
+ } else {
+ /* Return residual energy */
+ nrg = CAf[ 0 ]; /* Q( -rshifts ) */
+ tmp1 = (opus_int32)1 << 16; /* Q16 */
+ for( k = 0; k < D; k++ ) {
+ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */
+ nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */
+ tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */
+ A_Q16[ k ] = -Atmp1;
+ }
+ *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
+ *res_nrg_Q = -rshifts;
+ }
+}
diff --git a/drivers/opus/silk/fixed/corrMatrix_FIX.c b/drivers/opus/silk/fixed/corrMatrix_FIX.c
new file mode 100644
index 0000000000..28543fc204
--- /dev/null
+++ b/drivers/opus/silk/fixed/corrMatrix_FIX.c
@@ -0,0 +1,156 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/**********************************************************************
+ * Correlation Matrix Computations for LS estimate.
+ **********************************************************************/
+
+#include "main_FIX.h"
+
+/* Calculates correlation vector X'*t: Xt[ lag ] = sum over i = 0..L-1 of x[ order - 1 - lag + i ] * t[ i ] */
+void silk_corrVector_FIX(
+ const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */
+ const opus_int16 *t, /* I Target vector [L] */
+ const opus_int L, /* I Length of vectors */
+ const opus_int order, /* I Max lag for correlation */
+ opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */
+ const opus_int rshifts /* I Right shifts of correlations */
+)
+{
+ opus_int lag, i;
+ const opus_int16 *ptr1, *ptr2;
+ opus_int32 inner_prod;
+
+ ptr1 = &x[ order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */
+ ptr2 = t;
+ /* Calculate X'*t */
+ if( rshifts > 0 ) {
+ /* Right shifting used: each product is shifted before it is accumulated */
+ for( lag = 0; lag < order; lag++ ) {
+ inner_prod = 0;
+ for( i = 0; i < L; i++ ) {
+ inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts );
+ }
+ Xt[ lag ] = inner_prod; /* X[:,lag]'*t */
+ ptr1--; /* Go to next column of X */
+ }
+ } else {
+ silk_assert( rshifts == 0 ); /* negative shifts are not handled here */
+ for( lag = 0; lag < order; lag++ ) {
+ Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L ); /* X[:,lag]'*t */
+ ptr1--; /* Go to next column of X */
+ }
+ }
+}
+
+/* Calculates correlation matrix X'*X (symmetric; both triangles of XX are written) */
+void silk_corrMatrix_FIX(
+ const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */
+ const opus_int L, /* I Length of vectors */
+ const opus_int order, /* I Max lag for correlation */
+ const opus_int head_room, /* I Desired headroom */
+ opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */
+ opus_int *rshifts /* I/O Right shifts of correlations (in: minimum shift to use, out: shift actually used) */
+)
+{
+ opus_int i, j, lag, rshifts_local, head_room_rshifts;
+ opus_int32 energy;
+ const opus_int16 *ptr1, *ptr2;
+
+ /* Calculate energy to find shift used to fit in 32 bits */
+ silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 );
+ /* Add shifts to get the desired head room */
+ head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 );
+
+ energy = silk_RSHIFT32( energy, head_room_rshifts );
+ rshifts_local += head_room_rshifts;
+
+ /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */
+ /* Remove contribution of first order - 1 samples */
+ for( i = 0; i < order - 1; i++ ) {
+ energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local );
+ }
+ if( rshifts_local < *rshifts ) {
+ /* Adjust energy to the larger shift requested by the caller */
+ energy = silk_RSHIFT32( energy, *rshifts - rshifts_local );
+ rshifts_local = *rshifts;
+ }
+
+ /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */
+ /* Fill out the diagonal of the correlation matrix */
+ matrix_ptr( XX, 0, 0, order ) = energy;
+ ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */
+ for( j = 1; j < order; j++ ) {
+ energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) );
+ energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) );
+ matrix_ptr( XX, j, j, order ) = energy;
+ }
+
+ ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */
+ /* Calculate the remaining elements of the correlation matrix */
+ if( rshifts_local > 0 ) {
+ /* Right shifting used: each product is shifted before it is accumulated */
+ for( lag = 1; lag < order; lag++ ) {
+ /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
+ energy = 0;
+ for( i = 0; i < L; i++ ) {
+ energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local );
+ }
+ /* Store X[:,0]'*X[:,lag], then calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
+ matrix_ptr( XX, lag, 0, order ) = energy;
+ matrix_ptr( XX, 0, lag, order ) = energy;
+ for( j = 1; j < ( order - lag ); j++ ) {
+ energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) );
+ energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) );
+ matrix_ptr( XX, lag + j, j, order ) = energy;
+ matrix_ptr( XX, j, lag + j, order ) = energy;
+ }
+ ptr2--; /* Update pointer to first sample of next column (lag) in X */
+ }
+ } else {
+ for( lag = 1; lag < order; lag++ ) {
+ /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
+ energy = silk_inner_prod_aligned( ptr1, ptr2, L );
+ matrix_ptr( XX, lag, 0, order ) = energy;
+ matrix_ptr( XX, 0, lag, order ) = energy;
+ /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
+ for( j = 1; j < ( order - lag ); j++ ) {
+ energy = silk_SUB32( energy, silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ) );
+ energy = silk_SMLABB( energy, ptr1[ -j ], ptr2[ -j ] );
+ matrix_ptr( XX, lag + j, j, order ) = energy;
+ matrix_ptr( XX, j, lag + j, order ) = energy;
+ }
+ ptr2--;/* Update pointer to first sample of next column (lag) in X */
+ }
+ }
+ *rshifts = rshifts_local; /* report the shift actually applied */
+}
+
diff --git a/drivers/opus/silk/fixed/encode_frame_FIX.c b/drivers/opus/silk/fixed/encode_frame_FIX.c
new file mode 100644
index 0000000000..2d80ca3583
--- /dev/null
+++ b/drivers/opus/silk/fixed/encode_frame_FIX.c
@@ -0,0 +1,385 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Forward declaration of the Low Bitrate Redundancy (LBRR) encoder, which is defined at the end of this file and called from silk_encode_frame_FIX(). Reuses all parameters but encodes with lower bitrate */
+static OPUS_INLINE void silk_LBRR_encode_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct (Gains_Q16 is overwritten temporarily and restored) */
+ const opus_int32 xfw_Q3[], /* I Input signal (the caller passes the output of silk_prefilter_FIX) */
+ opus_int condCoding /* I The type of conditional coding used so far for this frame */
+);
+/* Runs voice activity detection on the buffered input and derives the frame's signalType, VAD flag and DTX state from the measured speech activity */
+void silk_encode_do_VAD_FIX(
+ silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */
+)
+{
+ /****************************/
+ /* Voice Activity Detection */
+ /****************************/
+ silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 ); /* NOTE(review): presumably this updates sCmn.speech_activity_Q8, which is tested below - confirm in the VAD module */
+
+ /**************************************************/
+ /* Convert speech activity into VAD and DTX flags */
+ /**************************************************/
+ if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+ psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; /* Activity below the DTX threshold: count this as a no-speech frame */
+ psEnc->sCmn.noSpeechCounter++;
+ if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
+ psEnc->sCmn.inDTX = 0; /* Not enough consecutive no-speech frames yet */
+ } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
+ psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; /* Upper limit exceeded: rewind the counter and clear inDTX */
+ psEnc->sCmn.inDTX = 0;
+ } /* For counter values in between, inDTX is not touched here */
+ psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
+ } else {
+ psEnc->sCmn.noSpeechCounter = 0; /* Activity at or above the threshold: reset the no-speech counter and clear inDTX */
+ psEnc->sCmn.inDTX = 0;
+ psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+ psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+ }
+}
+
+/****************/
+/* Encode frame: signal analysis followed by a rate-control loop around noise shaping quantization and entropy coding */
+/****************/
+opus_int silk_encode_frame_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
+ opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes; set to 0 for a prefill frame */
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ opus_int condCoding, /* I The type of conditional coding to use */
+ opus_int maxBits, /* I If > 0: maximum number of output bits */
+ opus_int useCBR /* I Flag to force constant-bitrate operation */
+)
+{
+ silk_encoder_control_FIX sEncCtrl;
+ opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; /* ret is never modified below, so this function always returns 0 */
+ opus_int16 *x_frame;
+ ec_enc sRangeEnc_copy, sRangeEnc_copy2; /* _copy: input state saved before the loop; _copy2: output state of the latest under-budget attempt */
+ silk_nsq_state sNSQ_copy, sNSQ_copy2;
+ opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
+ opus_int32 gainsID, gainsID_lower, gainsID_upper;
+ opus_int16 gainMult_Q8;
+ opus_int16 ec_prevLagIndex_copy;
+ opus_int ec_prevSignalType_copy;
+ opus_int8 LastGainIndex_copy2;
+ SAVE_STACK;
+
+ /* The initialization below is not required by the algorithm, but many compilers (including gcc) cannot tell and would otherwise warn */
+ LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
+
+ psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; /* Seed cycles through 0..3 as the frame counter advances */
+
+ /**************************************************************/
+ /* Set up Input Pointers, and insert frame in input buffer */
+ /*************************************************************/
+ /* start of frame to encode */
+ x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;
+
+ /***************************************/
+ /* Ensure smooth bandwidth transitions */
+ /***************************************/
+ silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
+
+ /*******************************************/
+ /* Copy new frame to front of input buffer */
+ /*******************************************/
+ silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) ); /* Destination starts LA_SHAPE_MS * fs_kHz samples past x_frame */
+
+ if( !psEnc->sCmn.prefillFlag ) { /* Analysis and coding are skipped entirely for prefill frames */
+ VARDECL( opus_int32, xfw_Q3 );
+ VARDECL( opus_int16, res_pitch );
+ VARDECL( opus_uint8, ec_buf_copy );
+ opus_int16 *res_pitch_frame;
+
+ ALLOC( res_pitch,
+ psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length
+ + psEnc->sCmn.ltp_mem_length, opus_int16 );
+ /* start of pitch LPC residual frame */
+ res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length;
+
+ /*****************************************/
+ /* Find pitch lags, initial LPC analysis */
+ /*****************************************/
+ silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
+
+ /************************/
+ /* Noise shape analysis */
+ /************************/
+ silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch );
+
+ /***************************************************/
+ /* Find linear prediction coefficients (LPC + LTP) */
+ /***************************************************/
+ silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
+
+ /****************************************/
+ /* Process gains */
+ /****************************************/
+ silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding );
+
+ /*****************************************/
+ /* Prefiltering for noise shaper */
+ /*****************************************/
+ ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 );
+ silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame );
+
+ /****************************************/
+ /* Low Bitrate Redundant Encoding */
+ /****************************************/
+ silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding );
+
+ /* Loop over quantizer and entropy coding to control bitrate */
+ maxIter = 6; /* The loop below always exits when iter reaches this value */
+ gainMult_Q8 = SILK_FIX_CONST( 1, 8 ); /* Start with a gain multiplier of 1 in Q8 */
+ found_lower = 0; /* "lower": an attempt that used fewer than maxBits - 5 bits has been seen */
+ found_upper = 0; /* "upper": an attempt that used more than maxBits bits has been seen */
+ gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+ gainsID_lower = -1;
+ gainsID_upper = -1;
+ /* Copy part of the input state */
+ silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
+ silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+ seed_copy = psEnc->sCmn.indices.Seed;
+ ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
+ ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
+ ALLOC( ec_buf_copy, 1275, opus_uint8 ); /* Backup of the range coder bytes; the asserts below check offs <= 1275 before copying */
+ for( iter = 0; ; iter++ ) {
+ if( gainsID == gainsID_lower ) { /* Same gain indices as the stored lower attempt: reuse its bit count instead of coding again */
+ nBits = nBits_lower;
+ } else if( gainsID == gainsID_upper ) { /* Same gain indices as the stored upper attempt: reuse its bit count */
+ nBits = nBits_upper;
+ } else {
+ /* Restore part of the input state */
+ if( iter > 0 ) {
+ silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
+ silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
+ psEnc->sCmn.indices.Seed = seed_copy;
+ psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
+ psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
+ }
+
+ /*****************************************/
+ /* Noise shaping quantization */
+ /*****************************************/
+ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
+ silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
+ sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
+ sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+ } else {
+ silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
+ sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
+ sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+ }
+
+ /****************************************/
+ /* Encode Parameters */
+ /****************************************/
+ silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
+
+ /****************************************/
+ /* Encode Excitation Signal */
+ /****************************************/
+ silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
+ psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
+
+ nBits = ec_tell( psRangeEnc );
+
+ if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { /* Without CBR, a first attempt that fits the budget is accepted as is */
+ break;
+ }
+ }
+
+ if( iter == maxIter ) { /* Last allowed iteration: keep the current result or fall back to the saved lower attempt */
+ if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
+ /* Restore output state from earlier iteration that did meet the bitrate budget */
+ silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
+ silk_assert( sRangeEnc_copy2.offs <= 1275 );
+ silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
+ silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
+ psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
+ }
+ break;
+ }
+
+ if( nBits > maxBits ) { /* Over budget */
+ if( found_lower == 0 && iter >= 2 ) {
+ /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
+ sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32( sEncCtrl.Lambda_Q10, sEncCtrl.Lambda_Q10, 1 );
+ found_upper = 0;
+ gainsID_upper = -1;
+ } else {
+ found_upper = 1; /* Remember this attempt as the upper bound */
+ nBits_upper = nBits;
+ gainMult_upper = gainMult_Q8;
+ gainsID_upper = gainsID;
+ }
+ } else if( nBits < maxBits - 5 ) { /* More than 5 bits under budget: remember this attempt as the lower bound */
+ found_lower = 1;
+ nBits_lower = nBits;
+ gainMult_lower = gainMult_Q8;
+ if( gainsID != gainsID_lower ) { /* The backup is refreshed only when the gain indices differ from the stored lower attempt */
+ gainsID_lower = gainsID;
+ /* Copy part of the output state */
+ silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
+ silk_assert( psRangeEnc->offs <= 1275 );
+ silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
+ silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+ LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
+ }
+ } else {
+ /* Within 5 bits of budget: close enough */
+ break;
+ }
+
+ if( ( found_lower & found_upper ) == 0 ) { /* At most one of the two bounds is known so far */
+ /* Adjust gain according to high-rate rate/distortion curve */
+ opus_int32 gain_factor_Q16;
+ gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
+ gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); /* Never scale the gains by more than 2 in one step */
+ if( nBits > maxBits ) {
+ gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); /* When over budget, scale by at least 1.3 */
+ }
+ gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+ } else {
+ /* Adjust gain by interpolating */
+ gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower );
+ /* New gain multiplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
+ if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
+ gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
+ } else
+ if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
+ gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
+ }
+ }
+
+ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { /* Apply the new multiplier to the unquantized gains of every subframe */
+ sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
+ }
+
+ /* Quantize gains */
+ psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; /* Every attempt restarts gain quantization from sEncCtrl.lastGainIndexPrev */
+ silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16,
+ &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+ /* Unique identifier of gains vector */
+ gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+ }
+ }
+
+ /* Update input buffer: move the samples following the first frame_length ones to the start of x_buf */
+ silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
+ ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) );
+
+ /* Exit without entropy coding */
+ if( psEnc->sCmn.prefillFlag ) {
+ /* No payload */
+ *pnBytesOut = 0;
+ RESTORE_STACK;
+ return ret;
+ }
+
+ /* Parameters needed for next frame */
+ psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+ psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
+ /****************************************/
+ /* Finalize payload */
+ /****************************************/
+ psEnc->sCmn.first_frame_after_reset = 0;
+ /* Payload size: number of bits used so far, rounded up to whole bytes */
+ *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
+
+ RESTORE_STACK;
+ return ret;
+}
+
+/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate. Does nothing unless LBRR is enabled and speech activity exceeds the LBRR threshold */
+static OPUS_INLINE void silk_LBRR_encode_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct (Gains_Q16 is overwritten temporarily and restored before returning) */
+ const opus_int32 xfw_Q3[], /* I Input signal */
+ opus_int condCoding /* I The type of conditional coding used so far for this frame */
+)
+{
+ opus_int32 TempGains_Q16[ MAX_NB_SUBFR ];
+ SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; /* LBRR side info slot of the current frame */
+ silk_nsq_state sNSQ_LBRR; /* Local quantizer state, so the state in psEnc->sCmn.sNSQ is not the one passed to the quantizer here */
+
+ /*******************************************/
+ /* Control use of inband LBRR */
+ /*******************************************/
+ if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
+ psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; /* Note: this function never clears the flag; there is no else branch */
+
+ /* Copy noise shaping quantizer state and quantization indices from regular encoding */
+ silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+ silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
+
+ /* Save original gains */
+ silk_memcpy( TempGains_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+
+ if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
+ /* First frame in packet or previous frame not LBRR coded */
+ psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
+
+ /* Increase Gains to get target LBRR rate; only the first gain index is raised, capped at N_LEVELS_QGAIN - 1 */
+ psIndices_LBRR->GainsIndices[ 0 ] = psIndices_LBRR->GainsIndices[ 0 ] + psEnc->sCmn.LBRR_GainIncreases;
+ psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
+ }
+
+ /* Decode to get gains in sync with decoder */
+ /* Overwrite unquantized gains with quantized gains */
+ silk_gains_dequant( psEncCtrl->Gains_Q16, psIndices_LBRR->GainsIndices,
+ &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+ /*****************************************/
+ /* Noise shaping quantization */
+ /*****************************************/
+ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { /* Same quantizer selection as in silk_encode_frame_FIX, but writing to pulses_LBRR */
+ silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
+ psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
+ psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
+ psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+ } else {
+ silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
+ psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
+ psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
+ psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+ }
+
+ /* Restore original gains */
+ silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+ }
+}
diff --git a/drivers/opus/silk/fixed/find_LPC_FIX.c b/drivers/opus/silk/fixed/find_LPC_FIX.c
new file mode 100644
index 0000000000..a46cdb7515
--- /dev/null
+++ b/drivers/opus/silk/fixed/find_LPC_FIX.c
@@ -0,0 +1,151 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Finds LPC vector from correlations, and converts to NLSF. Also selects the NLSF interpolation index (NLSFInterpCoef_Q2; 4 means no interpolation) */
+void silk_find_LPC_FIX(
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ opus_int16 NLSF_Q15[], /* O NLSFs */
+ const opus_int16 x[], /* I Input signal */
+ const opus_int32 minInvGain_Q30 /* I Inverse of max prediction gain */
+)
+{
+ opus_int k, subfr_length;
+ opus_int32 a_Q16[ MAX_LPC_ORDER ];
+ opus_int isInterpLower, shift;
+ opus_int32 res_nrg0, res_nrg1;
+ opus_int rshift0, rshift1;
+
+ /* Used only for LSF interpolation */
+ opus_int32 a_tmp_Q16[ MAX_LPC_ORDER ], res_nrg_interp, res_nrg, res_tmp_nrg;
+ opus_int res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q;
+ opus_int16 a_tmp_Q12[ MAX_LPC_ORDER ];
+ opus_int16 NLSF0_Q15[ MAX_LPC_ORDER ];
+ SAVE_STACK;
+
+ subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder; /* Local subframe length includes predictLPCOrder extra samples */
+
+ /* Default: no interpolation */
+ psEncC->indices.NLSFInterpCoef_Q2 = 4;
+
+ /* Burg AR analysis for the full frame */
+ silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch );
+
+ if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { /* Interpolation is only searched under these three conditions */
+ VARDECL( opus_int16, LPC_res );
+
+ /* Optimal solution for last 10 ms */
+ silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch );
+
+ /* subtract residual energy here, as that's easier than adding it to the */
+ /* residual energy of the first 10 ms in each iteration of the search below */
+ shift = res_tmp_nrg_Q - res_nrg_Q; /* Difference in Q-domain between the two energies */
+ if( shift >= 0 ) {
+ if( shift < 32 ) { /* For shift >= 32 nothing is subtracted and res_nrg is left unchanged */
+ res_nrg = res_nrg - silk_RSHIFT( res_tmp_nrg, shift );
+ }
+ } else {
+ silk_assert( shift > -32 );
+ res_nrg = silk_RSHIFT( res_nrg, -shift ) - res_tmp_nrg; /* Bring res_nrg to the Q-domain of res_tmp_nrg instead */
+ res_nrg_Q = res_tmp_nrg_Q;
+ }
+
+ /* Convert to NLSFs */
+ silk_A2NLSF( NLSF_Q15, a_tmp_Q16, psEncC->predictLPCOrder );
+
+ ALLOC( LPC_res, 2 * subfr_length, opus_int16 );
+
+ /* Search over interpolation indices to find the one with lowest residual energy */
+ for( k = 3; k >= 0; k-- ) {
+ /* Interpolate NLSFs for first half */
+ silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder );
+
+ /* Convert to LPC for residual energy evaluation */
+ silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder );
+
+ /* Calculate residual energy with NLSF interpolation */
+ silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder );
+
+ silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder ); /* The first predictLPCOrder samples of each subframe are left out of the energy */
+ silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder );
+
+ /* Add subframe energies from first half frame */
+ shift = rshift0 - rshift1;
+ if( shift >= 0 ) { /* Align the energy with the smaller right shift to the one with the larger right shift */
+ res_nrg1 = silk_RSHIFT( res_nrg1, shift );
+ res_nrg_interp_Q = -rshift0;
+ } else {
+ res_nrg0 = silk_RSHIFT( res_nrg0, -shift );
+ res_nrg_interp_Q = -rshift1;
+ }
+ res_nrg_interp = silk_ADD32( res_nrg0, res_nrg1 );
+
+ /* Compare with first half energy without NLSF interpolation, or best interpolated value so far */
+ shift = res_nrg_interp_Q - res_nrg_Q;
+ if( shift >= 0 ) {
+ if( silk_RSHIFT( res_nrg_interp, shift ) < res_nrg ) {
+ isInterpLower = silk_TRUE;
+ } else {
+ isInterpLower = silk_FALSE;
+ }
+ } else {
+ if( -shift < 32 ) {
+ if( res_nrg_interp < silk_RSHIFT( res_nrg, -shift ) ) {
+ isInterpLower = silk_TRUE;
+ } else {
+ isInterpLower = silk_FALSE;
+ }
+ } else {
+ isInterpLower = silk_FALSE; /* A shift of 32 or more is not evaluated; the interpolated candidate is rejected */
+ }
+ }
+
+ /* Determine whether current interpolated NLSFs are best so far */
+ if( isInterpLower == silk_TRUE ) {
+ /* Interpolation has lower residual energy */
+ res_nrg = res_nrg_interp;
+ res_nrg_Q = res_nrg_interp_Q;
+ psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k;
+ }
+ }
+ }
+
+ if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) {
+ /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */
+ silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder );
+ }
+
+ silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/find_LTP_FIX.c b/drivers/opus/silk/fixed/find_LTP_FIX.c
new file mode 100644
index 0000000000..a1d152eee4
--- /dev/null
+++ b/drivers/opus/silk/fixed/find_LTP_FIX.c
@@ -0,0 +1,244 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "tuning_parameters.h"
+
+/* Head room for correlations: rr[] is shifted down until it has at least this many leading zeros, and the same value is passed to silk_corrMatrix_FIX() */
+#define LTP_CORRS_HEAD_ROOM 2
+/* Converts Q16 LTP coefficients to Q14; the definition is at the end of this file */
+void silk_fit_LTP(
+ opus_int32 LTP_coefs_Q16[ LTP_ORDER ], /* I LTP coefficients in Q16 */
+ opus_int16 LTP_coefs_Q14[ LTP_ORDER ] /* O LTP coefficients in Q14 */
+);
+/* Finds LTP coefficients and quantization weights for each subframe from the LPC residual, optionally computes the LTP coding gain, and finally smooths the coefficients across subframes */
+void silk_find_LTP_FIX(
+ opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */
+ opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */
+ opus_int *LTPredCodGain_Q7, /* O LTP coding gain (may be NULL, in which case it is not computed) */
+ const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */
+ const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */
+ const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */
+ const opus_int subfr_length, /* I subframe length */
+ const opus_int nb_subfr, /* I number of subframes */
+ const opus_int mem_offset, /* I number of samples in LTP memory */
+ opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */
+)
+{
+ opus_int i, k, lshift;
+ const opus_int16 *r_ptr, *lag_ptr;
+ opus_int16 *b_Q14_ptr;
+
+ opus_int32 regu;
+ opus_int32 *WLTP_ptr;
+ opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26;
+ opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits;
+
+ opus_int32 temp32, denom32;
+ opus_int extra_shifts;
+ opus_int rr_shifts, maxRshifts, maxRshifts_wxtra, LZs;
+ opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16;
+ opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ];
+ opus_int32 wd, m_Q12;
+
+ b_Q14_ptr = b_Q14;
+ WLTP_ptr = WLTP;
+ r_ptr = &r_lpc[ mem_offset ]; /* Skip the LTP memory at the start of r_lpc */
+ for( k = 0; k < nb_subfr; k++ ) { /* Per-subframe analysis; the three pointers advance at the end of each pass */
+ lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); /* Start of the lagged signal, moved back by a further LTP_ORDER / 2 samples */
+
+ silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */
+
+ /* Assure headroom */
+ LZs = silk_CLZ32( rr[k] );
+ if( LZs < LTP_CORRS_HEAD_ROOM ) {
+ rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs );
+ rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs );
+ }
+ corr_rshifts[ k ] = rr_shifts;
+ silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */
+
+ /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */
+ silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */
+ if( corr_rshifts[ k ] > rr_shifts ) {
+ rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */
+ }
+ silk_assert( rr[ k ] >= 0 );
+
+ regu = 1; /* Regularization term: starts at 1 and accumulates rr[ k ] and the first and last diagonal elements of the matrix, each weighted by LTP_DAMPING/3 */
+ regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
+ regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
+ regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
+ silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER );
+
+ silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */
+
+ /* Limit and store in Q14 */
+ silk_fit_LTP( b_Q16, b_Q14_ptr );
+
+ /* Calculate residual energy */
+ nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */
+
+ /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */
+ extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM );
+ denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */
+ silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts ); /* Q( -corr_rshifts[ k ] + extra_shifts ) */
+ denom32 = silk_max( denom32, 1 ); /* Avoid division by zero below */
+ silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX ); /* Wght always < 0.5 in Q0 */
+ temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 ); /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */
+ temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 ); /* Q26 */
+
+ /* Limit temp such that the below scaling never wraps around */
+ WLTP_max = 0;
+ for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {
+ WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max );
+ }
+ lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */
+ silk_assert( 26 - 18 + lshift >= 0 );
+ if( 26 - 18 + lshift < 31 ) { /* The limit is applied only when 1 << ( 26 - 18 + lshift ) is representable */
+ temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) );
+ }
+
+ silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */
+
+ w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */
+ silk_assert( w[k] >= 0 );
+
+ r_ptr += subfr_length;
+ b_Q14_ptr += LTP_ORDER;
+ WLTP_ptr += LTP_ORDER * LTP_ORDER;
+ }
+
+ maxRshifts = 0; /* Largest right shift used by any subframe; serves as the common Q-domain below */
+ for( k = 0; k < nb_subfr; k++ ) {
+ maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts );
+ }
+
+ /* Compute LTP coding gain */
+ if( LTPredCodGain_Q7 != NULL ) {
+ LPC_LTP_res_nrg = 0;
+ LPC_res_nrg = 0;
+ silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */
+ for( k = 0; k < nb_subfr; k++ ) {
+ LPC_res_nrg = silk_ADD32( LPC_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */
+ LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */
+ }
+ LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */
+
+ div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 );
+ *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) );
+
+ silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) );
+ }
+
+ /* smoothing */
+ /* d = sum( B, 1 ); */
+ b_Q14_ptr = b_Q14;
+ for( k = 0; k < nb_subfr; k++ ) { /* d_Q14[ k ] is the sum of the LTP_ORDER coefficients of subframe k */
+ d_Q14[ k ] = 0;
+ for( i = 0; i < LTP_ORDER; i++ ) {
+ d_Q14[ k ] += b_Q14_ptr[ i ];
+ }
+ b_Q14_ptr += LTP_ORDER;
+ }
+
+ /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */
+
+ /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */
+ max_abs_d_Q14 = 0;
+ max_w_bits = 0;
+ for( k = 0; k < nb_subfr; k++ ) {
+ max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) );
+ /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */
+ /* Find bits needed in Q( 18 - maxRshifts ) */
+ max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts );
+ }
+
+ /* max_abs_d_Q14 = (5 << 15); worst case, i.e. LTP_ORDER * -silk_int16_MIN */
+ silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) );
+
+ /* How many bits are needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */
+ extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14;
+
+ /* Subtract what we have available; bits in output var plus maxRshifts */
+ extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */
+ extra_shifts = silk_max_int( extra_shifts, 0 );
+
+ maxRshifts_wxtra = maxRshifts + extra_shifts;
+
+ temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */
+ wd = 0;
+ for( k = 0; k < nb_subfr; k++ ) { /* Accumulate sum( w ) in temp32 and w * d' in wd */
+ /* w has at least 2 bits of headroom so no overflow should happen */
+ temp32 = silk_ADD32( temp32, silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) ); /* Q( 18 - maxRshifts_wxtra ) */
+ wd = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */
+ }
+ m_Q12 = silk_DIV32_varQ( wd, temp32, 12 );
+
+ b_Q14_ptr = b_Q14;
+ for( k = 0; k < nb_subfr; k++ ) { /* Adjust the coefficients of each subframe using m_Q12 and the subframe's own sum d_Q14[ k ] */
+ /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */
+ if( 2 - corr_rshifts[k] > 0 ) {
+ temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] );
+ } else {
+ temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 );
+ }
+
+ g_Q26 = silk_MUL(
+ silk_DIV32(
+ SILK_FIX_CONST( LTP_SMOOTHING, 26 ),
+ silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ), /* Q10 */
+ silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) ); /* Q16 */
+
+ temp32 = 0;
+ for( i = 0; i < LTP_ORDER; i++ ) {
+ delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 ); /* 1638_Q14 = 0.1_Q0 */
+ temp32 += delta_b_Q14[ i ]; /* Q14 */
+ }
+ temp32 = silk_DIV32( g_Q26, temp32 ); /* Q14 -> Q12 */
+ for( i = 0; i < LTP_ORDER; i++ ) { /* Updated coefficients are limited to the range -16000..28000 */
+ b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 );
+ }
+ b_Q14_ptr += LTP_ORDER;
+ }
+}
+/* Converts LTP_ORDER LTP coefficients from Q16 to Q14, using the rounding shift and 16-bit saturation macros */
+void silk_fit_LTP(
+ opus_int32 LTP_coefs_Q16[ LTP_ORDER ], /* I LTP coefficients in Q16 */
+ opus_int16 LTP_coefs_Q14[ LTP_ORDER ] /* O LTP coefficients in Q14 */
+)
+{
+ opus_int i;
+
+ for( i = 0; i < LTP_ORDER; i++ ) {
+ LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) ); /* Q16 -> Q14 is a right shift by 2 */
+ }
+}
diff --git a/drivers/opus/silk/fixed/find_pitch_lags_FIX.c b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
new file mode 100644
index 0000000000..0598477cd1
--- /dev/null
+++ b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
@@ -0,0 +1,145 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Find pitch lags: filter the input with a short-term LPC analysis filter into res[], then run the pitch estimator on that residual */
+void silk_find_pitch_lags_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O encoder state */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control; predGain_Q16 is written, pitchL is handed to the estimator or zeroed */
+ opus_int16 res[], /* O residual from LPC analysis filtering of x_buf (buf_len samples requested) */
+ const opus_int16 x[], /* I Speech signal; read starting ltp_mem_length samples before x[ 0 ] */
+ int arch /* I Run-time architecture (forwarded to silk_autocorr; the pitch estimator call uses psEnc->sCmn.arch) */
+)
+{
+ opus_int buf_len, i, scale;
+ opus_int32 thrhld_Q13, res_nrg;
+ const opus_int16 *x_buf, *x_buf_ptr;
+ VARDECL( opus_int16, Wsig );
+ opus_int16 *Wsig_ptr;
+ opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
+ opus_int16 rc_Q15[ MAX_FIND_PITCH_LPC_ORDER ];
+ opus_int32 A_Q24[ MAX_FIND_PITCH_LPC_ORDER ];
+ opus_int16 A_Q12[ MAX_FIND_PITCH_LPC_ORDER ];
+ SAVE_STACK;
+
+ /******************************************/
+ /* Set up buffer lengths etc based on Fs */
+ /******************************************/
+ buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; /* look-ahead + frame + LTP memory */
+
+ /* Safety check: the LPC window is taken from inside the buffer, so it must not be longer than buf_len */
+ silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
+
+ x_buf = x - psEnc->sCmn.ltp_mem_length; /* buffer starts ltp_mem_length samples before x */
+
+ /*************************************/
+ /* Estimate LPC AR coefficients */
+ /*************************************/
+
+ /* Calculate windowed signal (Wsig covers the last pitch_LPC_win_length samples of the buffer) */
+
+ ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 );
+
+ /* First la_pitch samples: sine window type 1 (NOTE(review): presumably the rising half - confirm in silk_apply_sine_window) */
+ x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length;
+ Wsig_ptr = Wsig;
+ silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch );
+
+ /* Middle unwindowed samples, copied as-is (window length minus 2 * la_pitch of them) */
+ Wsig_ptr += psEnc->sCmn.la_pitch;
+ x_buf_ptr += psEnc->sCmn.la_pitch;
+ silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) );
+
+ /* Last la_pitch samples: sine window type 2 (NOTE(review): presumably the falling half - confirm) */
+ Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
+ x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
+ silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );
+
+ /* Calculate autocorrelation sequence for lags 0 .. pitchEstimationLPCOrder */
+ silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch );
+
+ /* Add white noise, as fraction of energy; the trailing + 1 presumably guards against a zero energy term */
+ auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1;
+
+ /* Calculate the reflection coefficients using schur */
+ res_nrg = silk_schur( rc_Q15, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder );
+
+ /* Prediction gain; res_nrg is clamped to at least 1 so the divisor is never zero */
+ psEncCtrl->predGain_Q16 = silk_DIV32_varQ( auto_corr[ 0 ], silk_max_int( res_nrg, 1 ), 16 );
+
+ /* Convert reflection coefficients to prediction coefficients */
+ silk_k2a( A_Q24, rc_Q15, psEnc->sCmn.pitchEstimationLPCOrder );
+
+ /* Convert From 32 bit Q24 to 16 bit Q12 coefs, saturating through silk_SAT16 */
+ for( i = 0; i < psEnc->sCmn.pitchEstimationLPCOrder; i++ ) {
+ A_Q12[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( A_Q24[ i ], 12 ) );
+ }
+
+ /* Do BWE (bandwidth expansion of the Q12 coefficients via silk_bwexpander) */
+ silk_bwexpander( A_Q12, psEnc->sCmn.pitchEstimationLPCOrder, SILK_FIX_CONST( FIND_PITCH_BANDWIDTH_EXPANSION, 16 ) );
+
+ /*****************************************/
+ /* LPC analysis filtering */
+ /*****************************************/
+ silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder );
+
+ if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { /* estimator runs only with voice activity and not on the first frame after a reset */
+ /* Threshold for pitch estimator: 0.6 in Q13, adjusted by negatively weighted terms in LPC order, speech activity, previous signal type and input tilt */
+ thrhld_Q13 = SILK_FIX_CONST( 0.6, 13 );
+ thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.004, 13 ), psEnc->sCmn.pitchEstimationLPCOrder );
+ thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 21 ), psEnc->sCmn.speech_activity_Q8 );
+ thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.15, 13 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) );
+ thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 14 ), psEnc->sCmn.input_tilt_Q15 );
+ thrhld_Q13 = silk_SAT16( thrhld_Q13 );
+
+ /*****************************************/
+ /* Call pitch estimator */
+ /*****************************************/
+ if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex,
+ &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16,
+ (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr,
+ psEnc->sCmn.arch) == 0 )
+ { /* a return value of 0 marks the frame as voiced, anything else as unvoiced */
+ psEnc->sCmn.indices.signalType = TYPE_VOICED;
+ } else {
+ psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+ }
+ } else { /* estimator skipped: clear the pitch outputs; signalType is left unchanged */
+ silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) );
+ psEnc->sCmn.indices.lagIndex = 0;
+ psEnc->sCmn.indices.contourIndex = 0;
+ psEnc->LTPCorr_Q15 = 0;
+ }
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/find_pred_coefs_FIX.c b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
new file mode 100644
index 0000000000..0ab70df09d
--- /dev/null
+++ b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
@@ -0,0 +1,147 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+
+void silk_find_pred_coefs_FIX( /* Find LPC and LTP coefficients for the current frame */
+ silk_encoder_state_FIX *psEnc, /* I/O encoder state */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */
+ const opus_int16 res_pitch[], /* I Residual from pitch analysis (used only in the voiced branch) */
+ const opus_int16 x[], /* I Speech signal; read starting predictLPCOrder samples before x[ 0 ] */
+ opus_int condCoding /* I The type of conditional coding to use (forwarded to silk_LTP_scale_ctrl_FIX) */
+)
+{
+ opus_int i;
+ opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ];
+ opus_int16 NLSF_Q15[ MAX_LPC_ORDER ];
+ const opus_int16 *x_ptr;
+ opus_int16 *x_pre_ptr;
+ VARDECL( opus_int16, LPC_in_pre );
+ opus_int32 tmp, min_gain_Q16, minInvGain_Q30;
+ opus_int LTP_corrs_rshift[ MAX_NB_SUBFR ];
+ SAVE_STACK;
+
+ /* weighting for weighted least squares: first find the smallest subframe gain */
+ min_gain_Q16 = silk_int32_MAX >> 6; /* starting upper bound for the running minimum */
+ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+ min_gain_Q16 = silk_min( min_gain_Q16, psEncCtrl->Gains_Q16[ i ] );
+ }
+ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+ /* Divide to Q16 */
+ silk_assert( psEncCtrl->Gains_Q16[ i ] > 0 );
+ /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */
+ invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 );
+
+ /* Ensure Wght_Q15 has a minimum value of 1: ( 363 * 363 ) >> 17 == 1, assuming silk_SMULWB multiplies and shifts right by 16 */
+ invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 );
+
+ /* Square the inverted gains */
+ silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) );
+ tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] );
+ Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 );
+
+ /* Invert the inverted and normalized gains */
+ local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] );
+ }
+
+ ALLOC( LPC_in_pre,
+ psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder
+ + psEnc->sCmn.frame_length, opus_int16 ); /* one segment per subframe, each with predictLPCOrder leading samples (assuming frame_length == nb_subfr * subfr_length) */
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ VARDECL( opus_int32, WLTP );
+
+ /**********/
+ /* VOICED */
+ /**********/
+ silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
+
+ ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );
+
+ /* LTP analysis */
+ silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,
+ res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,
+ psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift );
+
+ /* Quantize LTP gain parameters */
+ silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
+ &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);
+
+ /* Control LTP scaling */
+ silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding );
+
+ /* Create LTP residual */
+ silk_LTP_analysis_filter_FIX( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef_Q14,
+ psEncCtrl->pitchL, invGains_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+
+ } else {
+ /************/
+ /* UNVOICED */
+ /************/
+ /* Create signal with prepended subframes, scaled by inverse gains */
+ x_ptr = x - psEnc->sCmn.predictLPCOrder; /* start predictLPCOrder samples early so each segment has its preceding samples */
+ x_pre_ptr = LPC_in_pre;
+ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+ silk_scale_copy_vector16( x_pre_ptr, x_ptr, invGains_Q16[ i ],
+ psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder );
+ x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
+ x_ptr += psEnc->sCmn.subfr_length; /* input advances by subfr_length only, so consecutive segments overlap by predictLPCOrder input samples */
+ }
+
+ silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) ); /* no LTP for unvoiced frames: clear coefficients and LTP gain state */
+ psEncCtrl->LTPredCodGain_Q7 = 0;
+ psEnc->sCmn.sum_log_gain_Q7 = 0;
+ }
+
+ /* Limit on total predictive coding gain, expressed as a minimum inverse gain in Q30 */
+ if( psEnc->sCmn.first_frame_after_reset ) {
+ minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 );
+ } else {
+ minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) ); /* Q16 */
+ minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30,
+ silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ),
+ silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 );
+ }
+
+ /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
+ silk_find_LPC_FIX( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain_Q30 );
+
+ /* Quantize LSFs */
+ silk_process_NLSFs( &psEnc->sCmn, psEncCtrl->PredCoef_Q12, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 );
+
+ /* Calculate residual energy using quantized LPC coefficients */
+ silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains,
+ psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+
+ /* Copy to prediction struct for use in next frame for interpolation */
+ silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/k2a_FIX.c b/drivers/opus/silk/fixed/k2a_FIX.c
new file mode 100644
index 0000000000..848666ee3b
--- /dev/null
+++ b/drivers/opus/silk/fixed/k2a_FIX.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Step up function, converts reflection coefficients to prediction coefficients; builds the order-(k+1) set in place from the order-k set, one reflection coefficient per outer iteration */
+void silk_k2a(
+ opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */
+ const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */
+ const opus_int32 order /* I Prediction order; must not exceed SILK_MAX_ORDER_LPC, the size of the scratch array */
+)
+{
+ opus_int k, n;
+ opus_int32 Atmp[ SILK_MAX_ORDER_LPC ];
+
+ for( k = 0; k < order; k++ ) {
+ for( n = 0; n < k; n++ ) {
+ Atmp[ n ] = A_Q24[ n ]; /* snapshot: the update below reads the old values in reversed order while overwriting A_Q24 */
+ }
+ for( n = 0; n < k; n++ ) {
+ A_Q24[ n ] = silk_SMLAWB( A_Q24[ n ], silk_LSHIFT( Atmp[ k - n - 1 ], 1 ), rc_Q15[ k ] ); /* A[ n ] += A_old[ k - n - 1 ] * rc[ k ]; Q25 * Q15 >> 16 gives Q24, assuming silk_SMLAWB shifts the product right by 16 */
+ }
+ A_Q24[ k ] = -silk_LSHIFT( (opus_int32)rc_Q15[ k ], 9 ); /* new last coefficient is -rc[ k ], Q15 -> Q24 */
+ }
+}
diff --git a/drivers/opus/silk/fixed/k2a_Q16_FIX.c b/drivers/opus/silk/fixed/k2a_Q16_FIX.c
new file mode 100644
index 0000000000..f7e62e95fe
--- /dev/null
+++ b/drivers/opus/silk/fixed/k2a_Q16_FIX.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Step up function, converts reflection coefficients to prediction coefficients; same recursion as silk_k2a but with 32-bit Q16 reflection coefficients */
+void silk_k2a_Q16(
+ opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */
+ const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */
+ const opus_int32 order /* I Prediction order; must not exceed SILK_MAX_ORDER_LPC, the size of the scratch array */
+)
+{
+ opus_int k, n;
+ opus_int32 Atmp[ SILK_MAX_ORDER_LPC ];
+
+ for( k = 0; k < order; k++ ) {
+ for( n = 0; n < k; n++ ) {
+ Atmp[ n ] = A_Q24[ n ]; /* snapshot: the update below reads the old values in reversed order while overwriting A_Q24 */
+ }
+ for( n = 0; n < k; n++ ) {
+ A_Q24[ n ] = silk_SMLAWW( A_Q24[ n ], Atmp[ k - n - 1 ], rc_Q16[ k ] ); /* A[ n ] += A_old[ k - n - 1 ] * rc[ k ]; Q24 * Q16 >> 16 stays Q24, assuming silk_SMLAWW shifts the product right by 16 */
+ }
+ A_Q24[ k ] = -silk_LSHIFT( rc_Q16[ k ], 8 ); /* new last coefficient is -rc[ k ], Q16 -> Q24 */
+ }
+}
diff --git a/drivers/opus/silk/fixed/main_FIX.h b/drivers/opus/silk/fixed/main_FIX.h
new file mode 100644
index 0000000000..fb47ffe700
--- /dev/null
+++ b/drivers/opus/silk/fixed/main_FIX.h
@@ -0,0 +1,257 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MAIN_FIX_H
+#define SILK_MAIN_FIX_H
+
+#include "SigProc_FIX.h"
+#include "structs_FIX.h"
+#include "control.h"
+#include "silk_main.h"
+#include "PLC.h"
+#include "debug.h"
+#include "entenc.h"
+
+#ifndef FORCE_CPP_BUILD
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#endif
+
+#define silk_encoder_state_Fxx silk_encoder_state_FIX
+#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FIX
+#define silk_encode_frame_Fxx silk_encode_frame_FIX
+
+/*********************/
+/* Encoder Functions */
+/*********************/
+
+/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
+void silk_HP_variable_cutoff(
+ silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */
+);
+
+/* Voice activity detection step of the encoder (NOTE(review): inferred from the function name; the definition is not part of this header - confirm) */
+void silk_encode_do_VAD_FIX(
+ silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */
+);
+
+/* Encoder main function */
+opus_int silk_encode_frame_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
+ opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes */
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ opus_int condCoding, /* I The type of conditional coding to use */
+ opus_int maxBits, /* I If > 0: maximum number of output bits */
+ opus_int useCBR /* I Flag to force constant-bitrate operation */
+);
+
+/* Initializes the Silk encoder state */
+opus_int silk_init_encoder(
+ silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */
+ int arch /* I Run-time architecture */
+);
+
+/* Control the Silk encoder */
+opus_int silk_control_encoder(
+ silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */
+ silk_EncControlStruct *encControl, /* I Control structure */
+ const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */
+ const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */
+ const opus_int channelNb, /* I Channel number */
+ const opus_int force_fs_kHz /* I NOTE(review): undocumented in this header; presumably a forced internal sampling rate in kHz - confirm against the definition */
+);
+
+/****************/
+/* Prefiltering */
+/****************/
+void silk_prefilter_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Encoder state */
+ const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */
+ opus_int32 xw_Q10[], /* O Weighted signal */
+ const opus_int16 x[] /* I Speech signal */
+);
+
+/**************************/
+/* Noise shaping analysis */
+/**************************/
+/* Compute noise shaping coefficients and initial gain values */
+void silk_noise_shape_analysis_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
+ const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */
+ const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */
+ int arch /* I Run-time architecture */
+);
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FIX(
+ opus_int32 *corr, /* O Result [order + 1] */
+ opus_int *scale, /* O Scaling of the correlation vector */
+ const opus_int16 *input, /* I Input data to correlate */
+ const opus_int warping_Q16, /* I Warping coefficient */
+ const opus_int length, /* I Length of input */
+ const opus_int order /* I Correlation order (even) */
+);
+
+/* Calculation of LTP state scaling */
+void silk_LTP_scale_ctrl_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O encoder state */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+/**********************************************/
+/* Prediction Analysis */
+/**********************************************/
+/* Find pitch lags */
+void silk_find_pitch_lags_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O encoder state */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */
+ opus_int16 res[], /* O residual */
+ const opus_int16 x[], /* I Speech signal */
+ int arch /* I Run-time architecture */
+);
+
+/* Find LPC and LTP coefficients */
+void silk_find_pred_coefs_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O encoder state */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */
+ const opus_int16 res_pitch[], /* I Residual from pitch analysis */
+ const opus_int16 x[], /* I Speech signal */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+/* LPC analysis */
+void silk_find_LPC_FIX(
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ opus_int16 NLSF_Q15[], /* O NLSFs */
+ const opus_int16 x[], /* I Input signal */
+ const opus_int32 minInvGain_Q30 /* I Inverse of max prediction gain */
+);
+
+/* LTP analysis */
+void silk_find_LTP_FIX(
+ opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */
+ opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */
+ opus_int *LTPredCodGain_Q7, /* O LTP coding gain */
+ const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */
+ const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */
+ const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */
+ const opus_int subfr_length, /* I subframe length */
+ const opus_int nb_subfr, /* I number of subframes */
+ const opus_int mem_offset, /* I number of samples in LTP memory */
+ opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */
+);
+
+void silk_LTP_analysis_filter_FIX( /* Create the LTP residual signal from the input, per-subframe LTP coefficients, pitch lags and inverse gains */
+ opus_int16 *LTP_res, /* O LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length ) */
+ const opus_int16 *x, /* I Pointer to input signal with at least max( pitchL ) preceding samples */
+ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag, one for each subframe */
+ const opus_int32 invGains_Q16[ MAX_NB_SUBFR ], /* I Inverse quantization gains, one for each subframe */
+ const opus_int subfr_length, /* I Length of each subframe */
+ const opus_int nb_subfr, /* I Number of subframes */
+ const opus_int pre_length /* I Length of the preceding samples starting at &x[0] for each subframe */
+);
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order */
+/* of preceding samples */
+void silk_residual_energy_FIX(
+ opus_int32 nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */
+ opus_int nrgsQ[ MAX_NB_SUBFR ], /* O Q value per subframe */
+ const opus_int16 x[], /* I Input signal */
+ opus_int16 a_Q12[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */
+ const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */
+ const opus_int subfr_length, /* I Subframe length */
+ const opus_int nb_subfr, /* I Number of subframes */
+ const opus_int LPC_order /* I LPC order */
+);
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+opus_int32 silk_residual_energy16_covar_FIX(
+ const opus_int16 *c, /* I Prediction vector */
+ const opus_int32 *wXX, /* I Correlation matrix */
+ const opus_int32 *wXx, /* I Correlation vector */
+ opus_int32 wxx, /* I Signal energy */
+ opus_int D, /* I Dimension */
+ opus_int cQ /* I Q value for c vector 0 - 15 */
+);
+
+/* Processing of gains */
+void silk_process_gains_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Encoder state */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+/******************/
+/* Linear Algebra */
+/******************/
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FIX(
+ const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */
+ const opus_int L, /* I Length of vectors */
+ const opus_int order, /* I Max lag for correlation */
+ const opus_int head_room, /* I Desired headroom */
+ opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */
+ opus_int *rshifts /* I/O Right shifts of correlations */
+);
+
+/* Calculates correlation vector X'*t */
+void silk_corrVector_FIX(
+ const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */
+ const opus_int16 *t, /* I Target vector [L] */
+ const opus_int L, /* I Length of vectors */
+ const opus_int order, /* I Max lag for correlation */
+ opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */
+ const opus_int rshifts /* I Right shifts of correlations */
+);
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FIX(
+ opus_int32 *XX, /* I/O Correlation matrices */
+ opus_int32 *xx, /* I/O Correlation values */
+ opus_int32 noise, /* I Noise to add */
+ opus_int D /* I Dimension of XX */
+);
+
+/* Solves Ax = b, assuming A is symmetric */
+void silk_solve_LDL_FIX(
+ opus_int32 *A, /* I Pointer to symmetric square matrix A */
+ opus_int M, /* I Size of matrix */
+ const opus_int32 *b, /* I Pointer to b vector */
+ opus_int32 *x_Q16 /* O Pointer to x solution vector */
+);
+
+#ifndef FORCE_CPP_BUILD
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* FORCE_CPP_BUILD */
+#endif /* SILK_MAIN_FIX_H */
diff --git a/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
new file mode 100644
index 0000000000..420cbeedfc
--- /dev/null
+++ b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
@@ -0,0 +1,445 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */
+/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
+/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
+/* coefficient in an array of coefficients, for monic filters.
+ *
+ * The coefficients are only read, never modified. lambda_Q16 is negated on a local copy, so the
+ * caller's value is unaffected. coefs_Q24[ order - 1 ] is read unconditionally, so order must be
+ * at least 1.
+ * NOTE(review): the exact arithmetic of silk_SMLAWB / silk_INVERSE32_varQ is defined outside this
+ * file; the Q-format remarks below follow the variable names and should be confirmed there. */
+static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/
+ const opus_int32 *coefs_Q24, /* I Warped filter coefficients; indices 0 .. order - 1 are read */
+ opus_int lambda_Q16, /* I Warping factor */
+ opus_int order /* I Number of coefficients (>= 1) */
+) {
+ opus_int i;
+ opus_int32 gain_Q24;
+
+ lambda_Q16 = -lambda_Q16; /* Work with the negated warping factor inside the recursion */
+ gain_Q24 = coefs_Q24[ order - 1 ]; /* Seed the recursion with the last coefficient */
+ for( i = order - 2; i >= 0; i-- ) { /* Walk from the second-to-last coefficient down to the first */
+ gain_Q24 = silk_SMLAWB( coefs_Q24[ i ], gain_Q24, lambda_Q16 );
+ }
+ gain_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), gain_Q24, -lambda_Q16 ); /* Final step starts from 1.0 (Q24) and uses the original sign of lambda */
+ return silk_INVERSE32_varQ( gain_Q24, 40 ); /* Reciprocal of the Q24 value; 40 - 24 matches the Q16 result noted above */
+}
+
+/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */
+/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients
+ *
+ * Both coefficient arrays are modified in place and always processed together with the same
+ * chirp factor. After the initial conversion, up to 10 iterations are run: each one finds the
+ * largest absolute coefficient across both arrays and returns if it is within limit_Q24;
+ * otherwise the coefficients are converted back, bandwidth-expanded and converted again.
+ * If the limit is still exceeded after 10 iterations, silk_assert( 0 ) is reached.
+ * NOTE(review): Q-format remarks follow the variable names; the fixed-point macros are defined
+ * outside this file. */
+static OPUS_INLINE void limit_warped_coefs(
+ opus_int32 *coefs_syn_Q24, /* I/O Synthesis coefficients, rewritten in place */
+ opus_int32 *coefs_ana_Q24, /* I/O Analysis coefficients, rewritten in place */
+ opus_int lambda_Q16, /* I Warping factor (local copy is negated and restored) */
+ opus_int32 limit_Q24, /* I Largest allowed absolute coefficient value */
+ opus_int order /* I Number of coefficients in each array */
+) {
+ opus_int i, iter, ind = 0;
+ opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16;
+ opus_int32 nom_Q16, den_Q24;
+
+ /* Convert to monic coefficients */
+ lambda_Q16 = -lambda_Q16;
+ for( i = order - 1; i > 0; i-- ) { /* Backward pass: each element is updated from its already-updated successor */
+ coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
+ coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
+ }
+ lambda_Q16 = -lambda_Q16; /* Restore the original sign */
+ nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); /* Presumably 1 - lambda^2 in Q16 - confirm against silk_SMLAWB */
+ den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 );
+ gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+ den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 );
+ gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+ for( i = 0; i < order; i++ ) { /* Scale each array by its own gain */
+ coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
+ coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
+ }
+
+ for( iter = 0; iter < 10; iter++ ) {
+ /* Find maximum absolute value */
+ maxabs_Q24 = -1; /* Below any absolute value, so the first element always updates ind */
+ for( i = 0; i < order; i++ ) {
+ tmp = silk_max( silk_abs_int32( coefs_syn_Q24[ i ] ), silk_abs_int32( coefs_ana_Q24[ i ] ) );
+ if( tmp > maxabs_Q24 ) {
+ maxabs_Q24 = tmp;
+ ind = i; /* Index of the largest coefficient, used in the chirp computation below */
+ }
+ }
+ if( maxabs_Q24 <= limit_Q24 ) {
+ /* Coefficients are within range - done */
+ return;
+ }
+
+ /* Convert back to true warped coefficients */
+ for( i = 1; i < order; i++ ) { /* Forward pass with the original sign of lambda */
+ coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
+ coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
+ }
+ gain_syn_Q16 = silk_INVERSE32_varQ( gain_syn_Q16, 32 ); /* Reciprocal of the gain applied earlier */
+ gain_ana_Q16 = silk_INVERSE32_varQ( gain_ana_Q16, 32 );
+ for( i = 0; i < order; i++ ) {
+ coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
+ coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
+ }
+
+ /* Apply bandwidth expansion */
+ /* The amount subtracted from 0.99 depends on the excess ( maxabs - limit ), on iter and on ind + 1 */
+ chirp_Q16 = SILK_FIX_CONST( 0.99, 16 ) - silk_DIV32_varQ(
+ silk_SMULWB( maxabs_Q24 - limit_Q24, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ),
+ silk_MUL( maxabs_Q24, ind + 1 ), 22 );
+ silk_bwexpander_32( coefs_syn_Q24, order, chirp_Q16 );
+ silk_bwexpander_32( coefs_ana_Q24, order, chirp_Q16 );
+
+ /* Convert to monic warped coefficients */
+ /* Same sequence as the initial conversion at the top of the function */
+ lambda_Q16 = -lambda_Q16;
+ for( i = order - 1; i > 0; i-- ) {
+ coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
+ coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
+ }
+ lambda_Q16 = -lambda_Q16;
+ nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 );
+ den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 );
+ gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+ den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 );
+ gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+ for( i = 0; i < order; i++ ) {
+ coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
+ coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
+ }
+ }
+ silk_assert( 0 ); /* Only reached when the limit was not met within 10 iterations */
+}
+
+/**************************************************************/
+/* Compute noise shaping coefficients and initial gain values */
+/**************************************************************
+ * Results are stored in psEncCtrl: input_quality_Q14, coding_quality_Q14,
+ * sparseness_Q8, Gains_Q16[], GainsPre_Q14[], AR1_Q13[], AR2_Q13[],
+ * LF_shp_Q14[], HarmBoost_Q14[], HarmShapeGain_Q14[] and Tilt_Q14[].
+ * The function also sets psEnc->sCmn.indices.quantOffsetType and updates
+ * the smoothing state in psEnc->sShape.
+ * pitch_res is only read for frames that are not TYPE_VOICED.
+ * x is read starting la_shape samples before the pointer passed in.
+ **************************************************************/
+void silk_noise_shape_analysis_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
+ const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */
+ const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */
+ int arch /* I Run-time architecture */
+)
+{
+ silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+ opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
+ opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
+ opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
+ opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
+ opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ];
+ opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
+ opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ];
+ opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ];
+ VARDECL( opus_int16, x_windowed );
+ const opus_int16 *x_ptr, *pitch_res_ptr;
+ SAVE_STACK;
+
+ /* Point to start of first LPC analysis block */
+ x_ptr = x - psEnc->sCmn.la_shape; /* Reads therefore begin la_shape samples before x */
+
+ /****************/
+ /* GAIN CONTROL */
+ /****************/
+ SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7;
+
+ /* Input quality is the average of the quality in the lowest two VAD bands */
+ psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ]
+ + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 );
+
+ /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
+ psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 -
+ SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 );
+
+ /* Reduce coding SNR during low speech activity */
+ if( psEnc->sCmn.useCBR == 0 ) { /* Adjustment is skipped when useCBR is non-zero */
+ b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8;
+ b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 );
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+ silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/
+ silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/
+ }
+
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Reduce gains for periodic signals */
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 );
+ } else {
+ /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+ silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ),
+ SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 );
+ }
+
+ /*************************/
+ /* SPARSENESS PROCESSING */
+ /*************************/
+ /* Set quantizer offset */
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Initially set to 0; may be overruled in process_gains(..) */
+ psEnc->sCmn.indices.quantOffsetType = 0;
+ psEncCtrl->sparseness_Q8 = 0;
+ } else {
+ /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
+ nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); /* fs_kHz * 2, i.e. samples per 2 ms block */
+ energy_variation_Q7 = 0;
+ log_energy_prev_Q7 = 0;
+ pitch_res_ptr = pitch_res;
+ for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { /* One pass per 2 ms block */
+ silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
+ nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/
+
+ log_energy_Q7 = silk_lin2log( nrg );
+ if( k > 0 ) { /* The first block has no predecessor to compare against */
+ energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 );
+ }
+ log_energy_prev_Q7 = log_energy_Q7;
+ pitch_res_ptr += nSamples;
+ }
+
+ psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 -
+ SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 );
+
+ /* Set quantization offset depending on sparseness measure */
+ if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) {
+ psEnc->sCmn.indices.quantOffsetType = 0;
+ } else {
+ psEnc->sCmn.indices.quantOffsetType = 1;
+ }
+
+ /* Increase coding SNR for sparse signals */
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) );
+ }
+
+ /*******************************/
+ /* Control bandwidth expansion */
+ /*******************************/
+ /* More BWE for signals with high prediction gain */
+ strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) );
+ BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
+ silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 );
+ delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ),
+ SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) );
+ BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 );
+ BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 );
+ /* BWExp1 will be applied after BWExp2, so make it relative */
+ BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) );
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
+ warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) );
+ } else {
+ warping_Q16 = 0;
+ }
+
+ /********************************************/
+ /* Compute noise shaping AR coefs and gains */
+ /********************************************/
+ ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ /* Apply window: sine slope followed by flat part followed by cosine slope */
+ opus_int shift, slope_part, flat_part;
+ flat_part = psEnc->sCmn.fs_kHz * 3;
+ slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); /* The two slopes split what remains of the window */
+
+ silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part );
+ shift = slope_part;
+ silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) ); /* Flat part is copied unmodified */
+ shift += flat_part;
+ silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part );
+
+ /* Update pointer: next LPC analysis block */
+ x_ptr += psEnc->sCmn.subfr_length;
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Calculate warped auto correlation */
+ silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
+ } else {
+ /* Calculate regular auto correlation */
+ silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
+ }
+
+ /* Add white noise, as a fraction of energy */
+ auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ),
+ SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) );
+
+ /* Calculate the reflection coefficients using schur */
+ nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
+ silk_assert( nrg >= 0 );
+
+ /* Convert reflection coefficients to prediction coefficients */
+ silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );
+
+ Qnrg = -scale; /* range: -12...30*/
+ silk_assert( Qnrg >= -12 );
+ silk_assert( Qnrg <= 30 );
+
+ /* Make sure that Qnrg is an even number */
+ if( Qnrg & 1 ) {
+ Qnrg -= 1;
+ nrg >>= 1;
+ }
+
+ tmp32 = silk_SQRT_APPROX( nrg );
+ Qnrg >>= 1; /* range: -6...15*/
+
+ psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( tmp32, 16 - Qnrg );
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Adjust gain for warping */
+ gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
+ silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+ /* Test the product with the gain halved first, and clamp to silk_int32_MAX rather than storing an overflowing product */
+ if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) {
+ psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX;
+ } else {
+ psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+ }
+ }
+
+ /* Bandwidth expansion for synthesis filter shaping */
+ silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );
+
+ /* Compute noise shaping filter coefficients */
+ silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); /* AR1 starts as a copy of the already expanded AR2 */
+
+ /* Bandwidth expansion for analysis filter shaping */
+ silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) );
+ silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );
+
+ /* Ratio of prediction gains, in energy domain */
+ pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder );
+ nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder );
+
+ /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/
+ pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 );
+ psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 );
+
+ /* Convert to monic warped prediction coefficients and limit absolute values */
+ limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );
+
+ /* Convert from Q24 to Q13 and store in int16 */
+ for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { /* Each subframe owns a slot of MAX_SHAPE_LPC_ORDER entries */
+ psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) );
+ psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) );
+ }
+ }
+
+ /*****************/
+ /* Gain tweaking */
+ /*****************/
+ /* Increase gains during low speech activity and put lower limit on gains */
+ gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) );
+ gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) );
+ silk_assert( gain_mult_Q16 > 0 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+ silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+ psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 );
+ }
+
+ gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ),
+ psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] );
+ }
+
+ /************************************************/
+ /* Control low-frequency shaping and noise tilt */
+ /************************************************/
+ /* Less low frequency shaping for noisy inputs */
+ strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ),
+ SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) );
+ strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 );
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
+ /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
+ opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] );
+ /* Pack two coefficients in one int32 */
+ psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 ); /* Upper 16 bits */
+ psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); /* Lower 16 bits */
+ }
+ silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/
+ Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) -
+ silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ),
+ silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) );
+ } else {
+ b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/
+ /* Pack two coefficients in one int32 */
+ psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 -
+ silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 );
+ psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+ for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { /* Remaining subframes reuse the value computed for subframe 0 */
+ psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ];
+ }
+ Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 );
+ }
+
+ /****************************/
+ /* HARMONIC SHAPING CONTROL */
+ /****************************/
+ /* Control boosting of harmonic frequencies */
+ HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ),
+ psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) );
+
+ /* More harmonic boost for noisy input signals */
+ HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16,
+ SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) );
+
+ if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* More harmonic noise shaping for high bitrates or noisy input */
+ HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ),
+ SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ),
+ psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) );
+
+ /* Less harmonic noise shaping for less periodic signals */
+ HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ),
+ silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) );
+ } else {
+ HarmShapeGain_Q16 = 0;
+ }
+
+ /*************************/
+ /* Smooth over subframes */
+ /*************************/
+ for( k = 0; k < MAX_NB_SUBFR; k++ ) { /* Always MAX_NB_SUBFR passes, independent of nb_subfr */
+ psShapeSt->HarmBoost_smth_Q16 =
+ silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+ psShapeSt->HarmShapeGain_smth_Q16 =
+ silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+ psShapeSt->Tilt_smth_Q16 =
+ silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+
+ psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 );
+ psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );
+ psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 );
+ }
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
new file mode 100644
index 0000000000..4d65c09d1d
--- /dev/null
+++ b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
@@ -0,0 +1,744 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/***********************************************************
+* Pitch analyser function
+********************************************************** */
+#include "SigProc_FIX.h"
+#include "pitch_est_defines.h"
+#include "stack_alloc.h"
+#include "debug.h"
+#include "pitch.h"
+
+#define SCRATCH_SIZE 22 /* NOTE(review): not referenced in the visible part of this file - confirm where it is used */
+#define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 ) /* Subframe length in samples at 4 kHz */
+#define SF_LENGTH_8KHZ ( PE_SUBFR_LENGTH_MS * 8 ) /* Subframe length in samples at 8 kHz */
+#define MIN_LAG_4KHZ ( PE_MIN_LAG_MS * 4 ) /* Smallest lag in samples at 4 kHz */
+#define MIN_LAG_8KHZ ( PE_MIN_LAG_MS * 8 ) /* Smallest lag in samples at 8 kHz */
+#define MAX_LAG_4KHZ ( PE_MAX_LAG_MS * 4 ) /* Largest lag in samples at 4 kHz */
+#define MAX_LAG_8KHZ ( PE_MAX_LAG_MS * 8 - 1 ) /* Largest lag in samples at 8 kHz; note the -1, which the 4 kHz value does not have */
+#define CSTRIDE_4KHZ ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ ) /* Number of lags MIN_LAG_4KHZ..MAX_LAG_4KHZ inclusive; row stride of C in the first stage */
+#define CSTRIDE_8KHZ ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) ) /* Lag range widened by 2 on each side; used to size C ( nb_subfr * CSTRIDE_8KHZ ) */
+#define D_COMP_MIN ( MIN_LAG_8KHZ - 3 ) /* First index of the d_comp range (inclusive) */
+#define D_COMP_MAX ( MAX_LAG_8KHZ + 4 ) /* End of the d_comp range (exclusive in the loop over it) */
+#define D_COMP_STRIDE ( D_COMP_MAX - D_COMP_MIN ) /* Number of entries allocated for d_comp */
+
+typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ]; /* One row of PE_NB_STAGE3_LAGS values; element type of the stage-3 energy and correlation arrays */
+
+/************************************************************/
+/* Internally used functions */
+/************************************************************
+ * Forward declarations of the two file-local (static) stage-3
+ * helpers. Both fill an array of silk_pe_stage3_vals rows and
+ * take the same frame / start_lag / sf_length / nb_subfr /
+ * complexity inputs; only the correlation helper also takes
+ * the run-time architecture argument.
+ ************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+ silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */
+ const opus_int16 frame[], /* I vector to correlate */
+ opus_int start_lag, /* I lag offset to search around */
+ opus_int sf_length, /* I length of a 5 ms subframe */
+ opus_int nb_subfr, /* I number of subframes */
+ opus_int complexity, /* I Complexity setting */
+ int arch /* I Run-time architecture */
+);
+
+static void silk_P_Ana_calc_energy_st3(
+ silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */
+ const opus_int16 frame[], /* I vector to calc energy in */
+ opus_int start_lag, /* I lag offset to search around */
+ opus_int sf_length, /* I length of one 5 ms subframe */
+ opus_int nb_subfr, /* I number of subframes */
+ opus_int complexity /* I Complexity setting */
+);
+
+/*************************************************************/
+/* FIXED POINT CORE PITCH ANALYSIS FUNCTION */
+/*************************************************************/
+opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
+ const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
+ opus_int *pitch_out, /* O 4 pitch lag values */
+ opus_int16 *lagIndex, /* O Lag Index */
+ opus_int8 *contourIndex, /* O Pitch contour Index */
+ opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
+ opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */
+ const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
+ const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
+ const opus_int Fs_kHz, /* I Sample frequency (kHz) */
+ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
+ const opus_int nb_subfr, /* I number of 5 ms subframes */
+ int arch /* I Run-time architecture */
+)
+{
+ VARDECL( opus_int16, frame_8kHz );
+ VARDECL( opus_int16, frame_4kHz );
+ opus_int32 filt_state[ 6 ];
+ const opus_int16 *input_frame_ptr;
+ opus_int i, k, d, j;
+ VARDECL( opus_int16, C );
+ VARDECL( opus_int32, xcorr32 );
+ const opus_int16 *target_ptr, *basis_ptr;
+ opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
+ opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
+ VARDECL( opus_int16, d_comp );
+ opus_int32 sum, threshold, lag_counter;
+ opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
+ opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
+ VARDECL( silk_pe_stage3_vals, energies_st3 );
+ VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
+ opus_int frame_length, frame_length_8kHz, frame_length_4kHz;
+ opus_int sf_length;
+ opus_int min_lag;
+ opus_int max_lag;
+ opus_int32 contour_bias_Q15, diff;
+ opus_int nb_cbk_search, cbk_size;
+ opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
+ const opus_int8 *Lag_CB_ptr;
+ SAVE_STACK;
+ /* Check for valid sampling frequency */
+ silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
+
+ /* Check for valid complexity setting */
+ silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+ silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+ silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
+ silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
+
+ /* Set up frame lengths max / min lag for the sampling frequency */
+ frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
+ frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
+ frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
+ sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz;
+ min_lag = PE_MIN_LAG_MS * Fs_kHz;
+ max_lag = PE_MAX_LAG_MS * Fs_kHz - 1;
+
+ /* Resample from input sampled at Fs_kHz to 8 kHz */
+ ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
+ if( Fs_kHz == 16 ) {
+ silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
+ silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
+ } else if( Fs_kHz == 12 ) {
+ silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
+ silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
+ } else {
+ silk_assert( Fs_kHz == 8 );
+ silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
+ }
+
+ /* Decimate again to 4 kHz */
+ silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
+ ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
+ silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
+
+ /* Low-pass filter */
+ for( i = frame_length_4kHz - 1; i > 0; i-- ) {
+ frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
+ }
+
+ /*******************************************************************************
+ ** Scale 4 kHz signal down to prevent correlations measures from overflowing
+ ** find scaling as max scaling for each 8kHz(?) subframe
+ *******************************************************************************/
+
+ /* Inner product is calculated with different lengths, so scale for the worst case */
+ silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
+ if( shift > 0 ) {
+ shift = silk_RSHIFT( shift, 1 );
+ for( i = 0; i < frame_length_4kHz; i++ ) {
+ frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
+ }
+ }
+
+ /******************************************************************************
+ * FIRST STAGE, operating in 4 khz
+ ******************************************************************************/
+ ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
+ ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
+ silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
+ target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
+ for( k = 0; k < nb_subfr >> 1; k++ ) {
+ /* Check that we are within range of the array */
+ silk_assert( target_ptr >= frame_4kHz );
+ silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+
+ basis_ptr = target_ptr - MIN_LAG_4KHZ;
+
+ /* Check that we are within range of the array */
+ silk_assert( basis_ptr >= frame_4kHz );
+ silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+
+ celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
+
+ /* Calculate first vector products before loop */
+ cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
+ normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
+ normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) );
+ normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
+
+ matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
+ (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
+
+ /* From now on normalizer is computed recursively */
+ for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
+ basis_ptr--;
+
+ /* Check that we are within range of the array */
+ silk_assert( basis_ptr >= frame_4kHz );
+ silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+
+ cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
+
+ /* Add contribution of new sample and remove contribution from oldest sample */
+ normalizer = silk_ADD32( normalizer,
+ silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
+ silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
+
+ matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
+ (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
+ }
+ /* Update target pointer */
+ target_ptr += SF_LENGTH_8KHZ;
+ }
+
+ /* Combine two subframes into single correlation measure and apply short-lag bias */
+ if( nb_subfr == PE_MAX_NB_SUBFR ) {
+ for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
+ sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
+ + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ); /* Q14 */
+ sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
+ C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
+ }
+ } else {
+ /* Only short-lag bias */
+ for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
+ sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 ); /* Q14 */
+ sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
+ C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
+ }
+ }
+
+ /* Sort */
+ length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
+ silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
+ silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
+ length_d_srch );
+
+ /* Escape if correlation is very low already here */
+ Cmax = (opus_int)C[ 0 ]; /* Q14 */
+ if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
+ silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
+ *LTPCorr_Q15 = 0;
+ *lagIndex = 0;
+ *contourIndex = 0;
+ RESTORE_STACK;
+ return 1;
+ }
+
+ threshold = silk_SMULWB( search_thres1_Q16, Cmax );
+ for( i = 0; i < length_d_srch; i++ ) {
+ /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
+ if( C[ i ] > threshold ) {
+ d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
+ } else {
+ length_d_srch = i;
+ break;
+ }
+ }
+ silk_assert( length_d_srch > 0 );
+
+ ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
+ for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
+ d_comp[ i - D_COMP_MIN ] = 0;
+ }
+ for( i = 0; i < length_d_srch; i++ ) {
+ d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
+ }
+
+ /* Convolution */
+ for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
+ d_comp[ i - D_COMP_MIN ] +=
+ d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
+ }
+
+ length_d_srch = 0;
+ for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
+ if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
+ d_srch[ length_d_srch ] = i;
+ length_d_srch++;
+ }
+ }
+
+ /* Convolution */
+ for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
+ d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
+ + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
+ }
+
+ length_d_comp = 0;
+ for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
+ if( d_comp[ i - D_COMP_MIN ] > 0 ) {
+ d_comp[ length_d_comp ] = i - 2;
+ length_d_comp++;
+ }
+ }
+
+ /**********************************************************************************
+ ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
+ *************************************************************************************/
+
+ /******************************************************************************
+ ** Scale signal down to avoid correlations measures from overflowing
+ *******************************************************************************/
+ /* find scaling as max scaling for each subframe */
+ silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
+ if( shift > 0 ) {
+ shift = silk_RSHIFT( shift, 1 );
+ for( i = 0; i < frame_length_8kHz; i++ ) {
+ frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
+ }
+ }
+
+ /*********************************************************************************
+ * Find energy of each subframe projected onto its history, for a range of delays
+ *********************************************************************************/
+ silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
+
+ target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
+ for( k = 0; k < nb_subfr; k++ ) {
+
+ /* Check that we are within range of the array */
+ silk_assert( target_ptr >= frame_8kHz );
+ silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
+
+ energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
+ for( j = 0; j < length_d_comp; j++ ) {
+ d = d_comp[ j ];
+ basis_ptr = target_ptr - d;
+
+ /* Check that we are within range of the array */
+ silk_assert( basis_ptr >= frame_8kHz );
+ silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
+
+ cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
+ if( cross_corr > 0 ) {
+ energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
+ matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
+ (opus_int16)silk_DIV32_varQ( cross_corr,
+ silk_ADD32( energy_target,
+ energy_basis ),
+ 13 + 1 ); /* Q13 */
+ } else {
+ matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
+ }
+ }
+ target_ptr += SF_LENGTH_8KHZ;
+ }
+
+ /* search over lag range and lags codebook */
+ /* scale factor for lag codebook, as a function of center lag */
+
+ CCmax = silk_int32_MIN;
+ CCmax_b = silk_int32_MIN;
+
+ CBimax = 0; /* To avoid returning undefined lag values */
+ lag = -1; /* To check if lag with strong enough correlation has been found */
+
+ if( prevLag > 0 ) {
+ if( Fs_kHz == 12 ) {
+ prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
+ } else if( Fs_kHz == 16 ) {
+ prevLag = silk_RSHIFT( prevLag, 1 );
+ }
+ prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
+ } else {
+ prevLag_log2_Q7 = 0;
+ }
+ silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
+ /* Set up stage 2 codebook based on number of subframes */
+ if( nb_subfr == PE_MAX_NB_SUBFR ) {
+ cbk_size = PE_NB_CBKS_STAGE2_EXT;
+ Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
+ if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
+ /* If input is 8 khz use a larger codebook here because it is last stage */
+ nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
+ } else {
+ nb_cbk_search = PE_NB_CBKS_STAGE2;
+ }
+ } else {
+ cbk_size = PE_NB_CBKS_STAGE2_10MS;
+ Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
+ nb_cbk_search = PE_NB_CBKS_STAGE2_10MS;
+ }
+
+ for( k = 0; k < length_d_srch; k++ ) {
+ d = d_srch[ k ];
+ for( j = 0; j < nb_cbk_search; j++ ) {
+ CC[ j ] = 0;
+ for( i = 0; i < nb_subfr; i++ ) {
+ opus_int d_subfr;
+ /* Try all codebooks */
+ d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
+ CC[ j ] = CC[ j ]
+ + (opus_int32)matrix_ptr( C, i,
+ d_subfr - ( MIN_LAG_8KHZ - 2 ),
+ CSTRIDE_8KHZ );
+ }
+ }
+ /* Find best codebook */
+ CCmax_new = silk_int32_MIN;
+ CBimax_new = 0;
+ for( i = 0; i < nb_cbk_search; i++ ) {
+ if( CC[ i ] > CCmax_new ) {
+ CCmax_new = CC[ i ];
+ CBimax_new = i;
+ }
+ }
+
+ /* Bias towards shorter lags */
+ lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
+ silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
+ silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
+ CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
+
+ /* Bias towards previous lag */
+ silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
+ if( prevLag > 0 ) {
+ delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
+ silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
+ delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
+ prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
+ prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
+ CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
+ }
+
+ if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
+ CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */
+ silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ /* Lag must be in range */
+ ) {
+ CCmax_b = CCmax_new_b;
+ CCmax = CCmax_new;
+ lag = d;
+ CBimax = CBimax_new;
+ }
+ }
+
+ if( lag == -1 ) {
+ /* No suitable candidate found */
+ silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
+ *LTPCorr_Q15 = 0;
+ *lagIndex = 0;
+ *contourIndex = 0;
+ RESTORE_STACK;
+ return 1;
+ }
+
+ /* Output normalized correlation */
+ *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
+ silk_assert( *LTPCorr_Q15 >= 0 );
+
+ if( Fs_kHz > 8 ) {
+ VARDECL( opus_int16, scratch_mem );
+ /***************************************************************************/
+ /* Scale input signal down to avoid correlations measures from overflowing */
+ /***************************************************************************/
+ /* find scaling as max scaling for each subframe */
+ silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
+ ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
+ if( shift > 0 ) {
+ /* Move signal to scratch mem because the input signal should be unchanged */
+ shift = silk_RSHIFT( shift, 1 );
+ for( i = 0; i < frame_length; i++ ) {
+ scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
+ }
+ input_frame_ptr = scratch_mem;
+ } else {
+ input_frame_ptr = frame;
+ }
+
+ /* Search in original signal */
+
+ CBimax_old = CBimax;
+ /* Compensate for decimation */
+ silk_assert( lag == silk_SAT16( lag ) );
+ if( Fs_kHz == 12 ) {
+ lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
+ } else if( Fs_kHz == 16 ) {
+ lag = silk_LSHIFT( lag, 1 );
+ } else {
+ lag = silk_SMULBB( lag, 3 );
+ }
+
+ lag = silk_LIMIT_int( lag, min_lag, max_lag );
+ start_lag = silk_max_int( lag - 2, min_lag );
+ end_lag = silk_min_int( lag + 2, max_lag );
+ lag_new = lag; /* to avoid undefined lag */
+ CBimax = 0; /* to avoid undefined lag */
+
+ CCmax = silk_int32_MIN;
+ /* pitch lags according to second stage */
+ for( k = 0; k < nb_subfr; k++ ) {
+ pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
+ }
+
+ /* Set up codebook parameters according to complexity setting and frame length */
+ if( nb_subfr == PE_MAX_NB_SUBFR ) {
+ nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
+ cbk_size = PE_NB_CBKS_STAGE3_MAX;
+ Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
+ } else {
+ nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+ cbk_size = PE_NB_CBKS_STAGE3_10MS;
+ Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+ }
+
+ /* Calculate the correlations and energies needed in stage 3 */
+ ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
+ ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
+ silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
+ silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+
+ lag_counter = 0;
+ silk_assert( lag == silk_SAT16( lag ) );
+ contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
+
+ target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
+ energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
+ for( d = start_lag; d <= end_lag; d++ ) {
+ for( j = 0; j < nb_cbk_search; j++ ) {
+ cross_corr = 0;
+ energy = energy_target;
+ for( k = 0; k < nb_subfr; k++ ) {
+ cross_corr = silk_ADD32( cross_corr,
+ matrix_ptr( cross_corr_st3, k, j,
+ nb_cbk_search )[ lag_counter ] );
+ energy = silk_ADD32( energy,
+ matrix_ptr( energies_st3, k, j,
+ nb_cbk_search )[ lag_counter ] );
+ silk_assert( energy >= 0 );
+ }
+ if( cross_corr > 0 ) {
+ CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 ); /* Q13 */
+ /* Reduce depending on flatness of contour */
+ diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j ); /* Q15 */
+ silk_assert( diff == silk_SAT16( diff ) );
+ CCmax_new = silk_SMULWB( CCmax_new, diff ); /* Q14 */
+ } else {
+ CCmax_new = 0;
+ }
+
+ if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
+ CCmax = CCmax_new;
+ lag_new = d;
+ CBimax = j;
+ }
+ }
+ lag_counter++;
+ }
+
+ for( k = 0; k < nb_subfr; k++ ) {
+ pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
+ }
+ *lagIndex = (opus_int16)( lag_new - min_lag);
+ *contourIndex = (opus_int8)CBimax;
+ } else { /* Fs_kHz == 8 */
+ /* Save Lags */
+ for( k = 0; k < nb_subfr; k++ ) {
+ pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
+ }
+ *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
+ *contourIndex = (opus_int8)CBimax;
+ }
+ silk_assert( *lagIndex >= 0 );
+ /* return as voiced */
+ RESTORE_STACK;
+ return 0;
+}
+
+/***********************************************************************
+ * Calculates the correlations used in stage 3 search. In order to cover
+ * the whole lag codebook for all the searched offset lags (lag +- 2),
+ * the following correlations are needed in each sub frame:
+ *
+ * sf1: lag range [-8,...,7] total 16 correlations
+ * sf2: lag range [-4,...,4] total 9 correlations
+ * sf3: lag range [-3,....4] total 8 correlations
+ * sf4: lag range [-6,....8] total 15 correlations
+ *
+ * In total 48 correlations. The direct implementation computed in worst
+ * case 4*12*5 = 240 correlations, but more likely around 120.
+ ***********************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+ silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array, addressed as [subframe][codebook vector][lag offset] */
+ const opus_int16 frame[], /* I vector to correlate */
+ opus_int start_lag, /* I lag offset to search around */
+ opus_int sf_length, /* I length of a 5 ms subframe */
+ opus_int nb_subfr, /* I number of subframes (PE_MAX_NB_SUBFR, or half of it) */
+ opus_int complexity, /* I Complexity setting; selects table rows in the PE_MAX_NB_SUBFR case */
+ int arch /* I Run-time architecture, passed on to celt_pitch_xcorr() */
+)
+{
+ const opus_int16 *target_ptr;
+ opus_int i, j, k, lag_counter, lag_low, lag_high;
+ opus_int nb_cbk_search, delta, idx, cbk_size;
+ VARDECL( opus_int32, scratch_mem ); /* Correlations of the current subframe, ordered from lag_low up to lag_high */
+ VARDECL( opus_int32, xcorr32 ); /* Raw output of celt_pitch_xcorr() for the current subframe */
+ const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+ SAVE_STACK;
+
+ silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+ silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+ if( nb_subfr == PE_MAX_NB_SUBFR ) {
+ Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+ Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
+ nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+ cbk_size = PE_NB_CBKS_STAGE3_MAX;
+ } else {
+ silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+ Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+ Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+ nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+ cbk_size = PE_NB_CBKS_STAGE3_10MS;
+ }
+ ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
+ ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
+
+ target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
+ for( k = 0; k < nb_subfr; k++ ) {
+ lag_counter = 0;
+
+ /* Calculate the correlations for each subframe */
+ lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); /* Lowest lag offset listed for subframe k */
+ lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); /* Highest lag offset listed for subframe k */
+ silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+ celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
+ for( j = lag_low; j <= lag_high; j++ ) { /* Reverse xcorr32 so that scratch_mem runs from lag_low to lag_high */
+ silk_assert( lag_counter < SCRATCH_SIZE );
+ scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
+ lag_counter++;
+ }
+
+ delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); /* Same table entry as lag_low; turns codebook lags into scratch_mem indices */
+ for( i = 0; i < nb_cbk_search; i++ ) {
+ /* Fill out the 3 dim array that stores the correlations for */
+ /* each code_book vector for each start lag */
+ idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+ for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+ silk_assert( idx + j < SCRATCH_SIZE );
+ silk_assert( idx + j < lag_counter );
+ matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
+ scratch_mem[ idx + j ];
+ }
+ }
+ target_ptr += sf_length; /* Advance to the next subframe */
+ }
+ RESTORE_STACK;
+}
+
+/********************************************************************/
+/* Calculate the energies used in the stage 3 search, for each of */
+/* the nb_subfr subframes. The energies are calculated recursively. */
+/********************************************************************/
+static void silk_P_Ana_calc_energy_st3(
+ silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array, addressed as [subframe][codebook vector][lag offset] */
+ const opus_int16 frame[], /* I vector to calc energy in */
+ opus_int start_lag, /* I lag offset to search around */
+ opus_int sf_length, /* I length of one 5 ms subframe */
+ opus_int nb_subfr, /* I number of subframes (PE_MAX_NB_SUBFR, or half of it) */
+ opus_int complexity /* I Complexity setting; selects table rows in the PE_MAX_NB_SUBFR case */
+)
+{
+ const opus_int16 *target_ptr, *basis_ptr;
+ opus_int32 energy;
+ opus_int k, i, j, lag_counter;
+ opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff;
+ VARDECL( opus_int32, scratch_mem ); /* Energies of the current subframe for consecutive lags, lowest lag first */
+ const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+ SAVE_STACK;
+
+ silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+ silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+ if( nb_subfr == PE_MAX_NB_SUBFR ) {
+ Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+ Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
+ nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+ cbk_size = PE_NB_CBKS_STAGE3_MAX;
+ } else {
+ silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+ Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+ Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+ nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+ cbk_size = PE_NB_CBKS_STAGE3_10MS;
+ }
+ ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
+
+ target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Same starting offset as in silk_P_Ana_calc_corr_st3() */
+ for( k = 0; k < nb_subfr; k++ ) {
+ lag_counter = 0;
+
+ /* Calculate the energy for first lag */
+ basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
+ energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
+ silk_assert( energy >= 0 );
+ scratch_mem[ lag_counter ] = energy;
+ lag_counter++;
+
+ lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 ); /* Number of lags in the range of subframe k */
+ for( i = 1; i < lag_diff; i++ ) { /* Each step moves the window one sample further back */
+ /* remove part outside new window */
+ energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
+ silk_assert( energy >= 0 );
+
+ /* add part that comes into window */
+ energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
+ silk_assert( energy >= 0 );
+ silk_assert( lag_counter < SCRATCH_SIZE );
+ scratch_mem[ lag_counter ] = energy;
+ lag_counter++;
+ }
+
+ delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); /* Lowest lag of the range; turns codebook lags into scratch_mem indices */
+ for( i = 0; i < nb_cbk_search; i++ ) {
+ /* Fill out the 3 dim array that stores the energies for */
+ /* each code_book vector for each start lag */
+ idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+ for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+ silk_assert( idx + j < SCRATCH_SIZE );
+ silk_assert( idx + j < lag_counter );
+ matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
+ scratch_mem[ idx + j ];
+ silk_assert(
+ matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
+ }
+ }
+ target_ptr += sf_length; /* Advance to the next subframe */
+ }
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/prefilter_FIX.c b/drivers/opus/silk/fixed/prefilter_FIX.c
new file mode 100644
index 0000000000..0b027eb836
--- /dev/null
+++ b/drivers/opus/silk/fixed/prefilter_FIX.c
@@ -0,0 +1,209 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Prefilter for finding Quantizer input signal (forward declaration; the definition follows silk_prefilter_FIX) */
+static OPUS_INLINE void silk_prefilt_FIX(
+ silk_prefilter_state_FIX *P, /* I/O state */
+ opus_int32 st_res_Q12[], /* I short term residual signal */
+ opus_int32 xw_Q3[], /* O prefiltered signal */
+ opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coefficients */
+ opus_int Tilt_Q14, /* I Tilt shaping coefficient */
+ opus_int32 LF_shp_Q14, /* I Low-frequency shaping coefficients */
+ opus_int lag, /* I Lag for harmonic shaping */
+ opus_int length /* I Length of signals */
+);
+
+void silk_warped_LPC_analysis_filter_FIX( /* Warped LPC analysis filter: writes one residual sample to res_Q2[] per input sample and updates state[] */
+ opus_int32 state[], /* I/O State [order + 1] */
+ opus_int32 res_Q2[], /* O Residual signal [length] */
+ const opus_int16 coef_Q13[], /* I Coefficients [order] */
+ const opus_int16 input[], /* I Input signal [length] */
+ const opus_int16 lambda_Q16, /* I Warping factor */
+ const opus_int length, /* I Length of input signal */
+ const opus_int order /* I Filter order (even) */
+)
+{
+ opus_int n, i;
+ opus_int32 acc_Q11, tmp1, tmp2; /* tmp1 / tmp2 alternate as the output of consecutive sections */
+
+ /* Order must be even */
+ silk_assert( ( order & 1 ) == 0 );
+
+ for( n = 0; n < length; n++ ) {
+ /* Output of lowpass section */
+ tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
+ state[ 0 ] = silk_LSHIFT( input[ n ], 14 ); /* First state element takes the new input sample */
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
+ state[ 1 ] = tmp2;
+ acc_Q11 = silk_RSHIFT( order, 1 ); /* Accumulator starts at order / 2 instead of 0 (presumably a rounding offset - TODO confirm) */
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
+ /* Loop over allpass sections, two per iteration (hence the even-order requirement) */
+ for( i = 2; i < order; i += 2 ) {
+ /* Output of allpass section */
+ tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
+ state[ i ] = tmp1;
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
+ state[ i + 1 ] = tmp2;
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
+ }
+ state[ order ] = tmp1; /* Last state element; this is why state[] holds order + 1 values */
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
+ res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); /* Residual: input minus accumulated filter output, both brought to Q2 */
+ }
+}
+
+void silk_prefilter_FIX( /* Computes the weighted signal xw_Q3[] from x[], one subframe at a time */
+ silk_encoder_state_FIX *psEnc, /* I/O Encoder state */
+ const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */
+ opus_int32 xw_Q3[], /* O Weighted signal */
+ const opus_int16 x[] /* I Speech signal */
+)
+{
+ silk_prefilter_state_FIX *P = &psEnc->sPrefilt;
+ opus_int j, k, lag;
+ opus_int32 tmp_32;
+ const opus_int16 *AR1_shp_Q13;
+ const opus_int16 *px;
+ opus_int32 *pxw_Q3;
+ opus_int HarmShapeGain_Q12, Tilt_Q14;
+ opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14;
+ VARDECL( opus_int32, x_filt_Q12 ); /* Output of the two-tap B_Q10 filter, one subframe long */
+ VARDECL( opus_int32, st_res_Q2 ); /* Output of the warped LPC analysis filter, one subframe long */
+ opus_int16 B_Q10[ 2 ];
+ SAVE_STACK;
+
+ /* Set up pointers */
+ px = x;
+ pxw_Q3 = xw_Q3;
+ lag = P->lagPrev; /* Lag stored by the previous call; kept for every subframe unless the frame is voiced */
+ ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 );
+ ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ /* Update Variables that change per sub frame */
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ lag = psEncCtrl->pitchL[ k ];
+ }
+
+ /* Noise shape parameters */
+ HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] );
+ silk_assert( HarmShapeGain_Q12 >= 0 );
+ HarmShapeFIRPacked_Q12 = silk_RSHIFT( HarmShapeGain_Q12, 2 ); /* Lower part of the packed word: gain shifted right by 2 */
+ HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 ); /* Bits 16 and up: gain shifted right by 1 */
+ Tilt_Q14 = psEncCtrl->Tilt_Q14[ k ];
+ LF_shp_Q14 = psEncCtrl->LF_shp_Q14[ k ];
+ AR1_shp_Q13 = &psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER ]; /* Coefficient set of subframe k */
+
+ /* Short term FIR filtering */
+ silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px,
+ psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );
+
+ /* Reduce (mainly) low frequencies during harmonic emphasis */
+ B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 ); /* Tap applied to the current residual sample */
+ tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 ); /* Q26 */
+ tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ); /* Q26 */
+ tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] ); /* Q24 */
+ tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 ); /* Q10 */
+ B_Q10[ 1 ]= silk_SAT16( tmp_32 ); /* Tap applied to the previous residual sample */
+ x_filt_Q12[ 0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] ); /* First sample: previous residual comes from the saved state */
+ for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) {
+ x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] );
+ }
+ P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ]; /* Save the last residual for the next subframe */
+
+ silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length );
+
+ px += psEnc->sCmn.subfr_length; /* Advance input and output to the next subframe */
+ pxw_Q3 += psEnc->sCmn.subfr_length;
+ }
+
+ P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; /* Stored from the last subframe regardless of signalType */
+ RESTORE_STACK;
+}
+
+/* Prefilter for finding Quantizer input signal: applies tilt, low-frequency and harmonic shaping to one block of samples */
+static OPUS_INLINE void silk_prefilt_FIX(
+ silk_prefilter_state_FIX *P, /* I/O state */
+ opus_int32 st_res_Q12[], /* I short term residual signal */
+ opus_int32 xw_Q3[], /* O prefiltered signal */
+ opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coefficients */
+ opus_int Tilt_Q14, /* I Tilt shaping coefficient */
+ opus_int32 LF_shp_Q14, /* I Low-frequency shaping coefficients */
+ opus_int lag, /* I Lag for harmonic shaping; harmonic shaping is skipped when lag <= 0 */
+ opus_int length /* I Length of signals */
+)
+{
+ opus_int i, idx, LTP_shp_buf_idx;
+ opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
+ opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
+ opus_int16 *LTP_shp_buf;
+
+ /* To speed up use temp variables instead of using the struct */
+ LTP_shp_buf = P->sLTP_shp;
+ LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
+ sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12;
+ sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12;
+
+ for( i = 0; i < length; i++ ) {
+ if( lag > 0 ) {
+ /* unrolled loop: with 3 taps the buffer is read at idx - 2, idx - 1 and idx (modulo LTP_MASK) */
+ silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+ idx = lag + LTP_shp_buf_idx; /* The write index decreases per sample, so adding lag reaches older samples */
+ n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+ n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); /* Middle tap; presumably uses the upper half of the packed word - TODO confirm */
+ n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+ } else {
+ n_LTP_Q12 = 0;
+ }
+
+ n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); /* Both terms are computed from the state left by the previous sample */
+ n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+ sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+ sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) );
+
+ LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; /* Step the write index backwards, wrapping with LTP_MASK */
+ LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+ xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); /* Output: shaped sample minus the harmonic term, Q12 -> Q3 */
+ }
+
+ /* Copy temp variable back to state */
+ P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12;
+ P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12;
+ P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
+}
diff --git a/drivers/opus/silk/fixed/process_gains_FIX.c b/drivers/opus/silk/fixed/process_gains_FIX.c
new file mode 100644
index 0000000000..3a78c475bb
--- /dev/null
+++ b/drivers/opus/silk/fixed/process_gains_FIX.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "tuning_parameters.h"
+
+/* Processing of gains: voiced gain reduction, soft limiting against residual energy, quantization, and Lambda_Q10 computation */
+void silk_process_gains_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Encoder state (LastGainIndex, GainsIndices and quantOffsetType are updated) */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control (Gains_Q16, GainsUnq_Q16, lastGainIndexPrev and Lambda_Q10 are written) */
+ opus_int condCoding /* I The type of conditional coding to use (compared against CODE_CONDITIONALLY) */
+)
+{
+ silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+ opus_int k;
+ opus_int32 s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10;
+
+ /* Gain reduction when LTP coding gain is high (voiced frames only) */
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Floating-point equivalent: s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); */
+ s_Q16 = -silk_sigm_Q15( silk_RSHIFT_ROUND( psEncCtrl->LTPredCodGain_Q7 - SILK_FIX_CONST( 12.0, 7 ), 4 ) );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->Gains_Q16[ k ] = silk_SMLAWB( psEncCtrl->Gains_Q16[ k ], psEncCtrl->Gains_Q16[ k ], s_Q16 );
+ }
+ }
+
+ /* Limit the quantized signal */
+ /* Floating-point equivalent: InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */
+ InvMaxSqrVal_Q16 = silk_DIV32_16( silk_log2lin(
+ silk_SMULWB( SILK_FIX_CONST( 21 + 16 / 0.33, 7 ) - psEnc->sCmn.SNR_dB_Q7, SILK_FIX_CONST( 0.33, 16 ) ) ), psEnc->sCmn.subfr_length );
+
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ /* Soft limit on ratio residual energy and squared gains */
+ ResNrg = psEncCtrl->ResNrg[ k ];
+ ResNrgPart = silk_SMULWW( ResNrg, InvMaxSqrVal_Q16 );
+ if( psEncCtrl->ResNrgQ[ k ] > 0 ) { /* positive Q value: scale down with a rounding right shift */
+ ResNrgPart = silk_RSHIFT_ROUND( ResNrgPart, psEncCtrl->ResNrgQ[ k ] );
+ } else { /* zero or negative Q value: scale up, clamping to silk_int32_MAX instead of overflowing */
+ if( ResNrgPart >= silk_RSHIFT( silk_int32_MAX, -psEncCtrl->ResNrgQ[ k ] ) ) {
+ ResNrgPart = silk_int32_MAX;
+ } else {
+ ResNrgPart = silk_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] );
+ }
+ }
+ gain = psEncCtrl->Gains_Q16[ k ];
+ gain_squared = silk_ADD_SAT32( ResNrgPart, silk_SMMUL( gain, gain ) );
+ if( gain_squared < silk_int16_MAX ) {
+ /* Small value: recalculate with higher precision */
+ gain_squared = silk_SMLAWW( silk_LSHIFT( ResNrgPart, 16 ), gain, gain );
+ silk_assert( gain_squared > 0 );
+ gain = silk_SQRT_APPROX( gain_squared ); /* Q8 */
+ gain = silk_min( gain, silk_int32_MAX >> 8 ); /* clamp before the shift by 8 below */
+ psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 8 ); /* Q16 */
+ } else {
+ gain = silk_SQRT_APPROX( gain_squared ); /* Q0 */
+ gain = silk_min( gain, silk_int32_MAX >> 16 ); /* clamp before the shift by 16 below */
+ psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 16 ); /* Q16 */
+ }
+ }
+
+ /* Save unquantized gains and gain Index (both are copied before silk_gains_quant() is called) */
+ silk_memcpy( psEncCtrl->GainsUnq_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+ psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex;
+
+ /* Quantize gains; Gains_Q16 and &LastGainIndex are passed by pointer (presumably updated in place - confirm in silk_gains_quant) */
+ silk_gains_quant( psEnc->sCmn.indices.GainsIndices, psEncCtrl->Gains_Q16,
+ &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+ /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (i.e. low-pass) */
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ if( psEncCtrl->LTPredCodGain_Q7 + silk_RSHIFT( psEnc->sCmn.input_tilt_Q15, 8 ) > SILK_FIX_CONST( 1.0, 7 ) ) {
+ psEnc->sCmn.indices.quantOffsetType = 0;
+ } else {
+ psEnc->sCmn.indices.quantOffsetType = 1;
+ }
+ }
+
+ /* Quantizer boundary adjustment: Lambda_Q10 is a constant offset plus terms for delayed-decision states, speech activity, input quality, coding quality and quantization offset */
+ quant_offset_Q10 = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ];
+ psEncCtrl->Lambda_Q10 = SILK_FIX_CONST( LAMBDA_OFFSET, 10 )
+ + silk_SMULBB( SILK_FIX_CONST( LAMBDA_DELAYED_DECISIONS, 10 ), psEnc->sCmn.nStatesDelayedDecision )
+ + silk_SMULWB( SILK_FIX_CONST( LAMBDA_SPEECH_ACT, 18 ), psEnc->sCmn.speech_activity_Q8 )
+ + silk_SMULWB( SILK_FIX_CONST( LAMBDA_INPUT_QUALITY, 12 ), psEncCtrl->input_quality_Q14 )
+ + silk_SMULWB( SILK_FIX_CONST( LAMBDA_CODING_QUALITY, 12 ), psEncCtrl->coding_quality_Q14 )
+ + silk_SMULWB( SILK_FIX_CONST( LAMBDA_QUANT_OFFSET, 16 ), quant_offset_Q10 );
+
+ silk_assert( psEncCtrl->Lambda_Q10 > 0 ); /* sanity bounds: Lambda_Q10 expected strictly between 0 and SILK_FIX_CONST( 2, 10 ) */
+ silk_assert( psEncCtrl->Lambda_Q10 < SILK_FIX_CONST( 2, 10 ) );
+}
diff --git a/drivers/opus/silk/fixed/regularize_correlations_FIX.c b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
new file mode 100644
index 0000000000..a3378fdd17
--- /dev/null
+++ b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Add noise to matrix diagonal, and the same noise to the first correlation value xx[ 0 ] */
+void silk_regularize_correlations_FIX(
+ opus_int32 *XX, /* I/O Correlation matrices (D x D; only the diagonal is modified) */
+ opus_int32 *xx, /* I/O Correlation values (only xx[ 0 ] is modified) */
+ opus_int32 noise, /* I Noise to add */
+ opus_int D /* I Dimension of XX */
+)
+{
+ opus_int i;
+ for( i = 0; i < D; i++ ) { /* visit each diagonal element ( i, i ) */
+ matrix_ptr( &XX[ 0 ], i, i, D ) = silk_ADD32( matrix_ptr( &XX[ 0 ], i, i, D ), noise );
+ }
+ xx[ 0 ] += noise; /* plain addition here, whereas the diagonal uses silk_ADD32 */
+}
diff --git a/drivers/opus/silk/fixed/residual_energy16_FIX.c b/drivers/opus/silk/fixed/residual_energy16_FIX.c
new file mode 100644
index 0000000000..39bdff2a72
--- /dev/null
+++ b/drivers/opus/silk/fixed/residual_energy16_FIX.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c; the result is clamped to [ 1, silk_int32_MAX >> 1 ] */
+opus_int32 silk_residual_energy16_covar_FIX(
+ const opus_int16 *c, /* I Prediction vector */
+ const opus_int32 *wXX, /* I Correlation matrix (D x D, assumed symmetric) */
+ const opus_int32 *wXx, /* I Correlation vector */
+ opus_int32 wxx, /* I Signal energy */
+ opus_int D, /* I Dimension */
+ opus_int cQ /* I Q value for c vector 0 - 15 */
+)
+{
+ opus_int i, j, lshifts, Qxtra;
+ opus_int32 c_max, w_max, tmp, tmp2, nrg;
+ opus_int cn[ MAX_MATRIX_SIZE ]; /* c[] shifted left by Qxtra bits */
+ const opus_int32 *pRow;
+
+ /* Safety checks */
+ silk_assert( D >= 0 );
+ silk_assert( D <= 16 );
+ silk_assert( cQ > 0 );
+ silk_assert( cQ < 16 );
+
+ lshifts = 16 - cQ;
+ Qxtra = lshifts; /* upper bound for the extra shift; reduced below by the available headroom */
+
+ c_max = 0; /* largest absolute coefficient */
+ for( i = 0; i < D; i++ ) {
+ c_max = silk_max_32( c_max, silk_abs( (opus_int32)c[ i ] ) );
+ }
+ Qxtra = silk_min_int( Qxtra, silk_CLZ32( c_max ) - 17 ); /* limit by the headroom of the largest coefficient */
+
+ w_max = silk_max_32( wXX[ 0 ], wXX[ D * D - 1 ] ); /* larger of the first and last diagonal elements */
+ Qxtra = silk_min_int( Qxtra, silk_CLZ32( silk_MUL( D, silk_RSHIFT( silk_SMULWB( w_max, c_max ), 4 ) ) ) - 5 );
+ Qxtra = silk_max_int( Qxtra, 0 ); /* never shift by a negative amount */
+ for( i = 0; i < D; i++ ) {
+ cn[ i ] = silk_LSHIFT( ( opus_int )c[ i ], Qxtra );
+ silk_assert( silk_abs(cn[i]) <= ( silk_int16_MAX + 1 ) ); /* Check that silk_SMLAWB can be used */
+ }
+ lshifts -= Qxtra; /* the remaining shift is applied to the results below */
+
+ /* Compute wxx - 2 * wXx * c */
+ tmp = 0;
+ for( i = 0; i < D; i++ ) {
+ tmp = silk_SMLAWB( tmp, wXx[ i ], cn[ i ] );
+ }
+ nrg = silk_RSHIFT( wxx, 1 + lshifts ) - tmp; /* Q: -lshifts - 1 */
+
+ /* Add c' * wXX * c, assuming wXX is symmetric */
+ tmp2 = 0;
+ for( i = 0; i < D; i++ ) {
+ tmp = 0;
+ pRow = &wXX[ i * D ]; /* start of row i */
+ for( j = i + 1; j < D; j++ ) { /* elements right of the diagonal only */
+ tmp = silk_SMLAWB( tmp, pRow[ j ], cn[ j ] );
+ }
+ tmp = silk_SMLAWB( tmp, silk_RSHIFT( pRow[ i ], 1 ), cn[ i ] ); /* diagonal element enters with half weight */
+ tmp2 = silk_SMLAWB( tmp2, tmp, cn[ i ] );
+ }
+ nrg = silk_ADD_LSHIFT32( nrg, tmp2, lshifts ); /* Q: -lshifts - 1 */
+
+ /* Keep one bit free always, because we add them for LSF interpolation */
+ if( nrg < 1 ) {
+ nrg = 1;
+ } else if( nrg > silk_RSHIFT( silk_int32_MAX, lshifts + 2 ) ) { /* the shift below would exceed silk_int32_MAX >> 1 */
+ nrg = silk_int32_MAX >> 1;
+ } else {
+ nrg = silk_LSHIFT( nrg, lshifts + 1 ); /* Q0 */
+ }
+ return nrg;
+
+}
diff --git a/drivers/opus/silk/fixed/residual_energy_FIX.c b/drivers/opus/silk/fixed/residual_energy_FIX.c
new file mode 100644
index 0000000000..13dbc51e39
--- /dev/null
+++ b/drivers/opus/silk/fixed/residual_energy_FIX.c
@@ -0,0 +1,97 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order */
+/* of preceding samples; the energies are then scaled by the squared subframe gains */
+void silk_residual_energy_FIX(
+ opus_int32 nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */
+ opus_int nrgsQ[ MAX_NB_SUBFR ], /* O Q value per subframe */
+ const opus_int16 x[], /* I Input signal (consumed in chunks of ( MAX_NB_SUBFR >> 1 ) * ( LPC_order + subfr_length ) samples) */
+ opus_int16 a_Q12[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */
+ const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */
+ const opus_int subfr_length, /* I Subframe length */
+ const opus_int nb_subfr, /* I Number of subframes */
+ const opus_int LPC_order /* I LPC order */
+)
+{
+ opus_int offset, i, j, rshift, lz1, lz2;
+ opus_int16 *LPC_res_ptr;
+ VARDECL( opus_int16, LPC_res );
+ const opus_int16 *x_ptr;
+ opus_int32 tmp32;
+ SAVE_STACK;
+
+ x_ptr = x;
+ offset = LPC_order + subfr_length; /* length of one subframe plus its LPC_order preceding samples */
+
+ /* Filter input to create the LPC residual for each frame half, and measure subframe energies */
+ ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 );
+ silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr ); /* every frame half must hold exactly MAX_NB_SUBFR >> 1 subframes */
+ for( i = 0; i < nb_subfr >> 1; i++ ) { /* parsed as i < ( nb_subfr >> 1 ) */
+ /* Calculate half frame LPC residual signal including preceding samples */
+ silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order );
+
+ /* Point to first subframe of the just calculated LPC residual signal */
+ LPC_res_ptr = LPC_res + LPC_order;
+ for( j = 0; j < ( MAX_NB_SUBFR >> 1 ); j++ ) {
+ /* Measure subframe energy */
+ silk_sum_sqr_shift( &nrgs[ i * ( MAX_NB_SUBFR >> 1 ) + j ], &rshift, LPC_res_ptr, subfr_length );
+
+ /* Set Q values for the measured energy (negated shift reported by silk_sum_sqr_shift) */
+ nrgsQ[ i * ( MAX_NB_SUBFR >> 1 ) + j ] = -rshift;
+
+ /* Move to next subframe */
+ LPC_res_ptr += offset;
+ }
+ /* Move to next frame half */
+ x_ptr += ( MAX_NB_SUBFR >> 1 ) * offset;
+ }
+
+ /* Apply the squared subframe gains */
+ for( i = 0; i < nb_subfr; i++ ) {
+ /* Fully upscale gains and energies */
+ lz1 = silk_CLZ32( nrgs[ i ] ) - 1;
+ lz2 = silk_CLZ32( gains[ i ] ) - 1;
+
+ tmp32 = silk_LSHIFT32( gains[ i ], lz2 );
+
+ /* Find squared gains */
+ tmp32 = silk_SMMUL( tmp32, tmp32 ); /* Q( 2 * lz2 - 32 )*/
+
+ /* Scale energies */
+ nrgs[ i ] = silk_SMMUL( tmp32, silk_LSHIFT32( nrgs[ i ], lz1 ) ); /* Q( nrgsQ[ i ] + lz1 + 2 * lz2 - 32 - 32 )*/
+ nrgsQ[ i ] += lz1 + 2 * lz2 - 32 - 32; /* keep the stored Q value in step with the rescaled energy */
+ }
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/schur64_FIX.c b/drivers/opus/silk/fixed/schur64_FIX.c
new file mode 100644
index 0000000000..22c0952ffd
--- /dev/null
+++ b/drivers/opus/silk/fixed/schur64_FIX.c
@@ -0,0 +1,92 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Computes Q16 reflection coefficients from correlations. Slower than schur(), but more accurate. */
+/* Uses SMULL(), available on armv4 */
+opus_int32 silk_schur64( /* O returns residual energy (0 if c[ 0 ] <= 0, otherwise at least 1) */
+ opus_int32 rc_Q16[], /* O Reflection coefficients [order] Q16 */
+ const opus_int32 c[], /* I Correlations [order+1] */
+ opus_int32 order /* I Prediction order */
+)
+{
+ opus_int k, n;
+ opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; /* two working copies of the correlations */
+ opus_int32 Ctmp1_Q30, Ctmp2_Q30, rc_tmp_Q31;
+
+ silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+ /* Check for invalid input: all coefficients are zeroed and 0 is returned */
+ if( c[ 0 ] <= 0 ) {
+ silk_memset( rc_Q16, 0, order * sizeof( opus_int32 ) );
+ return 0;
+ }
+
+ for( k = 0; k < order + 1; k++ ) {
+ C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ];
+ }
+
+ for( k = 0; k < order; k++ ) {
+ /* Check that we won't be getting an unstable rc, otherwise stop here. */
+ if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) {
+ if ( C[ k + 1 ][ 0 ] > 0 ) { /* final coefficient is set to -/+ .99, sign opposite to the correlation */
+ rc_Q16[ k ] = -SILK_FIX_CONST( .99f, 16 );
+ } else {
+ rc_Q16[ k ] = SILK_FIX_CONST( .99f, 16 );
+ }
+ k++; /* rc_Q16[ k ] has been written; the zero-fill loop below starts after it */
+ break;
+ }
+
+ /* Get reflection coefficient: divide two Q30 values and get result in Q31 */
+ rc_tmp_Q31 = silk_DIV32_varQ( -C[ k + 1 ][ 0 ], C[ 0 ][ 1 ], 31 );
+
+ /* Save the output (Q31 rounded down to Q16) */
+ rc_Q16[ k ] = silk_RSHIFT_ROUND( rc_tmp_Q31, 15 );
+
+ /* Update correlations; both new values are computed from the old pair */
+ for( n = 0; n < order - k; n++ ) {
+ Ctmp1_Q30 = C[ n + k + 1 ][ 0 ];
+ Ctmp2_Q30 = C[ n ][ 1 ];
+
+ /* Multiply and add the highest int32 */
+ C[ n + k + 1 ][ 0 ] = Ctmp1_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp2_Q30, 1 ), rc_tmp_Q31 );
+ C[ n ][ 1 ] = Ctmp2_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp1_Q30, 1 ), rc_tmp_Q31 );
+ }
+ }
+
+ for(; k < order; k++ ) { /* zero the coefficients that were not computed (only after an early break) */
+ rc_Q16[ k ] = 0;
+ }
+
+ return silk_max_32( 1, C[ 0 ][ 1 ] ); /* residual energy, never less than 1 */
+}
diff --git a/drivers/opus/silk/fixed/schur_FIX.c b/drivers/opus/silk/fixed/schur_FIX.c
new file mode 100644
index 0000000000..e8b24cf068
--- /dev/null
+++ b/drivers/opus/silk/fixed/schur_FIX.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Computes Q15 reflection coefficients from correlations. Faster than schur64(), but much less accurate. */
+/* uses SMLAWB(), requiring armv5E and higher. */
+opus_int32 silk_schur( /* O Returns residual energy (at least 1), in the internally rescaled domain */
+ opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */
+ const opus_int32 *c, /* I correlations [order+1] */
+ const opus_int32 order /* I prediction order */
+)
+{
+ opus_int k, n, lz;
+ opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; /* two working copies of the rescaled correlations */
+ opus_int32 Ctmp1, Ctmp2, rc_tmp_Q15;
+
+ silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+ /* Get number of leading zeros */
+ lz = silk_CLZ32( c[ 0 ] ); /* NOTE(review): no c[ 0 ] <= 0 guard here (silk_schur64() has one) - confirm callers ensure c[ 0 ] > 0 */
+
+ /* Copy correlations and adjust level to Q30 */
+ if( lz < 2 ) {
+ /* lz must be 1, so shift one to the right */
+ for( k = 0; k < order + 1; k++ ) {
+ C[ k ][ 0 ] = C[ k ][ 1 ] = silk_RSHIFT( c[ k ], 1 );
+ }
+ } else if( lz > 2 ) {
+ /* Shift to the left by lz - 2 bits */
+ lz -= 2;
+ for( k = 0; k < order + 1; k++ ) {
+ C[ k ][ 0 ] = C[ k ][ 1 ] = silk_LSHIFT( c[ k ], lz );
+ }
+ } else {
+ /* No need to shift */
+ for( k = 0; k < order + 1; k++ ) {
+ C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ];
+ }
+ }
+
+ for( k = 0; k < order; k++ ) {
+ /* Check that we won't be getting an unstable rc, otherwise stop here. */
+ if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) {
+ if ( C[ k + 1 ][ 0 ] > 0 ) { /* final coefficient is set to -/+ .99, sign opposite to the correlation */
+ rc_Q15[ k ] = -SILK_FIX_CONST( .99f, 15 );
+ } else {
+ rc_Q15[ k ] = SILK_FIX_CONST( .99f, 15 );
+ }
+ k++; /* rc_Q15[ k ] has been written; the zero-fill loop below starts after it */
+ break;
+ }
+
+ /* Get reflection coefficient; the divisor is floored at 1 */
+ rc_tmp_Q15 = -silk_DIV32_16( C[ k + 1 ][ 0 ], silk_max_32( silk_RSHIFT( C[ 0 ][ 1 ], 15 ), 1 ) );
+
+ /* Clip (shouldn't happen for properly conditioned inputs) */
+ rc_tmp_Q15 = silk_SAT16( rc_tmp_Q15 );
+
+ /* Store */
+ rc_Q15[ k ] = (opus_int16)rc_tmp_Q15;
+
+ /* Update correlations; both new values are computed from the old pair */
+ for( n = 0; n < order - k; n++ ) {
+ Ctmp1 = C[ n + k + 1 ][ 0 ];
+ Ctmp2 = C[ n ][ 1 ];
+ C[ n + k + 1 ][ 0 ] = silk_SMLAWB( Ctmp1, silk_LSHIFT( Ctmp2, 1 ), rc_tmp_Q15 );
+ C[ n ][ 1 ] = silk_SMLAWB( Ctmp2, silk_LSHIFT( Ctmp1, 1 ), rc_tmp_Q15 );
+ }
+ }
+
+ for(; k < order; k++ ) { /* zero the coefficients that were not computed (only after an early break) */
+ rc_Q15[ k ] = 0;
+ }
+
+ /* return residual energy, never less than 1 */
+ return silk_max_32( 1, C[ 0 ][ 1 ] );
+}
diff --git a/drivers/opus/silk/fixed/solve_LS_FIX.c b/drivers/opus/silk/fixed/solve_LS_FIX.c
new file mode 100644
index 0000000000..5d09284935
--- /dev/null
+++ b/drivers/opus/silk/fixed/solve_LS_FIX.c
@@ -0,0 +1,249 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/*****************************/
+/* Internal function headers */
+/*****************************/
+
+typedef struct { /* Two-part fixed-point representation of one inverted diagonal element 1 / D[ i ] */
+ opus_int32 Q36_part; /* first approximation of the inverse, Q36 */
+ opus_int32 Q48_part; /* correction term computed from the error of the Q36 part, Q48 */
+} inv_D_t;
+
+/* Factorize square matrix A into LDL form */
+static OPUS_INLINE void silk_LDL_factorize_FIX(
+ opus_int32 *A, /* I/O Pointer to Symmetric Square Matrix */
+ opus_int M, /* I Size of Matrix */
+ opus_int32 *L_Q16, /* I/O Pointer to Square Lower triangular Matrix L (ones on the diagonal) */
+ inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */
+);
+
+/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
+ const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */
+ opus_int M, /* I Dim of Matrix equation */
+ const opus_int32 *b, /* I b Vector */
+ opus_int32 *x_Q16 /* O x Vector */
+);
+
+/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
+ const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */
+ const opus_int M, /* I Dim of Matrix equation */
+ const opus_int32 *b, /* I b Vector */
+ opus_int32 *x_Q16 /* O x Vector */
+);
+
+static OPUS_INLINE void silk_LS_divide_Q16_FIX( /* Scale each element of T by the matching inverted diagonal element */
+ opus_int32 T[], /* I/O Numerator vector */
+ inv_D_t *inv_D, /* I 1 / D vector */
+ opus_int M /* I dimension */
+);
+
+/* Solves Ax = b, assuming A is symmetric, via LDL factorization followed by two triangular solves */
+void silk_solve_LDL_FIX(
+ opus_int32 *A, /* I/O Pointer to symmetric square matrix A (its diagonal may be increased by the factorization) */
+ opus_int M, /* I Size of matrix (at most MAX_MATRIX_SIZE) */
+ const opus_int32 *b, /* I Pointer to b vector */
+ opus_int32 *x_Q16 /* O Pointer to x solution vector */
+)
+{
+ VARDECL( opus_int32, L_Q16 );
+ opus_int32 Y[ MAX_MATRIX_SIZE ]; /* intermediate vector shared by the three solve steps */
+ inv_D_t inv_D[ MAX_MATRIX_SIZE ]; /* inverted diagonal elements filled in by the factorization */
+ SAVE_STACK;
+
+ silk_assert( M <= MAX_MATRIX_SIZE );
+ ALLOC( L_Q16, M * M, opus_int32 );
+
+ /***************************************************
+ Factorize A by LDL such that A = L*D*L',
+ where L is lower triangular with ones on diagonal
+ ****************************************************/
+ silk_LDL_factorize_FIX( A, M, L_Q16, inv_D );
+
+ /****************************************************
+ * substitute D*L'*x = Y. i.e.:
+ L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b
+ ******************************************************/
+ silk_LS_SolveFirst_FIX( L_Q16, M, b, Y );
+
+ /****************************************************
+ D*L'*x = Y <=> L'*x = inv(D)*Y, because D is
+ diagonal just multiply with 1/d_i
+ ****************************************************/
+ silk_LS_divide_Q16_FIX( Y, inv_D, M );
+
+ /****************************************************
+ x = inv(L') * inv(D) * Y
+ *****************************************************/
+ silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );
+ RESTORE_STACK;
+}
+
+static OPUS_INLINE void silk_LDL_factorize_FIX( /* Factorize A into L and D, raising the diagonal of A and retrying when a pivot is too small */
+ opus_int32 *A, /* I/O Pointer to Symmetric Square Matrix */
+ opus_int M, /* I Size of Matrix */
+ opus_int32 *L_Q16, /* I/O Pointer to Square Lower triangular Matrix L (ones on the diagonal) */
+ inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */
+)
+{
+ opus_int i, j, k, status, loop_count;
+ const opus_int32 *ptr1, *ptr2;
+ opus_int32 diag_min_value, tmp_32, err;
+ opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ];
+ opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48;
+
+ silk_assert( M <= MAX_MATRIX_SIZE );
+
+ status = 1; /* 1 = factorization must be (re)started, 0 = completed pass */
+ diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 );
+ for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { /* at most M attempts */
+ status = 0;
+ for( j = 0; j < M; j++ ) {
+ ptr1 = matrix_adr( L_Q16, j, 0, M ); /* L entries for index j (already computed for i < j) */
+ tmp_32 = 0;
+ for( i = 0; i < j; i++ ) {
+ v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] ); /* Q0 */
+ tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */
+ }
+ tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); /* candidate diagonal element D[ j ] */
+
+ if( tmp_32 < diag_min_value ) {
+ tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); /* amount added to every diagonal element of A */
+ /* Matrix not positive semi-definite, or ill conditioned */
+ for( i = 0; i < M; i++ ) {
+ matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 );
+ }
+ status = 1; /* request another attempt with the modified A */
+ break;
+ }
+ D_Q0[ j ] = tmp_32; /* always < max(Correlation) */
+
+ /* two-step division: Q36 estimate plus a Q48 correction derived from its error */
+ one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 ); /* Q36 */
+ one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 ); /* Q40 */
+ err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) ); /* Q24 */
+ one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 ); /* Q48 */
+
+ /* Save 1/Ds */
+ inv_D[ j ].Q36_part = one_div_diag_Q36;
+ inv_D[ j ].Q48_part = one_div_diag_Q48;
+
+ matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */
+ ptr1 = matrix_adr( A, j, 0, M );
+ ptr2 = matrix_adr( L_Q16, j + 1, 0, M );
+ for( i = j + 1; i < M; i++ ) {
+ tmp_32 = 0;
+ for( k = 0; k < j; k++ ) {
+ tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */
+ }
+ tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */
+
+ /* tmp_32 / D_Q0[j] : Divide to Q16 */
+ matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ),
+ silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
+
+ /* advance ptr2 by M elements, to the L entries used for the next i */
+ ptr2 += M;
+ }
+ }
+ }
+
+ silk_assert( status == 0 ); /* the last attempt is expected to have completed without a restart request */
+}
+
+static OPUS_INLINE void silk_LS_divide_Q16_FIX( /* Scale each element of T by the matching inverted diagonal element */
+ opus_int32 T[], /* I/O Numerator vector (overwritten in place) */
+ inv_D_t *inv_D, /* I 1 / D vector */
+ opus_int M /* I dimension */
+)
+{
+ opus_int i;
+ opus_int32 tmp_32;
+ opus_int32 one_div_diag_Q36, one_div_diag_Q48;
+
+ for( i = 0; i < M; i++ ) {
+ one_div_diag_Q36 = inv_D[ i ].Q36_part;
+ one_div_diag_Q48 = inv_D[ i ].Q48_part;
+
+ tmp_32 = T[ i ];
+ T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); /* sum of the Q48 and Q36 contributions */
+ }
+}
+
+/* Solve Lx = b, when L is lower triangular and has ones on the diagonal (forward substitution, i ascending) */
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
+ const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */
+ opus_int M, /* I Dim of Matrix equation */
+ const opus_int32 *b, /* I b Vector */
+ opus_int32 *x_Q16 /* O x Vector */
+)
+{
+ opus_int i, j;
+ const opus_int32 *ptr32;
+ opus_int32 tmp_32;
+
+ for( i = 0; i < M; i++ ) {
+ ptr32 = matrix_adr( L_Q16, i, 0, M ); /* L entries for index i */
+ tmp_32 = 0;
+ for( j = 0; j < i; j++ ) { /* accumulate over the already solved x_Q16[ j ], j < i */
+ tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] );
+ }
+ x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); /* no division: the diagonal of L is one */
+ }
+}
+
+/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal (back substitution, i descending) */
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
+ const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */
+ const opus_int M, /* I Dim of Matrix equation */
+ const opus_int32 *b, /* I b Vector */
+ opus_int32 *x_Q16 /* O x Vector */
+)
+{
+ opus_int i, j;
+ const opus_int32 *ptr32;
+ opus_int32 tmp_32;
+
+ for( i = M - 1; i >= 0; i-- ) {
+ ptr32 = matrix_adr( L_Q16, 0, i, M ); /* base for index i; entries are then read with a stride of M elements */
+ tmp_32 = 0;
+ for( j = M - 1; j > i; j-- ) { /* accumulate over the already solved x_Q16[ j ], j > i */
+ tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] );
+ }
+ x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); /* no division: the diagonal of L is one */
+ }
+}
diff --git a/drivers/opus/silk/fixed/structs_FIX.h b/drivers/opus/silk/fixed/structs_FIX.h
new file mode 100644
index 0000000000..0284dfa27a
--- /dev/null
+++ b/drivers/opus/silk/fixed/structs_FIX.h
@@ -0,0 +1,133 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_STRUCTS_FIX_H
+#define SILK_STRUCTS_FIX_H
+
+#include "typedef.h"
+#include "silk_main.h"
+#include "structs.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/********************************/
+/* Noise shaping analysis state */
+/********************************/
+typedef struct {
+ opus_int8 LastGainIndex; /* NOTE(review): presumably the most recent gain index - confirm against users */
+ opus_int32 HarmBoost_smth_Q16; /* NOTE(review): "_smth" presumably marks a smoothed value and "_Q16" a Q16 fixed-point format - confirm */
+ opus_int32 HarmShapeGain_smth_Q16; /* same naming pattern as HarmBoost_smth_Q16 */
+ opus_int32 Tilt_smth_Q16; /* same naming pattern as HarmBoost_smth_Q16 */
+} silk_shape_state_FIX;
+
+/********************************/
+/* Prefilter state */
+/********************************/
+typedef struct {
+ opus_int16 sLTP_shp[ LTP_BUF_LENGTH ]; /* Buffer of LTP_BUF_LENGTH 16-bit values */
+ opus_int32 sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ]; /* MAX_SHAPE_LPC_ORDER + 1 32-bit state values */
+ opus_int sLTP_shp_buf_idx; /* NOTE(review): presumably the current position in sLTP_shp - confirm against users */
+ opus_int32 sLF_AR_shp_Q12; /* NOTE(review): "_Q12" presumably marks a Q12 fixed-point format - confirm */
+ opus_int32 sLF_MA_shp_Q12; /* same naming pattern as sLF_AR_shp_Q12 */
+ opus_int32 sHarmHP_Q2;
+ opus_int32 rand_seed; /* NOTE(review): presumably the seed of a pseudo-random generator - confirm against users */
+ opus_int lagPrev; /* NOTE(review): presumably the previous pitch lag - confirm against users */
+} silk_prefilter_state_FIX;
+
+/********************************/
+/* Encoder state FIX */
+/********************************/
+typedef struct {
+ silk_encoder_state sCmn; /* Common struct, shared with floating-point code */
+ silk_shape_state_FIX sShape; /* Shape state */
+ silk_prefilter_state_FIX sPrefilt; /* Prefilter State */
+
+ /* Sample buffer holding 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX 16-bit values, declared with silk_DWORD_ALIGN */
+ silk_DWORD_ALIGN opus_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */
+ opus_int LTPCorr_Q15; /* Normalized correlation from pitch lag estimator */
+} silk_encoder_state_FIX;
+
+/************************/
+/* Encoder control FIX */
+/************************/
+typedef struct {
+ /* Prediction and coding parameters */
+ opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; /* MAX_NB_SUBFR entries */
+ silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; /* Two sets of MAX_LPC_ORDER coefficients */
+ opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; /* LTP_ORDER coefficients for each of MAX_NB_SUBFR entries, stored flat */
+ opus_int LTP_scale_Q14;
+ opus_int pitchL[ MAX_NB_SUBFR ]; /* MAX_NB_SUBFR entries */
+
+ /* Noise shaping parameters */
+ /* Testing */
+ silk_DWORD_ALIGN opus_int16 AR1_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; /* MAX_SHAPE_LPC_ORDER values for each of MAX_NB_SUBFR entries, stored flat */
+ silk_DWORD_ALIGN opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; /* Same layout as AR1_Q13 */
+ opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */
+ opus_int GainsPre_Q14[ MAX_NB_SUBFR ];
+ opus_int HarmBoost_Q14[ MAX_NB_SUBFR ];
+ opus_int Tilt_Q14[ MAX_NB_SUBFR ];
+ opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ];
+ opus_int Lambda_Q10;
+ opus_int input_quality_Q14;
+ opus_int coding_quality_Q14;
+
+ /* Measures */
+ opus_int sparseness_Q8;
+ opus_int32 predGain_Q16;
+ opus_int LTPredCodGain_Q7;
+ opus_int32 ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */
+ opus_int ResNrgQ[ MAX_NB_SUBFR ]; /* Q domain for the residual energy > 0 */
+
+ /* Parameters for CBR mode */
+ opus_int32 GainsUnq_Q16[ MAX_NB_SUBFR ]; /* NOTE(review): "Unq" presumably means unquantized - confirm against users */
+ opus_int8 lastGainIndexPrev;
+} silk_encoder_control_FIX;
+
+/************************/
+/* Encoder Super Struct */
+/************************/
+typedef struct {
+ silk_encoder_state_FIX state_Fxx[ ENCODER_NUM_CHANNELS ]; /* ENCODER_NUM_CHANNELS fixed-point encoder states */
+ stereo_enc_state sStereo; /* Stereo encoder state */
+ opus_int32 nBitsExceeded;
+ opus_int nChannelsAPI; /* NOTE(review): presumably the channel count seen at the API - confirm against users */
+ opus_int nChannelsInternal; /* NOTE(review): presumably the channel count used internally - confirm against users */
+ opus_int nPrevChannelsInternal; /* NOTE(review): presumably the previous value of nChannelsInternal - confirm against users */
+ opus_int timeSinceSwitchAllowed_ms; /* NOTE(review): "_ms" presumably means milliseconds - confirm */
+ opus_int allowBandwidthSwitch;
+ opus_int prev_decode_only_middle;
+} silk_encoder;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/fixed/vector_ops_FIX.c b/drivers/opus/silk/fixed/vector_ops_FIX.c
new file mode 100644
index 0000000000..b1e422eb91
--- /dev/null
+++ b/drivers/opus/silk/fixed/vector_ops_FIX.c
@@ -0,0 +1,96 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Copy and multiply a vector by a constant: data_out[ i ] is data_in[ i ] scaled by gain_Q16, for i = 0 .. dataSize - 1 */
+void silk_scale_copy_vector16(
+ opus_int16 *data_out, /* O Scaled copy of data_in */
+ const opus_int16 *data_in, /* I Vector to scale, left unmodified */
+ opus_int32 gain_Q16, /* I Gain in Q16 */
+ const opus_int dataSize /* I Length */
+)
+{
+ opus_int i;
+ opus_int32 tmp32;
+
+ for( i = 0; i < dataSize; i++ ) {
+ tmp32 = silk_SMULWB( gain_Q16, data_in[ i ] );
+ data_out[ i ] = (opus_int16)silk_CHECK_FIT16( tmp32 ); /* 32-bit product is narrowed to 16 bits; silk_CHECK_FIT16 presumably only checks the range - confirm */
+ }
+}
+
+/* Multiply a vector in place by a Q26 constant; the 64-bit product is shifted right by 8, a net left shift of 26 - 8 = 18 */
+void silk_scale_vector32_Q26_lshift_18(
+ opus_int32 *data1, /* I/O Q0/Q18 */
+ opus_int32 gain_Q26, /* I Q26 */
+ opus_int dataSize /* I length */
+)
+{
+ opus_int i;
+
+ for( i = 0; i < dataSize; i++ ) {
+ data1[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( silk_SMULL( data1[ i ], gain_Q26 ), 8 ) ); /* OUTPUT: Q18 */
+ }
+}
+
+/* sum = for(i=0;i<len;i++)inVec1[i]*inVec2[i]; --- inner product, accumulated in 32 bits */
+/* Note for ARM asm: */
+/* * inVec1 and inVec2 should be at least 2 byte aligned. */
+/* * len should be positive 16bit integer. */
+/* * only when len>6, memory access can be reduced by half. */
+opus_int32 silk_inner_prod_aligned(
+ const opus_int16 *const inVec1, /* I input vector 1 */
+ const opus_int16 *const inVec2, /* I input vector 2 */
+ const opus_int len /* I vector lengths */
+)
+{
+ opus_int i;
+ opus_int32 sum = 0; /* returned unchanged (0) when len <= 0 */
+ for( i = 0; i < len; i++ ) {
+ sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
+ }
+ return sum;
+}
+
+opus_int64 silk_inner_prod16_aligned_64( /* O inner product of inVec1 and inVec2, accumulated in 64 bits */
+ const opus_int16 *inVec1, /* I input vector 1 */
+ const opus_int16 *inVec2, /* I input vector 2 */
+ const opus_int len /* I vector lengths */
+)
+{
+ opus_int i;
+ opus_int64 sum = 0; /* returned unchanged (0) when len <= 0 */
+ for( i = 0; i < len; i++ ) {
+ sum = silk_SMLALBB( sum, inVec1[ i ], inVec2[ i ] );
+ }
+ return sum;
+}
diff --git a/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
new file mode 100644
index 0000000000..3f04df775c
--- /dev/null
+++ b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
@@ -0,0 +1,88 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+#define QC 10 /* Q-domain of the 64-bit accumulators corr_QC */
+#define QS 14 /* Q-domain of the allpass states: input samples are shifted left by QS */
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FIX(
+ opus_int32 *corr, /* O Result [order + 1] */
+ opus_int *scale, /* O Scaling of the correlation vector */
+ const opus_int16 *input, /* I Input data to correlate */
+ const opus_int warping_Q16, /* I Warping coefficient */
+ const opus_int length, /* I Length of input */
+ const opus_int order /* I Correlation order (even) */
+)
+{
+ opus_int n, i, lsh;
+ opus_int32 tmp1_QS, tmp2_QS;
+ opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; /* allpass section states, zeroed on every call */
+ opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; /* one accumulator per lag, zeroed on every call */
+
+ /* Order must be even: the section loop below advances two sections per iteration */
+ silk_assert( ( order & 1 ) == 0 );
+ silk_assert( 2 * QS - QC >= 0 );
+
+ /* Loop over samples */
+ for( n = 0; n < length; n++ ) {
+ tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
+ /* Loop over allpass sections */
+ for( i = 0; i < order; i += 2 ) {
+ /* Output of allpass section */
+ tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
+ state_QS[ i ] = tmp1_QS;
+ corr_QC[ i ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC ); /* product of two QS values, shifted down to QC */
+ /* Output of allpass section */
+ tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
+ state_QS[ i + 1 ] = tmp2_QS;
+ corr_QC[ i + 1 ] += silk_RSHIFT64( silk_SMULL( tmp2_QS, state_QS[ 0 ] ), 2 * QS - QC );
+ }
+ state_QS[ order ] = tmp1_QS;
+ corr_QC[ order ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC );
+ }
+
+ lsh = silk_CLZ64( corr_QC[ 0 ] ) - 35; /* shift is derived from the leading zeros of the lag-0 accumulator */
+ lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC );
+ *scale = -( QC + lsh ); /* negated Q-domain ( QC + lsh ) of the values written to corr[] */
+ silk_assert( *scale >= -30 && *scale <= 12 );
+ if( lsh >= 0 ) { /* the same shift is applied to all order + 1 lags */
+ for( i = 0; i < order + 1; i++ ) {
+ corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_LSHIFT64( corr_QC[ i ], lsh ) );
+ }
+ } else {
+ for( i = 0; i < order + 1; i++ ) {
+ corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( corr_QC[ i ], -lsh ) );
+ }
+ }
+ silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
+}
diff --git a/drivers/opus/silk/float/LPC_analysis_filter_FLP.c b/drivers/opus/silk/float/LPC_analysis_filter_FLP.c
new file mode 100644
index 0000000000..8d26c093bf
--- /dev/null
+++ b/drivers/opus/silk/float/LPC_analysis_filter_FLP.c
@@ -0,0 +1,249 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdlib.h>
+#include "main_FLP.h"
+
+/************************************************/
+/* LPC analysis filter */
+/* NB! State is kept internally and the */
+/* filter always starts with zero state */
+/* first Order output samples are set to zero */
+/************************************************/
+
+/* 16th order LPC analysis filter, does not write first 16 samples; r_LPC[ ix ] = s[ ix ] - sum( PredCoef[ k ] * s[ ix - 1 - k ] ), k = 0 .. 15 */
+static OPUS_INLINE void silk_LPC_analysis_filter16_FLP(
+ silk_float r_LPC[], /* O LPC residual signal */
+ const silk_float PredCoef[], /* I LPC coefficients */
+ const silk_float s[], /* I Input signal */
+ const opus_int length /* I Length of input signal */
+)
+{
+ opus_int ix;
+ silk_float LPC_pred;
+ const silk_float *s_ptr;
+
+ for( ix = 16; ix < length; ix++ ) {
+ s_ptr = &s[ix - 1]; /* most recent past sample; s_ptr[ -k ] is k samples further back */
+
+ /* short-term prediction */
+ LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] +
+ s_ptr[ -1 ] * PredCoef[ 1 ] +
+ s_ptr[ -2 ] * PredCoef[ 2 ] +
+ s_ptr[ -3 ] * PredCoef[ 3 ] +
+ s_ptr[ -4 ] * PredCoef[ 4 ] +
+ s_ptr[ -5 ] * PredCoef[ 5 ] +
+ s_ptr[ -6 ] * PredCoef[ 6 ] +
+ s_ptr[ -7 ] * PredCoef[ 7 ] +
+ s_ptr[ -8 ] * PredCoef[ 8 ] +
+ s_ptr[ -9 ] * PredCoef[ 9 ] +
+ s_ptr[ -10 ] * PredCoef[ 10 ] +
+ s_ptr[ -11 ] * PredCoef[ 11 ] +
+ s_ptr[ -12 ] * PredCoef[ 12 ] +
+ s_ptr[ -13 ] * PredCoef[ 13 ] +
+ s_ptr[ -14 ] * PredCoef[ 14 ] +
+ s_ptr[ -15 ] * PredCoef[ 15 ];
+
+ /* prediction error; s_ptr[ 1 ] is the current sample s[ ix ] */
+ r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+ }
+}
+
+/* 12th order LPC analysis filter, does not write first 12 samples; r_LPC[ ix ] = s[ ix ] - sum( PredCoef[ k ] * s[ ix - 1 - k ] ), k = 0 .. 11 */
+static OPUS_INLINE void silk_LPC_analysis_filter12_FLP(
+ silk_float r_LPC[], /* O LPC residual signal */
+ const silk_float PredCoef[], /* I LPC coefficients */
+ const silk_float s[], /* I Input signal */
+ const opus_int length /* I Length of input signal */
+)
+{
+ opus_int ix;
+ silk_float LPC_pred;
+ const silk_float *s_ptr;
+
+ for( ix = 12; ix < length; ix++ ) {
+ s_ptr = &s[ix - 1]; /* most recent past sample; s_ptr[ -k ] is k samples further back */
+
+ /* short-term prediction */
+ LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] +
+ s_ptr[ -1 ] * PredCoef[ 1 ] +
+ s_ptr[ -2 ] * PredCoef[ 2 ] +
+ s_ptr[ -3 ] * PredCoef[ 3 ] +
+ s_ptr[ -4 ] * PredCoef[ 4 ] +
+ s_ptr[ -5 ] * PredCoef[ 5 ] +
+ s_ptr[ -6 ] * PredCoef[ 6 ] +
+ s_ptr[ -7 ] * PredCoef[ 7 ] +
+ s_ptr[ -8 ] * PredCoef[ 8 ] +
+ s_ptr[ -9 ] * PredCoef[ 9 ] +
+ s_ptr[ -10 ] * PredCoef[ 10 ] +
+ s_ptr[ -11 ] * PredCoef[ 11 ];
+
+ /* prediction error; s_ptr[ 1 ] is the current sample s[ ix ] */
+ r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+ }
+}
+
+/* 10th order LPC analysis filter, does not write first 10 samples; r_LPC[ ix ] = s[ ix ] - sum( PredCoef[ k ] * s[ ix - 1 - k ] ), k = 0 .. 9 */
+static OPUS_INLINE void silk_LPC_analysis_filter10_FLP(
+ silk_float r_LPC[], /* O LPC residual signal */
+ const silk_float PredCoef[], /* I LPC coefficients */
+ const silk_float s[], /* I Input signal */
+ const opus_int length /* I Length of input signal */
+)
+{
+ opus_int ix;
+ silk_float LPC_pred;
+ const silk_float *s_ptr;
+
+ for( ix = 10; ix < length; ix++ ) {
+ s_ptr = &s[ix - 1]; /* most recent past sample; s_ptr[ -k ] is k samples further back */
+
+ /* short-term prediction */
+ LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] +
+ s_ptr[ -1 ] * PredCoef[ 1 ] +
+ s_ptr[ -2 ] * PredCoef[ 2 ] +
+ s_ptr[ -3 ] * PredCoef[ 3 ] +
+ s_ptr[ -4 ] * PredCoef[ 4 ] +
+ s_ptr[ -5 ] * PredCoef[ 5 ] +
+ s_ptr[ -6 ] * PredCoef[ 6 ] +
+ s_ptr[ -7 ] * PredCoef[ 7 ] +
+ s_ptr[ -8 ] * PredCoef[ 8 ] +
+ s_ptr[ -9 ] * PredCoef[ 9 ];
+
+ /* prediction error; s_ptr[ 1 ] is the current sample s[ ix ] */
+ r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+ }
+}
+
+/* 8th order LPC analysis filter, does not write first 8 samples; r_LPC[ ix ] = s[ ix ] - sum( PredCoef[ k ] * s[ ix - 1 - k ] ), k = 0 .. 7 */
+static OPUS_INLINE void silk_LPC_analysis_filter8_FLP(
+ silk_float r_LPC[], /* O LPC residual signal */
+ const silk_float PredCoef[], /* I LPC coefficients */
+ const silk_float s[], /* I Input signal */
+ const opus_int length /* I Length of input signal */
+)
+{
+ opus_int ix;
+ silk_float LPC_pred;
+ const silk_float *s_ptr;
+
+ for( ix = 8; ix < length; ix++ ) {
+ s_ptr = &s[ix - 1]; /* most recent past sample; s_ptr[ -k ] is k samples further back */
+
+ /* short-term prediction */
+ LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] +
+ s_ptr[ -1 ] * PredCoef[ 1 ] +
+ s_ptr[ -2 ] * PredCoef[ 2 ] +
+ s_ptr[ -3 ] * PredCoef[ 3 ] +
+ s_ptr[ -4 ] * PredCoef[ 4 ] +
+ s_ptr[ -5 ] * PredCoef[ 5 ] +
+ s_ptr[ -6 ] * PredCoef[ 6 ] +
+ s_ptr[ -7 ] * PredCoef[ 7 ];
+
+ /* prediction error; s_ptr[ 1 ] is the current sample s[ ix ] */
+ r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+ }
+}
+
+/* 6th order LPC analysis filter, does not write first 6 samples; r_LPC[ ix ] = s[ ix ] - sum( PredCoef[ k ] * s[ ix - 1 - k ] ), k = 0 .. 5 */
+static OPUS_INLINE void silk_LPC_analysis_filter6_FLP(
+ silk_float r_LPC[], /* O LPC residual signal */
+ const silk_float PredCoef[], /* I LPC coefficients */
+ const silk_float s[], /* I Input signal */
+ const opus_int length /* I Length of input signal */
+)
+{
+ opus_int ix;
+ silk_float LPC_pred;
+ const silk_float *s_ptr;
+
+ for( ix = 6; ix < length; ix++ ) {
+ s_ptr = &s[ix - 1]; /* most recent past sample; s_ptr[ -k ] is k samples further back */
+
+ /* short-term prediction */
+ LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] +
+ s_ptr[ -1 ] * PredCoef[ 1 ] +
+ s_ptr[ -2 ] * PredCoef[ 2 ] +
+ s_ptr[ -3 ] * PredCoef[ 3 ] +
+ s_ptr[ -4 ] * PredCoef[ 4 ] +
+ s_ptr[ -5 ] * PredCoef[ 5 ];
+
+ /* prediction error; s_ptr[ 1 ] is the current sample s[ ix ] */
+ r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+ }
+}
+
+/************************************************/
+/* LPC analysis filter: dispatches on Order to */
+/* the unrolled filter for order 6/8/10/12/16. */
+/* NB! Nothing is kept between calls, and the */
+/* first Order output samples are set to zero */
+/************************************************/
+void silk_LPC_analysis_filter_FLP(
+ silk_float r_LPC[], /* O LPC residual signal */
+ const silk_float PredCoef[], /* I LPC coefficients */
+ const silk_float s[], /* I Input signal */
+ const opus_int length, /* I Length of input signal */
+ const opus_int Order /* I LPC order */
+)
+{
+ silk_assert( Order <= length );
+
+ switch( Order ) {
+ case 6:
+ silk_LPC_analysis_filter6_FLP( r_LPC, PredCoef, s, length );
+ break;
+
+ case 8:
+ silk_LPC_analysis_filter8_FLP( r_LPC, PredCoef, s, length );
+ break;
+
+ case 10:
+ silk_LPC_analysis_filter10_FLP( r_LPC, PredCoef, s, length );
+ break;
+
+ case 12:
+ silk_LPC_analysis_filter12_FLP( r_LPC, PredCoef, s, length );
+ break;
+
+ case 16:
+ silk_LPC_analysis_filter16_FLP( r_LPC, PredCoef, s, length );
+ break;
+
+ default: /* any other Order is unsupported: no filter runs, only the memset below writes to r_LPC */
+ silk_assert( 0 );
+ break;
+ }
+
+ /* Set first Order output samples to zero (the order-specific filters above leave them unwritten) */
+ silk_memset( r_LPC, 0, Order * sizeof( silk_float ) );
+}
+
diff --git a/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c b/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c
new file mode 100644
index 0000000000..968edfb189
--- /dev/null
+++ b/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c
@@ -0,0 +1,76 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "SigProc_FLP.h"
+
+#define RC_THRESHOLD 0.9999f /* largest magnitude of rc accepted below; beyond it the function returns 0.0f */
+
+/* compute inverse of LPC prediction gain, and */
+/* test if LPC coefficients are stable (all poles within unit circle) */
+/* this code is based on silk_a2k_FLP() */
+silk_float silk_LPC_inverse_pred_gain_FLP( /* O return inverse prediction gain, energy domain */
+ const silk_float *A, /* I prediction coefficients [order] */
+ opus_int32 order /* I prediction order */
+)
+{
+ opus_int k, n;
+ double invGain, rc, rc_mult1, rc_mult2;
+ silk_float Atmp[ 2 ][ SILK_MAX_ORDER_LPC ]; /* two work rows, used alternately */
+ silk_float *Aold, *Anew;
+
+ Anew = Atmp[ order & 1 ]; /* start row is picked by the parity of order */
+ silk_memcpy( Anew, A, order * sizeof(silk_float) ); /* work on a copy; A is never written */
+
+ invGain = 1.0;
+ for( k = order - 1; k > 0; k-- ) {
+ rc = -Anew[ k ]; /* negated last coefficient of the current order (presumably the reflection coefficient - confirm) */
+ if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) {
+ return 0.0f; /* magnitude above RC_THRESHOLD */
+ }
+ rc_mult1 = 1.0f - rc * rc;
+ rc_mult2 = 1.0f / rc_mult1;
+ invGain *= rc_mult1; /* running product of ( 1 - rc * rc ) */
+ /* swap pointers: the row written in the previous step becomes the input, the other row the output */
+ Aold = Anew;
+ Anew = Atmp[ k & 1 ];
+ for( n = 0; n < k; n++ ) { /* compute the k coefficients used by the next iteration */
+ Anew[ n ] = (silk_float)( ( Aold[ n ] - Aold[ k - n - 1 ] * rc ) * rc_mult2 );
+ }
+ }
+ rc = -Anew[ 0 ]; /* final step: same check and gain update, no further coefficients to compute */
+ if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) {
+ return 0.0f;
+ }
+ rc_mult1 = 1.0f - rc * rc;
+ invGain *= rc_mult1;
+ return (silk_float)invGain;
+}
diff --git a/drivers/opus/silk/float/LTP_analysis_filter_FLP.c b/drivers/opus/silk/float/LTP_analysis_filter_FLP.c
new file mode 100644
index 0000000000..fc729e99b1
--- /dev/null
+++ b/drivers/opus/silk/float/LTP_analysis_filter_FLP.c
@@ -0,0 +1,75 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+void silk_LTP_analysis_filter_FLP( /* Subtracts an LTP_ORDER-tap long-term prediction from x and scales the residual, subframe by subframe */
+ silk_float *LTP_res, /* O LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */
+ const silk_float *x, /* I Input signal, with preceding samples */
+ const silk_float B[ LTP_ORDER * MAX_NB_SUBFR ], /* I LTP coefficients for each subframe */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
+ const silk_float invGains[ MAX_NB_SUBFR ], /* I Inverse quantization gains */
+ const opus_int subfr_length, /* I Length of each subframe */
+ const opus_int nb_subfr, /* I number of subframes */
+ const opus_int pre_length /* I Preceding samples for each subframe */
+)
+{
+ const silk_float *x_ptr, *x_lag_ptr;
+ silk_float Btmp[ LTP_ORDER ];
+ silk_float *LTP_res_ptr;
+ silk_float inv_gain;
+ opus_int k, i, j;
+
+ x_ptr = x;
+ LTP_res_ptr = LTP_res;
+ for( k = 0; k < nb_subfr; k++ ) {
+ x_lag_ptr = x_ptr - pitchL[ k ]; /* same signal, pitchL[ k ] samples earlier */
+ inv_gain = invGains[ k ];
+ for( i = 0; i < LTP_ORDER; i++ ) { /* local copy of the taps of subframe k */
+ Btmp[ i ] = B[ k * LTP_ORDER + i ];
+ }
+
+ /* LTP analysis FIR filter, run over subfr_length + pre_length samples */
+ for( i = 0; i < subfr_length + pre_length; i++ ) {
+ LTP_res_ptr[ i ] = x_ptr[ i ];
+ /* Subtract long-term prediction */
+ for( j = 0; j < LTP_ORDER; j++ ) {
+ LTP_res_ptr[ i ] -= Btmp[ j ] * x_lag_ptr[ LTP_ORDER / 2 - j ]; /* tap j reads offset LTP_ORDER / 2 - j relative to the lagged sample */
+ }
+ LTP_res_ptr[ i ] *= inv_gain;
+ x_lag_ptr++;
+ }
+
+ /* Update pointers: output advances by the full block, input only by subfr_length, so consecutive input blocks overlap by pre_length */
+ LTP_res_ptr += subfr_length + pre_length;
+ x_ptr += subfr_length;
+ }
+}
diff --git a/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c b/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c
new file mode 100644
index 0000000000..60e1119d5a
--- /dev/null
+++ b/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c
@@ -0,0 +1,52 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+void silk_LTP_scale_ctrl_FLP( /* Chooses indices.LTP_scaleIndex and derives psEncCtrl->LTP_scale from it */
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ opus_int condCoding /* I The type of conditional coding to use */
+)
+{
+ opus_int round_loss;
+
+ if( condCoding == CODE_INDEPENDENTLY ) {
+ /* Only scale if first frame in packet */
+ round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket;
+ psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( round_loss * psEncCtrl->LTPredCodGain * 0.1f, 0.0f, 2.0f ); /* limited to 0.0f .. 2.0f before the cast to opus_int8 */
+ } else {
+ /* Default is minimum scaling */
+ psEnc->sCmn.indices.LTP_scaleIndex = 0;
+ }
+
+ psEncCtrl->LTP_scale = (silk_float)silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ] / 16384.0f; /* 16384 = 2^14 converts the Q14 table entry to float */
+}
diff --git a/drivers/opus/silk/float/SigProc_FLP.h b/drivers/opus/silk/float/SigProc_FLP.h
new file mode 100644
index 0000000000..f0cb3733be
--- /dev/null
+++ b/drivers/opus/silk/float/SigProc_FLP.h
@@ -0,0 +1,204 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FLP_H
+#define SILK_SIGPROC_FLP_H
+
+#include "SigProc_FIX.h"
+#include "float_cast.h"
+#include <math.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/********************************************************************/
+/* SIGNAL PROCESSING FUNCTIONS */
+/********************************************************************/
+
+/* Chirp (bandwidth-expand) LP AR filter */
+void silk_bwexpander_FLP(
+ silk_float *ar, /* I/O AR filter to be expanded (without leading 1) */
+ const opus_int d, /* I length of ar (number of coefficients) */
+ const silk_float chirp /* I chirp factor (typically in range (0..1) ); ar[ i ] is scaled by chirp^( i + 1 ) */
+);
+
+/* Compute the inverse of the LPC prediction gain, and */
+/* test whether the LPC coefficients are stable (all poles within the unit circle). */
+/* This code is based on silk_FLP_a2k() */
+silk_float silk_LPC_inverse_pred_gain_FLP( /* O return inverse prediction gain, energy domain */
+ const silk_float *A, /* I prediction coefficients [order] */
+ opus_int32 order /* I prediction order */
+);
+
+silk_float silk_schur_FLP( /* O returns residual energy */
+ silk_float refl_coef[], /* O reflection coefficients (length order) */
+ const silk_float auto_corr[], /* I autocorrelation sequence (length order+1) */
+ opus_int order /* I order, i.e. the length of refl_coef */
+);
+
+void silk_k2a_FLP( /* Produces prediction coefficients A from reflection coefficients rc */
+ silk_float *A, /* O prediction coefficients [order] */
+ const silk_float *rc, /* I reflection coefficients [order] */
+ opus_int32 order /* I prediction order */
+);
+
+/* Solve the normal equations using the Levinson-Durbin recursion; the return value is the prediction error energy */
+silk_float silk_levinsondurbin_FLP( /* O prediction error energy */
+ silk_float A[], /* O prediction coefficients [order] */
+ const silk_float corr[], /* I input auto-correlations [order + 1] */
+ const opus_int order /* I prediction order */
+);
+
+/* Compute autocorrelation: results[ i ] is the inner product of inputData with itself at lag i */
+void silk_autocorrelation_FLP(
+ silk_float *results, /* O result (length correlationCount) */
+ const silk_float *inputData, /* I input data to correlate */
+ opus_int inputDataSize, /* I length of input */
+ opus_int correlationCount /* I number of correlation taps to compute; clamped to inputDataSize */
+);
+
+opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, 1 unvoiced */
+ const silk_float *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
+ opus_int *pitch_out, /* O Pitch lag values [nb_subfr] */
+ opus_int16 *lagIndex, /* O Lag Index */
+ opus_int8 *contourIndex, /* O Pitch contour Index */
+ silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */
+ opus_int prevLag, /* I Last lag of previous frame; set to zero if unvoiced */
+ const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */
+ const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */
+ const opus_int Fs_kHz, /* I Sample frequency (kHz) */
+ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
+ const opus_int nb_subfr, /* I Number of 5 ms subframes */
+ int arch /* I Run-time architecture */
+);
+
+void silk_insertion_sort_decreasing_FLP( /* NOTE(review): definition is not in this file; descriptions below are from the declaration only */
+ silk_float *a, /* I/O Unsorted / Sorted vector */
+ opus_int *idx, /* O Index vector for the sorted elements */
+ const opus_int L, /* I Vector length */
+ const opus_int K /* I Number of correctly sorted positions */
+);
+
+/* Compute prediction coefficients from input signal (built up from one reflection coefficient per order) */
+silk_float silk_burg_modified_FLP( /* O returns residual energy */
+ silk_float A[], /* O prediction coefficients (length order) */
+ const silk_float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
+ const silk_float minInvGain, /* I minimum inverse prediction gain */
+ const opus_int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
+ const opus_int nb_subfr, /* I number of subframes stacked in x */
+ const opus_int D /* I order */
+);
+
+/* Multiply a vector by a constant */
+void silk_scale_vector_FLP(
+ silk_float *data1, /* I/O vector to scale (non-const; presumably scaled in place -- confirm against definition) */
+ silk_float gain, /* I constant multiplier */
+ opus_int dataSize /* I presumably the number of elements in data1 -- confirm against definition */
+);
+
+/* Copy and multiply a vector by a constant */
+void silk_scale_copy_vector_FLP(
+ silk_float *data_out, /* O scaled copy */
+ const silk_float *data_in, /* I source vector (not modified) */
+ silk_float gain, /* I constant multiplier */
+ opus_int dataSize /* I presumably the number of elements to process -- confirm against definition */
+);
+
+/* Inner product of two silk_float arrays, with result as double */
+double silk_inner_product_FLP(
+ const silk_float *data1, /* I first array */
+ const silk_float *data2, /* I second array */
+ opus_int dataSize /* I presumably the number of element pairs -- confirm against definition */
+);
+
+/* Sum of squares of a silk_float array, with result as double */
+double silk_energy_FLP(
+ const silk_float *data, /* I input array */
+ opus_int dataSize /* I presumably the number of elements -- confirm against definition */
+);
+
+/********************************************************************/
+/* MACROS */
+/********************************************************************/
+
+#define PI (3.1415926536f) /* pi as a single-precision constant */
+
+#define silk_min_float( a, b ) (((a) < (b)) ? (a) : (b)) /* smaller of a and b; one argument is evaluated twice */
+#define silk_max_float( a, b ) (((a) > (b)) ? (a) : (b)) /* larger of a and b; one argument is evaluated twice */
+#define silk_abs_float( a ) ((silk_float)fabs(a)) /* absolute value via fabs(), cast to silk_float */
+
+/* Logistic sigmoid 1 / ( 1 + e^-x ), evaluated through exp() and narrowed to silk_float */
+static OPUS_INLINE silk_float silk_sigmoid( silk_float x )
+{
+    return (silk_float)( 1.0 / ( exp( -x ) + 1.0 ) );
+}
+
+/* Floating-point to integer conversion (rounding); delegates to float2int() and casts the result to opus_int32 */
+static OPUS_INLINE opus_int32 silk_float2int( silk_float x )
+{
+ return (opus_int32)float2int( x );
+}
+
+/* Convert floats to 16-bit integers: float2int() each sample, then saturate with silk_SAT16; last element first */
+static OPUS_INLINE void silk_float2short_array(
+    opus_int16          *out,
+    const silk_float    *in,
+    opus_int32          length
+)
+{
+    opus_int32 idx = length;
+    while( --idx >= 0 ) {
+        out[ idx ] = silk_SAT16( (opus_int32)float2int( in[ idx ] ) );
+    }
+}
+
+/* Widen 16-bit integer samples to silk_float, processing the last element first */
+static OPUS_INLINE void silk_short2float_array(
+    silk_float          *out,
+    const opus_int16    *in,
+    opus_int32          length
+)
+{
+    opus_int32 idx = length;
+    while( --idx >= 0 ) {
+        out[ idx ] = (silk_float)in[ idx ];
+    }
+}
+
+/* log2( x ) obtained as log2( 10 ) * log10( x ); using log2() helps the fixed-point conversion */
+static OPUS_INLINE silk_float silk_log2( double x )
+{
+    return ( silk_float )( log10( x ) * 3.32192809488736 );
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_SIGPROC_FLP_H */
diff --git a/drivers/opus/silk/float/apply_sine_window_FLP.c b/drivers/opus/silk/float/apply_sine_window_FLP.c
new file mode 100644
index 0000000000..d904585d17
--- /dev/null
+++ b/drivers/opus/silk/float/apply_sine_window_FLP.c
@@ -0,0 +1,81 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Apply sine window to signal vector; the window is generated by a recursion, without calling sin() */
+/* Window types: */
+/* 1 -> sine window from 0 to pi/2 */
+/* 2 -> sine window from pi/2 to pi */
+void silk_apply_sine_window_FLP(
+ silk_float px_win[], /* O Pointer to windowed signal */
+ const silk_float px[], /* I Pointer to input signal */
+ const opus_int win_type, /* I Selects a window type */
+ const opus_int length /* I Window length, multiple of 4 */
+)
+{
+ opus_int k;
+ silk_float freq, c, S0, S1;
+
+ silk_assert( win_type == 1 || win_type == 2 );
+
+ /* Length must be multiple of 4 */
+ silk_assert( ( length & 3 ) == 0 );
+
+ freq = PI / ( length + 1 ); /* angle step of the recursion; one step is taken per two output samples */
+
+ /* Approximation of 2 * cos(f), from cos(f) ~= 1 - f*f/2 */
+ c = 2.0f - freq * freq;
+
+ /* Initialize state: S0 and S1 hold two consecutive values of the recursion */
+ if( win_type < 2 ) {
+ /* Start from 0 */
+ S0 = 0.0f;
+ /* Approximation of sin(f) */
+ S1 = freq;
+ } else {
+ /* Start from 1 */
+ S0 = 1.0f;
+ /* Approximation of cos(f) */
+ S1 = 0.5f * c;
+ }
+
+ /* Uses the recursive equation: sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f) */
+ /* 4 samples at a time; even-offset samples use the mean of the two neighbouring recursion values */
+ for( k = 0; k < length; k += 4 ) {
+ px_win[ k + 0 ] = px[ k + 0 ] * 0.5f * ( S0 + S1 );
+ px_win[ k + 1 ] = px[ k + 1 ] * S1;
+ S0 = c * S1 - S0; /* advance the recursion; S0 now holds the newest value */
+ px_win[ k + 2 ] = px[ k + 2 ] * 0.5f * ( S1 + S0 );
+ px_win[ k + 3 ] = px[ k + 3 ] * S0;
+ S1 = c * S0 - S1; /* advance again; S1 now holds the newest value */
+ }
+}
diff --git a/drivers/opus/silk/float/autocorrelation_FLP.c b/drivers/opus/silk/float/autocorrelation_FLP.c
new file mode 100644
index 0000000000..192a001b16
--- /dev/null
+++ b/drivers/opus/silk/float/autocorrelation_FLP.c
@@ -0,0 +1,52 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "typedef.h"
+#include "SigProc_FLP.h"
+
+/* Autocorrelation of inputData: results[ lag ] for lag = 0 .. min( correlationCount, inputDataSize ) - 1 */
+void silk_autocorrelation_FLP(
+    silk_float          *results,           /* O    result (length correlationCount)                            */
+    const silk_float    *inputData,         /* I    input data to correlate                                     */
+    opus_int            inputDataSize,      /* I    length of input                                             */
+    opus_int            correlationCount    /* I    number of correlation taps to compute                       */
+)
+{
+    opus_int lag;
+    /* Never compute more taps than there are input samples */
+    const opus_int nTaps = ( correlationCount > inputDataSize ) ? inputDataSize : correlationCount;
+
+    lag = 0;
+    while( lag < nTaps ) {
+        results[ lag ] = (silk_float)silk_inner_product_FLP( inputData, inputData + lag, inputDataSize - lag );
+        lag++;
+    }
+}
diff --git a/drivers/opus/silk/float/burg_modified_FLP.c b/drivers/opus/silk/float/burg_modified_FLP.c
new file mode 100644
index 0000000000..0f30ca2280
--- /dev/null
+++ b/drivers/opus/silk/float/burg_modified_FLP.c
@@ -0,0 +1,186 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+#include "tuning_parameters.h"
+#include "define.h"
+
+#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
+
+/* Compute prediction coefficients from input signal, adding one reflection (parcor) coefficient per order */
+silk_float silk_burg_modified_FLP( /* O returns residual energy */
+ silk_float A[], /* O prediction coefficients (length order) */
+ const silk_float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
+ const silk_float minInvGain, /* I minimum inverse prediction gain */
+ const opus_int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
+ const opus_int nb_subfr, /* I number of subframes stacked in x */
+ const opus_int D /* I order */
+)
+{
+ opus_int k, n, s, reached_max_gain;
+ double C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
+ const silk_float *x_ptr;
+ double C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
+ double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
+ double Af[ SILK_MAX_ORDER_LPC ]; /* not initialized: order n only reads Af[ 0 .. n - 1 ], written by earlier orders */
+
+ silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+ /* Compute autocorrelations, added over subframes */
+ C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
+ silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ for( n = 1; n < D + 1; n++ ) {
+ C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
+ }
+ }
+ silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
+
+ /* Initialize */
+ CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f; /* zero-lag energy, raised by the FIND_LPC_COND_FAC fraction of C0 and a tiny constant */
+ invGain = 1.0f;
+ reached_max_gain = 0;
+ for( n = 0; n < D; n++ ) {
+ /* Update first row of correlation matrix (without first element) */
+ /* Update last row of correlation matrix (without last element, stored in reversed order) */
+ /* Update C * Af */
+ /* Update C * flipud(Af) (stored in reversed order) */
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ tmp1 = x_ptr[ n ];
+ tmp2 = x_ptr[ subfr_length - n - 1 ];
+ for( k = 0; k < n; k++ ) {
+ C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
+ C_last_row[ k ] -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
+ Atmp = Af[ k ];
+ tmp1 += x_ptr[ n - k - 1 ] * Atmp;
+ tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
+ }
+ for( k = 0; k <= n; k++ ) {
+ CAf[ k ] -= tmp1 * x_ptr[ n - k ];
+ CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
+ }
+ }
+ tmp1 = C_first_row[ n ];
+ tmp2 = C_last_row[ n ];
+ for( k = 0; k < n; k++ ) {
+ Atmp = Af[ k ];
+ tmp1 += C_last_row[ n - k - 1 ] * Atmp;
+ tmp2 += C_first_row[ n - k - 1 ] * Atmp;
+ }
+ CAf[ n + 1 ] = tmp1;
+ CAb[ n + 1 ] = tmp2;
+
+ /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */
+ num = CAb[ n + 1 ];
+ nrg_b = CAb[ 0 ];
+ nrg_f = CAf[ 0 ];
+ for( k = 0; k < n; k++ ) {
+ Atmp = Af[ k ];
+ num += CAb[ n - k ] * Atmp;
+ nrg_b += CAb[ k + 1 ] * Atmp;
+ nrg_f += CAf[ k + 1 ] * Atmp;
+ }
+ silk_assert( nrg_f > 0.0 );
+ silk_assert( nrg_b > 0.0 );
+
+ /* Calculate the next order reflection (parcor) coefficient */
+ rc = -2.0 * num / ( nrg_f + nrg_b );
+ silk_assert( rc > -1.0 && rc < 1.0 );
+
+ /* Update inverse prediction gain */
+ tmp1 = invGain * ( 1.0 - rc * rc );
+ if( tmp1 <= minInvGain ) {
+ /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+ rc = sqrt( 1.0 - minInvGain / invGain );
+ if( num > 0 ) {
+ /* Ensure the adjusted reflection coefficient has the original sign */
+ rc = -rc;
+ }
+ invGain = minInvGain;
+ reached_max_gain = 1;
+ } else {
+ invGain = tmp1;
+ }
+
+ /* Update the AR coefficients (pairs k and n - k - 1 are updated together, working in from both ends) */
+ for( k = 0; k < (n + 1) >> 1; k++ ) {
+ tmp1 = Af[ k ];
+ tmp2 = Af[ n - k - 1 ];
+ Af[ k ] = tmp1 + rc * tmp2;
+ Af[ n - k - 1 ] = tmp2 + rc * tmp1;
+ }
+ Af[ n ] = rc;
+
+ if( reached_max_gain ) {
+ /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+ for( k = n + 1; k < D; k++ ) {
+ Af[ k ] = 0.0;
+ }
+ break;
+ }
+
+ /* Update C * Af and C * Ab */
+ for( k = 0; k <= n + 1; k++ ) {
+ tmp1 = CAf[ k ];
+ CAf[ k ] += rc * CAb[ n - k + 1 ];
+ CAb[ n - k + 1 ] += rc * tmp1;
+ }
+ }
+
+ if( reached_max_gain ) {
+ /* Convert to silk_float (sign flipped on output) */
+ for( k = 0; k < D; k++ ) {
+ A[ k ] = (silk_float)( -Af[ k ] );
+ }
+ /* Subtract energy of preceding samples from C0 */
+ for( s = 0; s < nb_subfr; s++ ) {
+ C0 -= silk_energy_FLP( x + s * subfr_length, D );
+ }
+ /* Approximate residual energy */
+ nrg_f = C0 * invGain;
+ } else {
+ /* Compute residual energy and store coefficients as silk_float (sign flipped on output) */
+ nrg_f = CAf[ 0 ];
+ tmp1 = 1.0;
+ for( k = 0; k < D; k++ ) {
+ Atmp = Af[ k ];
+ nrg_f += CAf[ k + 1 ] * Atmp;
+ tmp1 += Atmp * Atmp;
+ A[ k ] = (silk_float)(-Atmp);
+ }
+ nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1; /* tmp1 is 1 + sum of squared Af; subtracts the FIND_LPC_COND_FAC * C0 term scaled by it */
+ }
+
+ /* Return residual energy */
+ return (silk_float)nrg_f;
+}
diff --git a/drivers/opus/silk/float/bwexpander_FLP.c b/drivers/opus/silk/float/bwexpander_FLP.c
new file mode 100644
index 0000000000..86154dc3f1
--- /dev/null
+++ b/drivers/opus/silk/float/bwexpander_FLP.c
@@ -0,0 +1,49 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* Chirp (bw expand) LP AR filter: scales ar[ i ] by chirp^( i + 1 ); leaves ar untouched when d <= 0 */
+void silk_bwexpander_FLP(
+    silk_float          *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int      d,                  /* I    length of ar                                                */
+    const silk_float    chirp               /* I    chirp factor (typically in range (0..1) )                   */
+)
+{
+    opus_int   i;
+    silk_float cfac = chirp;
+
+    /* One loop over all d taps: same result for d >= 1, and d <= 0 no longer writes ar[ d - 1 ] */
+    for( i = 0; i < d; i++ ) {
+        ar[ i ] *= cfac;
+        cfac    *= chirp;
+    }
+}
diff --git a/drivers/opus/silk/float/corrMatrix_FLP.c b/drivers/opus/silk/float/corrMatrix_FLP.c
new file mode 100644
index 0000000000..e193c98f11
--- /dev/null
+++ b/drivers/opus/silk/float/corrMatrix_FLP.c
@@ -0,0 +1,93 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/**********************************************************************
+ * Correlation matrix computations for LS estimate.
+ **********************************************************************/
+
+#include "main_FLP.h"
+
+/* Correlation vector X'*t: Xt[ lag ] is the inner product of t with the L samples of x starting at x[ Order - 1 - lag ] */
+void silk_corrVector_FLP(
+    const silk_float    *x,                 /* I    x vector [L+order-1] used to create X                       */
+    const silk_float    *t,                 /* I    Target vector [L]                                           */
+    const opus_int      L,                  /* I    Length of vectors                                           */
+    const opus_int      Order,              /* I    Max lag for correlation                                     */
+    silk_float          *Xt                 /* O    X'*t correlation vector [order]                             */
+)
+{
+    opus_int col;
+
+    /* Column col of X begins at x[ Order - 1 - col ]; each further column starts one sample earlier */
+    col = 0;
+    while( col < Order ) {
+        const silk_float *column = x + ( Order - 1 - col );
+        Xt[ col ] = (silk_float)silk_inner_product_FLP( column, t, L );
+        col++;
+    }
+}
+
+/* Calculates correlation matrix X'*X; both triangles of XX are written, so the result is symmetric */
+void silk_corrMatrix_FLP(
+ const silk_float *x, /* I x vector [ L+order-1 ] used to create X */
+ const opus_int L, /* I Length of vectors */
+ const opus_int Order, /* I Max lag for correlation; also the row/column count of XX */
+ silk_float *XX /* O X'*X correlation matrix [order x order] */
+)
+{
+ opus_int j, lag;
+ double energy;
+ const silk_float *ptr1, *ptr2;
+
+ ptr1 = &x[ Order - 1 ]; /* First sample of column 0 of X */
+ energy = silk_energy_FLP( ptr1, L ); /* X[:,0]'*X[:,0] */
+ matrix_ptr( XX, 0, 0, Order ) = ( silk_float )energy;
+ for( j = 1; j < Order; j++ ) {
+ /* Calculate X[:,j]'*X[:,j] from the previous diagonal value: add the sample entering at the front, drop the one leaving at the end */
+ energy += ptr1[ -j ] * ptr1[ -j ] - ptr1[ L - j ] * ptr1[ L - j ];
+ matrix_ptr( XX, j, j, Order ) = ( silk_float )energy;
+ }
+
+ ptr2 = &x[ Order - 2 ]; /* First sample of column 1 of X */
+ for( lag = 1; lag < Order; lag++ ) {
+ /* Calculate X[:,0]'*X[:,lag] */
+ energy = silk_inner_product_FLP( ptr1, ptr2, L );
+ matrix_ptr( XX, lag, 0, Order ) = ( silk_float )energy;
+ matrix_ptr( XX, 0, lag, Order ) = ( silk_float )energy;
+ /* Calculate X[:,j]'*X[:,j + lag] along the same off-diagonal, using the same add-one/drop-one update */
+ for( j = 1; j < ( Order - lag ); j++ ) {
+ energy += ptr1[ -j ] * ptr2[ -j ] - ptr1[ L - j ] * ptr2[ L - j ];
+ matrix_ptr( XX, lag + j, j, Order ) = ( silk_float )energy;
+ matrix_ptr( XX, j, lag + j, Order ) = ( silk_float )energy;
+ }
+ ptr2--; /* Next column of X */
+ }
+}
diff --git a/drivers/opus/silk/float/encode_frame_FLP.c b/drivers/opus/silk/float/encode_frame_FLP.c
new file mode 100644
index 0000000000..90e5357ced
--- /dev/null
+++ b/drivers/opus/silk/float/encode_frame_FLP.c
@@ -0,0 +1,372 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate.
+   Forward declaration only: the definition appears further down in this file, after
+   silk_encode_frame_FLP(), which calls it. */
+static OPUS_INLINE void silk_LBRR_encode_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ const silk_float xfw[], /* I Input signal */
+ opus_int condCoding /* I The type of conditional coding used so far for this frame */
+);
+
+void silk_encode_do_VAD_FLP(
+    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+)
+{
+    /* Run the voice activity detector on the buffered input (starting at inputBuf + 1) */
+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+
+    /* Speech activity at or above the DTX threshold: flag the frame as active and leave DTX */
+    if( !( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) ) {
+        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+        psEnc->sCmn.indices.signalType                      = TYPE_UNVOICED;
+        psEnc->sCmn.inDTX                                   = 0;
+        psEnc->sCmn.noSpeechCounter                         = 0;
+        return;
+    }
+
+    /* Below the threshold: flag the frame as inactive and count consecutive no-speech frames */
+    psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
+    psEnc->sCmn.indices.signalType                      = TYPE_NO_VOICE_ACTIVITY;
+    psEnc->sCmn.noSpeechCounter++;
+
+    if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
+        /* Not enough no-speech frames yet */
+        psEnc->sCmn.inDTX = 0;
+    } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
+        /* Upper limit on the counter exceeded: rewind it and clear the DTX flag */
+        psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
+        psEnc->sCmn.inDTX           = 0;
+    }
+}
+
+/****************/
+/* Encode frame
+ *
+ * Encodes the frame currently held in psEnc->sCmn.inputBuf (read from inputBuf + 1).
+ * The new samples are low-pass filtered, converted to float and appended to x_buf.
+ * Unless psEnc->sCmn.prefillFlag is set, the function then runs pitch analysis, noise
+ * shape analysis, prediction coefficient estimation, gain processing, prefiltering and
+ * LBRR encoding, followed by a rate-control loop (iter = 0 .. maxIter) around noise
+ * shaping quantization and entropy coding. Between passes the loop rescales and
+ * requantizes the gains (and may raise sEncCtrl.Lambda) to bring the bit count close
+ * to maxBits. With useCBR == 0 the loop exits after the first pass if nBits <= maxBits.
+ *
+ * When prefillFlag is set the analysis and coding stages are skipped, x_buf is still
+ * shifted, and *pnBytesOut is set to 0.
+ *
+ * Otherwise *pnBytesOut receives ( ec_tell( psRangeEnc ) + 7 ) >> 3.
+ * The return value is ret, which is initialised to 0 and not modified in this function.
+ */
+/****************/
+opus_int silk_encode_frame_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ opus_int32 *pnBytesOut, /* O Number of payload bytes; */
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ opus_int condCoding, /* I The type of conditional coding to use */
+ opus_int maxBits, /* I If > 0: maximum number of output bits */
+ opus_int useCBR /* I Flag to force constant-bitrate operation */
+)
+{
+ silk_encoder_control_FLP sEncCtrl;
+ opus_int i, iter, maxIter, found_upper, found_lower, ret = 0;
+ silk_float *x_frame, *res_pitch_frame;
+ silk_float xfw[ MAX_FRAME_LENGTH ];
+ silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
+ ec_enc sRangeEnc_copy, sRangeEnc_copy2;
+ silk_nsq_state sNSQ_copy, sNSQ_copy2;
+ opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
+ opus_int32 gainsID, gainsID_lower, gainsID_upper;
+ opus_int16 gainMult_Q8;
+ opus_int16 ec_prevLagIndex_copy;
+ opus_int ec_prevSignalType_copy;
+ opus_int8 LastGainIndex_copy2;
+ opus_int32 pGains_Q16[ MAX_NB_SUBFR ];
+ opus_uint8 ec_buf_copy[ 1275 ];
+
+ /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
+ LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
+
+ psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; /* Seed takes the values 0..3, advancing with the frame counter */
+
+ /**************************************************************/
+ /* Set up Input Pointers, and insert frame in input buffer */
+ /**************************************************************/
+ /* pointers aligned with start of frame to encode */
+ x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */
+ res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */
+
+ /***************************************/
+ /* Ensure smooth bandwidth transitions */
+ /***************************************/
+ silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
+
+ /*******************************************/
+ /* Copy new frame to front of input buffer */
+ /*******************************************/
+ silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
+
+ /* Add tiny signal to avoid high CPU load from denormalized floating point numbers.
+    Eight samples, spaced frame_length / 8 apart, get +1e-6 (i & 2 == 0) or -1e-6 (i & 2 != 0). */
+ for( i = 0; i < 8; i++ ) {
+ x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
+ }
+
+ if( !psEnc->sCmn.prefillFlag ) {
+ /*****************************************/
+ /* Find pitch lags, initial LPC analysis */
+ /*****************************************/
+ silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
+
+ /************************/
+ /* Noise shape analysis */
+ /************************/
+ silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
+
+ /***************************************************/
+ /* Find linear prediction coefficients (LPC + LTP) */
+ /***************************************************/
+ silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
+
+ /****************************************/
+ /* Process gains */
+ /****************************************/
+ silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding );
+
+ /*****************************************/
+ /* Prefiltering for noise shaper */
+ /*****************************************/
+ silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame );
+
+ /****************************************/
+ /* Low Bitrate Redundant Encoding */
+ /****************************************/
+ silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding );
+
+ /* Loop over quantizer and entropy coding to control bitrate */
+ maxIter = 6;
+ gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
+ found_lower = 0;
+ found_upper = 0;
+ gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+ gainsID_lower = -1;
+ gainsID_upper = -1;
+ /* Copy part of the input state, so that every later pass of the loop can be restarted from it */
+ silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
+ silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+ seed_copy = psEnc->sCmn.indices.Seed;
+ ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
+ ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
+ for( iter = 0; ; iter++ ) {
+ if( gainsID == gainsID_lower ) { /* Same gains as the stored "lower" result: reuse its bit count instead of re-encoding */
+ nBits = nBits_lower;
+ } else if( gainsID == gainsID_upper ) { /* Same gains as the stored "upper" result: reuse its bit count */
+ nBits = nBits_upper;
+ } else {
+ /* Restore part of the input state */
+ if( iter > 0 ) {
+ silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
+ silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
+ psEnc->sCmn.indices.Seed = seed_copy;
+ psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
+ psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
+ }
+
+ /*****************************************/
+ /* Noise shaping quantization */
+ /*****************************************/
+ silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw );
+
+ /****************************************/
+ /* Encode Parameters */
+ /****************************************/
+ silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
+
+ /****************************************/
+ /* Encode Excitation Signal */
+ /****************************************/
+ silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
+ psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
+
+ nBits = ec_tell( psRangeEnc );
+
+ if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { /* Not CBR and the first pass already fits: done */
+ break;
+ }
+ }
+
+ if( iter == maxIter ) { /* Last allowed pass: keep this result unless a saved in-budget result must be restored */
+ if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
+ /* Restore output state from earlier iteration that did meet the bitrate budget */
+ silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
+ silk_assert( sRangeEnc_copy2.offs <= 1275 );
+ silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
+ silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
+ psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
+ }
+ break;
+ }
+
+ if( nBits > maxBits ) {
+ if( found_lower == 0 && iter >= 2 ) {
+ /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
+ sEncCtrl.Lambda *= 1.5f;
+ found_upper = 0;
+ gainsID_upper = -1;
+ } else {
+ found_upper = 1;
+ nBits_upper = nBits;
+ gainMult_upper = gainMult_Q8;
+ gainsID_upper = gainsID;
+ }
+ } else if( nBits < maxBits - 5 ) { /* More than 5 bits under budget: record as the "lower" result */
+ found_lower = 1;
+ nBits_lower = nBits;
+ gainMult_lower = gainMult_Q8;
+ if( gainsID != gainsID_lower ) {
+ gainsID_lower = gainsID;
+ /* Copy part of the output state */
+ silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
+ silk_assert( psRangeEnc->offs <= 1275 );
+ silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
+ silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+ LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
+ }
+ } else {
+ /* Within 5 bits of budget: close enough */
+ break;
+ }
+
+ if( ( found_lower & found_upper ) == 0 ) {
+ /* Adjust gain according to high-rate rate/distortion curve */
+ opus_int32 gain_factor_Q16;
+ gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
+ gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) );
+ if( nBits > maxBits ) {
+ gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) );
+ }
+ gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+ } else {
+ /* Adjust gain by interpolating */
+ gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower );
+ /* New gain multiplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
+ if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
+ gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
+ } else
+ if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
+ gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
+ }
+ }
+
+ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+ pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
+ }
+
+ /* Quantize gains, starting again from the gain index saved in sEncCtrl.lastGainIndexPrev */
+ psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
+ silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16,
+ &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+ /* Unique identifier of gains vector */
+ gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+
+ /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
+ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+ sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f;
+ }
+ }
+ }
+
+ /* Update input buffer: shift x_buf left by one frame, keeping ltp_mem_length + LA_SHAPE_MS * fs_kHz samples */
+ silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
+ ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );
+
+ /* Exit without entropy coding */
+ if( psEnc->sCmn.prefillFlag ) {
+ /* No payload */
+ *pnBytesOut = 0;
+ return ret;
+ }
+
+ /* Parameters needed for next frame */
+ psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+ psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
+ /****************************************/
+ /* Finalize payload */
+ /****************************************/
+ psEnc->sCmn.first_frame_after_reset = 0;
+ /* Payload size: bit count from ec_tell() rounded up to whole bytes */
+ *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
+
+ return ret;
+}
+
+/* Low-Bitrate Redundancy (LBRR) encoding. Reuses the parameters of the regular encoding and  */
+/* re-quantizes the excitation into pulses_LBRR / indices_LBRR for the current frame, working */
+/* on a private copy of the noise shaping quantizer state. psEncCtrl->Gains is overwritten    */
+/* temporarily and restored before returning.                                                 */
+static OPUS_INLINE void silk_LBRR_encode_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                xfw[],                              /* I    Input signal                                */
+    opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
+)
+{
+    opus_int         sf;
+    opus_int32       quantGains_Q16[ MAX_NB_SUBFR ];
+    silk_float       savedGains[ MAX_NB_SUBFR ];
+    silk_nsq_state   nsqStateLBRR;
+    SideInfoIndices *idxLBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
+
+    /* Inband LBRR is used only when it is enabled and speech activity exceeds the threshold */
+    if( !psEnc->sCmn.LBRR_enabled ||
+        !( psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) ) {
+        return;
+    }
+
+    psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+
+    /* Start from the regular encoding's quantizer state and side information */
+    silk_memcpy( &nsqStateLBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+    silk_memcpy( idxLBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
+
+    /* Remember the gains of the regular encoding so they can be put back afterwards */
+    silk_memcpy( savedGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
+
+    /* First frame in packet, or previous frame not LBRR coded */
+    if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
+        psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
+
+        /* Raise the first gain index to reach the target LBRR rate, clamped to the top level */
+        idxLBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases;
+        idxLBRR->GainsIndices[ 0 ] = silk_min_int( idxLBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
+    }
+
+    /* Dequantize so that the gains used here match what the decoder will reconstruct */
+    silk_gains_dequant( quantGains_Q16, idxLBRR->GainsIndices,
+        &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+    /* Replace the control gains by the dequantized ones, converted from Q16 to float */
+    for( sf = 0; sf < psEnc->sCmn.nb_subfr; sf++ ) {
+        psEncCtrl->Gains[ sf ] = quantGains_Q16[ sf ] * ( 1.0f / 65536.0f );
+    }
+
+    /* Noise shaping quantization into the LBRR pulse buffer of the current frame */
+    silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, idxLBRR, &nsqStateLBRR,
+        psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw );
+
+    /* Put the gains of the regular encoding back */
+    silk_memcpy( psEncCtrl->Gains, savedGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
+}
diff --git a/drivers/opus/silk/float/energy_FLP.c b/drivers/opus/silk/float/energy_FLP.c
new file mode 100644
index 0000000000..d441526df3
--- /dev/null
+++ b/drivers/opus/silk/float/energy_FLP.c
@@ -0,0 +1,60 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* Sum of squares of a silk_float array, accumulated and returned as a double.  */
+/*   data     : input samples; at least dataSize elements are read              */
+/*   dataSize : number of samples to sum; a value <= 0 reads nothing and the    */
+/*              function returns 0.0                                            */
+double silk_energy_FLP(
+    const silk_float    *data,
+    opus_int            dataSize
+)
+{
+    opus_int i;
+    double   result;
+
+    /* 4x unrolled loop. The bound is expressed as dataSize - 3 instead of the mask       */
+    /* dataSize & 0xFFFC: the mask kept only the low 16 bits of the length and mapped a   */
+    /* negative length to a large positive count, which would read past the buffer.       */
+    /* For 0 <= dataSize <= 65535 both forms run exactly the same iterations.             */
+    result = 0.0;
+    for( i = 0; i < dataSize - 3; i += 4 ) {
+        result += data[ i + 0 ] * (double)data[ i + 0 ] +
+                  data[ i + 1 ] * (double)data[ i + 1 ] +
+                  data[ i + 2 ] * (double)data[ i + 2 ] +
+                  data[ i + 3 ] * (double)data[ i + 3 ];
+    }
+
+    /* add any remaining products */
+    for( ; i < dataSize; i++ ) {
+        result += data[ i ] * (double)data[ i ];
+    }
+
+    silk_assert( result >= 0.0 );
+    return result;
+}
diff --git a/drivers/opus/silk/float/find_LPC_FLP.c b/drivers/opus/silk/float/find_LPC_FLP.c
new file mode 100644
index 0000000000..212f2de3cd
--- /dev/null
+++ b/drivers/opus/silk/float/find_LPC_FLP.c
@@ -0,0 +1,104 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "define.h"
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* LPC analysis: Burg analysis over the whole frame, optionally followed by a search over  */
+/* NLSF interpolation factors (3 down to 0) for the first half of the frame. The chosen    */
+/* factor is stored in psEncC->indices.NLSFInterpCoef_Q2; 4 means "no interpolation".      */
+void silk_find_LPC_FLP(
+    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
+    opus_int16                      NLSF_Q15[],                         /* O    NLSFs                                       */
+    const silk_float                x[],                                /* I    Input signal                                */
+    const silk_float                minInvGain                          /* I    Inverse of max prediction gain              */
+)
+{
+    opus_int    interpIdx, order, blockLen, interpAllowed;
+    silk_float  a_full[ MAX_LPC_ORDER ];
+
+    /* Scratch used only by the NLSF interpolation search */
+    silk_float  bestNrg, prevInterpNrg, interpNrg;
+    opus_int16  NLSF_interp_Q15[ MAX_LPC_ORDER ];
+    silk_float  a_half[ MAX_LPC_ORDER ];
+    silk_float  residual[ MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ];
+
+    order    = psEncC->predictLPCOrder;
+    blockLen = psEncC->subfr_length + order;
+
+    /* The search is only attempted for interpolation-enabled, non-reset frames with MAX_NB_SUBFR subframes */
+    interpAllowed = psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR;
+
+    /* Default: No interpolation */
+    psEncC->indices.NLSFInterpCoef_Q2 = 4;
+
+    /* Burg AR analysis for the full frame */
+    bestNrg = silk_burg_modified_FLP( a_full, x, minInvGain, blockLen, psEncC->nb_subfr, order );
+
+    if( interpAllowed ) {
+        /* Optimal solution for the second half of the frame. Its residual energy is subtracted */
+        /* here once, rather than added to the first-half energy in every search iteration.     */
+        bestNrg -= silk_burg_modified_FLP( a_half, x + ( MAX_NB_SUBFR / 2 ) * blockLen, minInvGain, blockLen, MAX_NB_SUBFR / 2, order );
+
+        /* Convert to NLSFs */
+        silk_A2NLSF_FLP( NLSF_Q15, a_half, order );
+
+        /* Try interpolation indices 3, 2, 1, 0 and keep the one with the lowest residual energy */
+        prevInterpNrg = silk_float_MAX;
+        for( interpIdx = 3; interpIdx >= 0; interpIdx-- ) {
+            /* Interpolated NLSFs for the first half, converted to LPC coefficients */
+            silk_interpolate( NLSF_interp_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, interpIdx, order );
+            silk_NLSF2A_FLP( a_half, NLSF_interp_Q15, order );
+
+            /* Residual energy of the first two blocks, skipping the first 'order' samples of each */
+            silk_LPC_analysis_filter_FLP( residual, a_half, x, 2 * blockLen, order );
+            interpNrg = (silk_float)(
+                silk_energy_FLP( residual + order,            blockLen - order ) +
+                silk_energy_FLP( residual + order + blockLen, blockLen - order ) );
+
+            if( interpNrg < bestNrg ) {
+                /* Interpolation has lower residual energy */
+                bestNrg = interpNrg;
+                psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)interpIdx;
+            } else if( interpNrg > prevInterpNrg ) {
+                /* Energy is rising again: stop searching */
+                break;
+            }
+            prevInterpNrg = interpNrg;
+        }
+    }
+
+    if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) {
+        /* No interpolation selected: derive the NLSFs from the full-frame AR coefficients */
+        silk_A2NLSF_FLP( NLSF_Q15, a_full, order );
+    }
+
+    silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || interpAllowed );
+}
diff --git a/drivers/opus/silk/float/find_LTP_FLP.c b/drivers/opus/silk/float/find_LTP_FLP.c
new file mode 100644
index 0000000000..5c62851f20
--- /dev/null
+++ b/drivers/opus/silk/float/find_LTP_FLP.c
@@ -0,0 +1,132 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Finds LTP coefficients for each subframe by solving regularized normal equations built   */
+/* from the lagged LPC residual, scales the correlation matrices into quantization weights, */
+/* optionally reports the LTP coding gain, and finally smooths the coefficient sums across  */
+/* subframes.                                                                               */
+void silk_find_LTP_FLP(
+    silk_float                      b[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                   */
+    silk_float                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization       */
+    silk_float                      *LTPredCodGain,                     /* O    LTP coding gain                             */
+    const silk_float                r_lpc[],                            /* I    LPC residual                                */
+    const opus_int                  lag[ MAX_NB_SUBFR ],                /* I    LTP lags                                    */
+    const silk_float                Wght[ MAX_NB_SUBFR ],               /* I    Weights                                     */
+    const opus_int                  subfr_length,                       /* I    Subframe length                             */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  mem_offset                          /* I    Number of samples in LTP memory             */
+)
+{
+    opus_int          sf, tap;
+    silk_float       *coef, *W;
+    const silk_float *target, *lagged;
+    silk_float        scale, regu, mean, gain;
+    silk_float        resNrg, ltpResNrg;
+    silk_float        coefSum[ MAX_NB_SUBFR ], centerW[ MAX_NB_SUBFR ];
+    silk_float        ltpNrg[ MAX_NB_SUBFR ], tgtNrg[ MAX_NB_SUBFR ];
+    silk_float        Rr[ LTP_ORDER ], delta[ LTP_ORDER ];
+
+    for( sf = 0; sf < nb_subfr; sf++ ) {
+        coef   = b + sf * LTP_ORDER;
+        W      = WLTP + sf * ( LTP_ORDER * LTP_ORDER );
+        target = r_lpc + mem_offset + sf * subfr_length;
+        lagged = target - ( lag[ sf ] + LTP_ORDER / 2 );
+
+        /* Correlation matrix of the lagged residual and its correlation with the target */
+        silk_corrMatrix_FLP( lagged, subfr_length, LTP_ORDER, W );
+        silk_corrVector_FLP( lagged, target, subfr_length, LTP_ORDER, Rr );
+
+        /* Regularize, then solve for the LTP coefficients */
+        tgtNrg[ sf ] = ( silk_float )silk_energy_FLP( target, subfr_length );
+        regu = 1.0f + tgtNrg[ sf ] +
+            matrix_ptr( W, 0, 0, LTP_ORDER ) +
+            matrix_ptr( W, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER );
+        regu *= LTP_DAMPING / 3;
+        silk_regularize_correlations_FLP( W, &tgtNrg[ sf ], regu, LTP_ORDER );
+        silk_solve_LDL_FLP( W, LTP_ORDER, Rr, coef );
+
+        /* Calculate residual energy */
+        ltpNrg[ sf ] = silk_residual_energy_covar_FLP( coef, W, Rr, tgtNrg[ sf ], LTP_ORDER );
+
+        /* Turn the correlation matrix into quantization weights; keep its centre element */
+        scale = Wght[ sf ] / ( ltpNrg[ sf ] * Wght[ sf ] + 0.01f * subfr_length );
+        silk_scale_vector_FLP( W, scale, LTP_ORDER * LTP_ORDER );
+        centerW[ sf ] = matrix_ptr( W, LTP_ORDER / 2, LTP_ORDER / 2, LTP_ORDER );
+    }
+
+    /* Compute LTP coding gain */
+    if( LTPredCodGain != NULL ) {
+        ltpResNrg = 1e-6f;
+        resNrg    = 0.0f;
+        for( sf = 0; sf < nb_subfr; sf++ ) {
+            resNrg    += tgtNrg[ sf ] * Wght[ sf ];
+            ltpResNrg += ltpNrg[ sf ] * Wght[ sf ];
+        }
+
+        silk_assert( ltpResNrg > 0 );
+        *LTPredCodGain = 3.0f * silk_log2( resNrg / ltpResNrg );
+    }
+
+    /* Smoothing */
+    /* coefSum = sum( B, 1 );  mean = ( centerW * coefSum' ) / ( sum( centerW ) + 1e-3 ); */
+    scale = 1e-3f;
+    mean  = 0;
+    for( sf = 0; sf < nb_subfr; sf++ ) {
+        coef = b + sf * LTP_ORDER;
+        coefSum[ sf ] = 0;
+        for( tap = 0; tap < LTP_ORDER; tap++ ) {
+            coefSum[ sf ] += coef[ tap ];
+        }
+        scale += centerW[ sf ];
+        mean  += coefSum[ sf ] * centerW[ sf ];
+    }
+    mean = mean / scale;
+
+    /* Pull each subframe's coefficient sum towards the weighted mean, spreading the  */
+    /* correction over the taps in proportion to max( tap, 0.1 )                      */
+    for( sf = 0; sf < nb_subfr; sf++ ) {
+        coef = b + sf * LTP_ORDER;
+        gain = LTP_SMOOTHING / ( LTP_SMOOTHING + centerW[ sf ] ) * ( mean - coefSum[ sf ] );
+        scale = 0;
+        for( tap = 0; tap < LTP_ORDER; tap++ ) {
+            delta[ tap ] = silk_max_float( coef[ tap ], 0.1f );
+            scale += delta[ tap ];
+        }
+        scale = gain / scale;
+        for( tap = 0; tap < LTP_ORDER; tap++ ) {
+            coef[ tap ] = coef[ tap ] + delta[ tap ] * scale;
+        }
+    }
+}
diff --git a/drivers/opus/silk/float/find_pitch_lags_FLP.c b/drivers/opus/silk/float/find_pitch_lags_FLP.c
new file mode 100644
index 0000000000..d74d5941b5
--- /dev/null
+++ b/drivers/opus/silk/float/find_pitch_lags_FLP.c
@@ -0,0 +1,132 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdlib.h>
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Computes a windowed LPC analysis of the buffered signal, writes the resulting LPC      */
+/* residual to res[], stores the prediction gain in psEncCtrl->predGain, and (for frames  */
+/* with voice activity that do not follow a reset) runs the pitch estimator to fill in    */
+/* the pitch lags and to set the signal type to voiced or unvoiced.                       */
+void silk_find_pitch_lags_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    silk_float                      res[],                              /* O    Residual                                    */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    int                             arch                                /* I    Run-time architecture                       */
+)
+{
+    opus_int          buf_len, mid_len;
+    silk_float        threshold, resNrg;
+    const silk_float *x_buf, *win_in;
+    silk_float        auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
+    silk_float        A[         MAX_FIND_PITCH_LPC_ORDER ];
+    silk_float        refl_coef[ MAX_FIND_PITCH_LPC_ORDER ];
+    silk_float        Wsig[      FIND_PITCH_LPC_WIN_MAX ];
+
+    /* Total analysis buffer: LTP memory + frame + pitch look-ahead */
+    buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length;
+
+    /* Safety check */
+    silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
+
+    /* x points at the frame start; the buffer begins ltp_mem_length samples earlier */
+    x_buf = x - psEnc->sCmn.ltp_mem_length;
+
+    /******************************************/
+    /* Estimate LPC AR coefficients           */
+    /******************************************/
+
+    /* The analysis window covers the last pitch_LPC_win_length samples of the buffer:     */
+    /* la_pitch samples with window type 1, an unwindowed middle part, and la_pitch        */
+    /* samples with window type 2                                                          */
+    win_in  = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length;
+    mid_len = psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 );
+
+    silk_apply_sine_window_FLP( Wsig, win_in, 1, psEnc->sCmn.la_pitch );
+    silk_memcpy( Wsig + psEnc->sCmn.la_pitch, win_in + psEnc->sCmn.la_pitch, mid_len * sizeof( silk_float ) );
+    silk_apply_sine_window_FLP( Wsig + psEnc->sCmn.la_pitch + mid_len, win_in + psEnc->sCmn.la_pitch + mid_len, 2, psEnc->sCmn.la_pitch );
+
+    /* Calculate autocorrelation sequence */
+    silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 );
+
+    /* Add white noise, as a fraction of the energy */
+    auto_corr[ 0 ] += auto_corr[ 0 ] * FIND_PITCH_WHITE_NOISE_FRACTION + 1;
+
+    /* Reflection coefficients via Schur, then the prediction gain */
+    resNrg = silk_schur_FLP( refl_coef, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder );
+    psEncCtrl->predGain = auto_corr[ 0 ] / silk_max_float( resNrg, 1.0f );
+
+    /* Reflection coefficients -> prediction coefficients, followed by bandwidth expansion */
+    silk_k2a_FLP( A, refl_coef, psEnc->sCmn.pitchEstimationLPCOrder );
+    silk_bwexpander_FLP( A, psEnc->sCmn.pitchEstimationLPCOrder, FIND_PITCH_BANDWIDTH_EXPANSION );
+
+    /*****************************************/
+    /* LPC analysis filtering                */
+    /*****************************************/
+    silk_LPC_analysis_filter_FLP( res, A, x_buf, buf_len, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    /* No voice activity, or first frame after a reset: clear the pitch parameters and stop */
+    if( psEnc->sCmn.indices.signalType == TYPE_NO_VOICE_ACTIVITY || psEnc->sCmn.first_frame_after_reset != 0 ) {
+        silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) );
+        psEnc->sCmn.indices.lagIndex     = 0;
+        psEnc->sCmn.indices.contourIndex = 0;
+        psEnc->LTPCorr                   = 0;
+        return;
+    }
+
+    /* Threshold for pitch estimator */
+    threshold  = 0.6f;
+    threshold -= 0.004f * psEnc->sCmn.pitchEstimationLPCOrder;
+    threshold -= 0.1f   * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f );
+    threshold -= 0.15f  * (psEnc->sCmn.prevSignalType >> 1);
+    threshold -= 0.1f   * psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f );
+
+    /*****************************************/
+    /* Call Pitch estimator                  */
+    /*****************************************/
+    if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex,
+        &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f,
+        threshold, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 )
+    {
+        /* A return value of 0 marks the frame as voiced */
+        psEnc->sCmn.indices.signalType = TYPE_VOICED;
+    } else {
+        psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+    }
+}
diff --git a/drivers/opus/silk/float/find_pred_coefs_FLP.c b/drivers/opus/silk/float/find_pred_coefs_FLP.c
new file mode 100644
index 0000000000..e0d8804cc9
--- /dev/null
+++ b/drivers/opus/silk/float/find_pred_coefs_FLP.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+ /* Find the LPC and LTP coefficients for the current frame.                    */
+ /* Voiced frames go through LTP analysis, LTP gain quantization and LTP        */
+ /* filtering; every other signal type gets its input scaled by the inverse     */
+ /* gains and its LTP parameters cleared. Both paths fill LPC_in_pre, which     */
+ /* then drives the LPC analysis, the NLSF quantization and the residual        */
+ /* energy computation.                                                         */
+ void silk_find_pred_coefs_FLP(
+     silk_encoder_state_FLP          *psEnc,         /* I/O  Encoder state FLP                       */
+     silk_encoder_control_FLP        *psEncCtrl,     /* I/O  Encoder control FLP                     */
+     const silk_float                res_pitch[],    /* I    Residual from pitch analysis            */
+     const silk_float                x[],            /* I    Speech signal                           */
+     opus_int                        condCoding      /* I    The type of conditional coding to use   */
+ )
+ {
+     silk_encoder_state *psEncC = &psEnc->sCmn;
+     opus_int           k;
+     opus_int16         NLSF_Q15[ MAX_LPC_ORDER ];
+     silk_float         invGains[ MAX_NB_SUBFR ], Wght[ MAX_NB_SUBFR ];
+     silk_float         WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];
+     silk_float         LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ];
+     silk_float         minInvGain;
+
+     /* Per-subframe inverse gains and their squares (weights for the weighted least squares) */
+     for( k = 0; k < psEncC->nb_subfr; k++ ) {
+         silk_assert( psEncCtrl->Gains[ k ] > 0.0f );
+         invGains[ k ] = 1.0f / psEncCtrl->Gains[ k ];
+         Wght[ k ]     = invGains[ k ] * invGains[ k ];
+     }
+
+     if( psEncC->indices.signalType != TYPE_VOICED ) {
+         /************/
+         /* UNVOICED */
+         /************/
+         /* Each output segment is one subframe preceded by predictLPCOrder samples, */
+         /* scaled by that subframe's inverse gain                                    */
+         const opus_int lpcOrder = psEncC->predictLPCOrder;
+         const opus_int subfrLen = psEncC->subfr_length;
+         const opus_int segLen   = subfrLen + lpcOrder;
+
+         for( k = 0; k < psEncC->nb_subfr; k++ ) {
+             silk_scale_copy_vector_FLP( LPC_in_pre + k * segLen, ( x - lpcOrder ) + k * subfrLen,
+                 invGains[ k ], segLen );
+         }
+
+         /* No long-term prediction for this frame */
+         silk_memset( psEncCtrl->LTPCoef, 0, psEncC->nb_subfr * LTP_ORDER * sizeof( silk_float ) );
+         psEncCtrl->LTPredCodGain = 0.0f;
+         psEncC->sum_log_gain_Q7  = 0;
+     } else {
+         /**********/
+         /* VOICED */
+         /**********/
+         silk_assert( psEncC->ltp_mem_length - psEncC->predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
+
+         /* LTP analysis */
+         silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch,
+             psEncCtrl->pitchL, Wght, psEncC->subfr_length, psEncC->nb_subfr, psEncC->ltp_mem_length );
+
+         /* Quantize LTP gain parameters */
+         silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEncC->indices.LTPIndex, &psEncC->indices.PERIndex,
+             &psEncC->sum_log_gain_Q7, WLTP, psEncC->mu_LTP_Q9, psEncC->LTPQuantLowComplexity, psEncC->nb_subfr );
+
+         /* Control LTP scaling */
+         silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding );
+
+         /* Create LTP residual */
+         silk_LTP_analysis_filter_FLP( LPC_in_pre, x - psEncC->predictLPCOrder, psEncCtrl->LTPCoef,
+             psEncCtrl->pitchL, invGains, psEncC->subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );
+     }
+
+     /* Limit on total predictive coding gain */
+     if( psEncC->first_frame_after_reset == 0 ) {
+         minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) / MAX_PREDICTION_POWER_GAIN;
+         minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality;
+     } else {
+         minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET;
+     }
+
+     /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
+     silk_find_LPC_FLP( psEncC, NLSF_Q15, LPC_in_pre, minInvGain );
+
+     /* Quantize LSFs */
+     silk_process_NLSFs_FLP( psEncC, psEncCtrl->PredCoef, NLSF_Q15, psEncC->prev_NLSFq_Q15 );
+
+     /* Calculate residual energy using quantized LPC coefficients */
+     silk_residual_energy_FLP( psEncCtrl->ResNrg, LPC_in_pre, psEncCtrl->PredCoef, psEncCtrl->Gains,
+         psEncC->subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );
+
+     /* Copy to prediction struct for use in next frame for interpolation */
+     silk_memcpy( psEncC->prev_NLSFq_Q15, NLSF_Q15, sizeof( psEncC->prev_NLSFq_Q15 ) );
+ }
+
diff --git a/drivers/opus/silk/float/inner_product_FLP.c b/drivers/opus/silk/float/inner_product_FLP.c
new file mode 100644
index 0000000000..57acf5ffba
--- /dev/null
+++ b/drivers/opus/silk/float/inner_product_FLP.c
@@ -0,0 +1,60 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+ /* Inner product of two silk_float arrays, with result as double.             */
+ /* Every product is formed in double precision. A dataSize <= 0 reads nothing */
+ /* from either array and returns 0.0.                                         */
+ double silk_inner_product_FLP(
+     const silk_float    *data1,         /* I    First vector  [dataSize]    */
+     const silk_float    *data2,         /* I    Second vector [dataSize]    */
+     opus_int            dataSize        /* I    Number of elements          */
+ )
+ {
+     opus_int  i, dataSize4;
+     double    result;
+
+     result = 0.0;
+
+     /* Largest multiple of 4 not exceeding dataSize. Only the two low bits are  */
+     /* cleared: the previous 0xFFFC mask also discarded every bit above bit 15  */
+     /* and mapped a negative dataSize to a large positive count, which let the  */
+     /* unrolled loop read far outside the arrays.                               */
+     dataSize4 = dataSize & ~3;
+
+     /* 4x unrolled loop */
+     for( i = 0; i < dataSize4; i += 4 ) {
+         result += data1[ i + 0 ] * (double)data2[ i + 0 ] +
+                   data1[ i + 1 ] * (double)data2[ i + 1 ] +
+                   data1[ i + 2 ] * (double)data2[ i + 2 ] +
+                   data1[ i + 3 ] * (double)data2[ i + 3 ];
+     }
+
+     /* add any remaining products (at most 3 for a positive dataSize) */
+     for( ; i < dataSize; i++ ) {
+         result += data1[ i ] * (double)data2[ i ];
+     }
+
+     return result;
+ }
diff --git a/drivers/opus/silk/float/k2a_FLP.c b/drivers/opus/silk/float/k2a_FLP.c
new file mode 100644
index 0000000000..a668a32127
--- /dev/null
+++ b/drivers/opus/silk/float/k2a_FLP.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+ /* Step-up recursion: turns reflection coefficients into prediction coefficients. */
+ /* A[] is built in place, one order at a time. The scratch copy holds             */
+ /* SILK_MAX_ORDER_LPC entries, so order must not exceed that value.               */
+ void silk_k2a_FLP(
+     silk_float          *A,                 /* O     prediction coefficients [order]           */
+     const silk_float    *rc,                /* I     reflection coefficients [order]           */
+     opus_int32          order               /* I     prediction order                          */
+ )
+ {
+     opus_int   k, n, mirror;
+     silk_float rck;
+     silk_float Atmp[ SILK_MAX_ORDER_LPC ];
+
+     for( k = 0; k < order; k++ ) {
+         rck = rc[ k ];
+
+         /* Snapshot the order-k coefficients before they are overwritten */
+         n = 0;
+         while( n < k ) {
+             Atmp[ n ] = A[ n ];
+             n++;
+         }
+
+         /* Each coefficient picks up its mirrored partner from the snapshot */
+         for( n = 0, mirror = k - 1; n < k; n++, mirror-- ) {
+             A[ n ] += Atmp[ mirror ] * rck;
+         }
+
+         /* Newly added coefficient for order k + 1 */
+         A[ k ] = -rck;
+     }
+ }
diff --git a/drivers/opus/silk/float/levinsondurbin_FLP.c b/drivers/opus/silk/float/levinsondurbin_FLP.c
new file mode 100644
index 0000000000..64aaf0fb29
--- /dev/null
+++ b/drivers/opus/silk/float/levinsondurbin_FLP.c
@@ -0,0 +1,81 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+ /* Levinson-Durbin recursion for the normal equations.                          */
+ /* Fills A[] from the auto-correlations in corr[] and returns the prediction    */
+ /* error energy. The energy is clamped from below to 1e-12 * corr[0] + 1e-9     */
+ /* after every update. corr[ 1 ] and A[ 0 ] are accessed unconditionally, so    */
+ /* order must be at least 1.                                                    */
+ silk_float silk_levinsondurbin_FLP(         /* O    prediction error energy                     */
+     silk_float          A[],                /* O    prediction coefficients  [order]            */
+     const silk_float    corr[],             /* I    input auto-correlations [order + 1]         */
+     const opus_int      order               /* I    prediction order                            */
+ )
+ {
+     opus_int   m, lo, hi, mid;
+     silk_float nrgFloor, nrg, acc, km, headCoef, tailCoef;
+
+     /* Lower bound applied to the error energy throughout the recursion */
+     nrgFloor = 1e-12f * corr[ 0 ] + 1e-9f;
+
+     /* First-order step */
+     nrg = corr[ 0 ];
+     nrg = silk_max_float( nrgFloor, nrg );
+     A[ 0 ] = corr[ 1 ] / nrg;
+     nrg -= A[ 0 ] * corr[ 1 ];
+     nrg = silk_max_float( nrgFloor, nrg );
+
+     for( m = 1; m < order; m++ ) {
+         /* Numerator of the next reflection coefficient */
+         acc = corr[ m + 1 ];
+         for( lo = 0; lo < m; lo++ ) {
+             acc -= A[ lo ] * corr[ m - lo ];
+         }
+
+         /* reflection coefficient */
+         km = acc / nrg;
+
+         /* residual energy */
+         nrg -= km * acc;
+         nrg = silk_max_float( nrgFloor, nrg );
+
+         /* Update A[ 0 .. m-1 ] in mirrored pairs, working inwards from both ends; */
+         /* both old values are read before either one is overwritten               */
+         mid = m >> 1;
+         lo  = 0;
+         hi  = m - 1;
+         while( lo < mid ) {
+             headCoef = A[ lo ];
+             tailCoef = A[ hi ];
+             A[ hi ] -= km * headCoef;
+             A[ lo ] -= km * tailCoef;
+             lo++;
+             hi--;
+         }
+         /* For odd m the centre element is its own mirror */
+         if( m & 1 ) {
+             A[ mid ] -= km * A[ mid ];
+         }
+         A[ m ] = km;
+     }
+
+     /* return the residual energy */
+     return nrg;
+ }
+
diff --git a/drivers/opus/silk/float/main_FLP.h b/drivers/opus/silk/float/main_FLP.h
new file mode 100644
index 0000000000..92d6ec3df1
--- /dev/null
+++ b/drivers/opus/silk/float/main_FLP.h
@@ -0,0 +1,312 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MAIN_FLP_H
+#define SILK_MAIN_FLP_H
+
+#include "SigProc_FLP.h"
+#include "SigProc_FIX.h"
+#include "structs_FLP.h"
+#include "silk_main.h"
+#include "define.h"
+#include "debug.h"
+#include "entenc.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define silk_encoder_state_Fxx silk_encoder_state_FLP
+#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FLP
+#define silk_encode_frame_Fxx silk_encode_frame_FLP
+
+/*********************/
+/* Encoder Functions */
+/*********************/
+
+/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
+void silk_HP_variable_cutoff(
+ silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */
+);
+
+ /* NOTE(review): this declaration carried the same "Encoder main function" comment as */
+ /* silk_encode_frame_FLP below. Presumably it runs the voice activity detection step   */
+ /* on the encoder state - confirm against the implementation.                          */
+ void silk_encode_do_VAD_FLP(
+ silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */
+ );
+
+/* Encoder main function */
+opus_int silk_encode_frame_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ opus_int32 *pnBytesOut, /* O Number of payload bytes; */
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ opus_int condCoding, /* I The type of conditional coding to use */
+ opus_int maxBits, /* I If > 0: maximum number of output bits */
+ opus_int useCBR /* I Flag to force constant-bitrate operation */
+);
+
+ /* Initializes the Silk encoder state */
+ opus_int silk_init_encoder(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ int arch /* I Run-time architecture */
+ );
+
+ /* Control the Silk encoder */
+ opus_int silk_control_encoder(
+ silk_encoder_state_FLP *psEnc, /* I/O Pointer to Silk encoder state FLP */
+ silk_EncControlStruct *encControl, /* I Control structure */
+ const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */
+ const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */
+ const opus_int channelNb, /* I Channel number */
+ const opus_int force_fs_kHz /* I Undocumented here; presumably a forced internal sampling rate in kHz - TODO confirm */
+ );
+
+/****************/
+/* Prefiltering */
+/****************/
+void silk_prefilter_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ const silk_encoder_control_FLP *psEncCtrl, /* I Encoder control FLP */
+ silk_float xw[], /* O Weighted signal */
+ const silk_float x[] /* I Speech signal */
+);
+
+/**************************/
+/* Noise shaping analysis */
+/**************************/
+/* Compute noise shaping coefficients and initial gain values */
+void silk_noise_shape_analysis_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ const silk_float *pitch_res, /* I LPC residual from pitch analysis */
+ const silk_float *x /* I Input signal [frame_length + la_shape] */
+);
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FLP(
+ silk_float *corr, /* O Result [order + 1] */
+ const silk_float *input, /* I Input data to correlate */
+ const silk_float warping, /* I Warping coefficient */
+ const opus_int length, /* I Length of input */
+ const opus_int order /* I Correlation order (even) */
+);
+
+/* Calculation of LTP state scaling */
+void silk_LTP_scale_ctrl_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+/**********************************************/
+/* Prediction Analysis */
+/**********************************************/
+/* Find pitch lags */
+void silk_find_pitch_lags_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ silk_float res[], /* O Residual */
+ const silk_float x[], /* I Speech signal */
+ int arch /* I Run-time architecture */
+);
+
+/* Find LPC and LTP coefficients */
+void silk_find_pred_coefs_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ const silk_float res_pitch[], /* I Residual from pitch analysis */
+ const silk_float x[], /* I Speech signal */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+ /* LPC analysis */
+ void silk_find_LPC_FLP(
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ opus_int16 NLSF_Q15[], /* O NLSFs */
+ const silk_float x[], /* I Input signal */
+ const silk_float minInvGain /* I Minimum inverse prediction gain; silk_find_pred_coefs_FLP passes a linear ratio, not a dB value */
+ );
+
+/* LTP analysis */
+void silk_find_LTP_FLP(
+ silk_float b[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */
+ silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */
+ silk_float *LTPredCodGain, /* O LTP coding gain */
+ const silk_float r_lpc[], /* I LPC residual */
+ const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */
+ const silk_float Wght[ MAX_NB_SUBFR ], /* I Weights */
+ const opus_int subfr_length, /* I Subframe length */
+ const opus_int nb_subfr, /* I number of subframes */
+ const opus_int mem_offset /* I Number of samples in LTP memory */
+);
+
+void silk_LTP_analysis_filter_FLP(
+ silk_float *LTP_res, /* O LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */
+ const silk_float *x, /* I Input signal, with preceding samples */
+ const silk_float B[ LTP_ORDER * MAX_NB_SUBFR ], /* I LTP coefficients for each subframe */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
+ const silk_float invGains[ MAX_NB_SUBFR ], /* I Inverse quantization gains */
+ const opus_int subfr_length, /* I Length of each subframe */
+ const opus_int nb_subfr, /* I number of subframes */
+ const opus_int pre_length /* I Preceding samples for each subframe */
+);
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order */
+/* of preceding samples */
+void silk_residual_energy_FLP(
+ silk_float nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */
+ const silk_float x[], /* I Input signal */
+ silk_float a[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */
+ const silk_float gains[], /* I Quantization gains */
+ const opus_int subfr_length, /* I Subframe length */
+ const opus_int nb_subfr, /* I number of subframes */
+ const opus_int LPC_order /* I LPC order */
+);
+
+ /* LPC analysis filter; the filter order is supplied by the caller through Order. */
+ /* NOTE(review): this comment used to read "16th order" - confirm which orders    */
+ /* the implementation actually supports.                                           */
+ void silk_LPC_analysis_filter_FLP(
+ silk_float r_LPC[], /* O LPC residual signal */
+ const silk_float PredCoef[], /* I LPC coefficients */
+ const silk_float s[], /* I Input signal */
+ const opus_int length, /* I Length of input signal */
+ const opus_int Order /* I LPC order */
+ );
+
+ /* LTP tap quantizer */
+ /* NOTE(review): the parameter is named mu_Q10, but silk_find_pred_coefs_FLP passes */
+ /* psEnc->sCmn.mu_LTP_Q9 for it - confirm which Q-domain the implementation expects. */
+ void silk_quant_LTP_gains_FLP(
+ silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */
+ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */
+ opus_int8 *periodicity_index, /* O Periodicity index */
+ opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */
+ const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */
+ const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */
+ const opus_int lowComplexity, /* I Flag for low complexity */
+ const opus_int nb_subfr /* I number of subframes */
+ );
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+silk_float silk_residual_energy_covar_FLP( /* O Weighted residual energy */
+ const silk_float *c, /* I Filter coefficients */
+ silk_float *wXX, /* I/O Weighted correlation matrix, reg. out */
+ const silk_float *wXx, /* I Weighted correlation vector */
+ const silk_float wxx, /* I Weighted correlation value */
+ const opus_int D /* I Dimension */
+);
+
+/* Processing of gains */
+void silk_process_gains_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+/******************/
+/* Linear Algebra */
+/******************/
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FLP(
+ const silk_float *x, /* I x vector [ L+order-1 ] used to create X */
+ const opus_int L, /* I Length of vectors */
+ const opus_int Order, /* I Max lag for correlation */
+ silk_float *XX /* O X'*X correlation matrix [order x order] */
+);
+
+ /* Calculates correlation vector X'*t */
+ void silk_corrVector_FLP(
+ const silk_float *x, /* I x vector [L+order-1] used to create X */
+ const silk_float *t, /* I Target vector [L] */
+ const opus_int L, /* I Length of vectors */
+ const opus_int Order, /* I Max lag for correlation */
+ silk_float *Xt /* O X'*t correlation vector [order] */
+ );
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FLP(
+ silk_float *XX, /* I/O Correlation matrices */
+ silk_float *xx, /* I/O Correlation values */
+ const silk_float noise, /* I Noise energy to add */
+ const opus_int D /* I Dimension of XX */
+);
+
+/* Function to solve linear equation Ax = b, where A is an MxM symmetric matrix */
+void silk_solve_LDL_FLP(
+ silk_float *A, /* I/O Symmetric square matrix, out: reg. */
+ const opus_int M, /* I Size of matrix */
+ const silk_float *b, /* I Pointer to b vector */
+ silk_float *x /* O Pointer to x solution vector */
+);
+
+/* Apply sine window to signal vector. */
+/* Window types: */
+/* 1 -> sine window from 0 to pi/2 */
+/* 2 -> sine window from pi/2 to pi */
+void silk_apply_sine_window_FLP(
+ silk_float px_win[], /* O Pointer to windowed signal */
+ const silk_float px[], /* I Pointer to input signal */
+ const opus_int win_type, /* I Selects a window type */
+ const opus_int length /* I Window length, multiple of 4 */
+);
+
+/* Wrapper functions. Call flp / fix code */
+
+/* Convert AR filter coefficients to NLSF parameters */
+void silk_A2NLSF_FLP(
+ opus_int16 *NLSF_Q15, /* O NLSF vector [ LPC_order ] */
+ const silk_float *pAR, /* I LPC coefficients [ LPC_order ] */
+ const opus_int LPC_order /* I LPC order */
+);
+
+/* Convert NLSF parameters to AR prediction filter coefficients */
+void silk_NLSF2A_FLP(
+ silk_float *pAR, /* O LPC coefficients [ LPC_order ] */
+ const opus_int16 *NLSF_Q15, /* I NLSF vector [ LPC_order ] */
+ const opus_int LPC_order /* I LPC order */
+);
+
+/* Limit, stabilize, and quantize NLSFs */
+void silk_process_NLSFs_FLP(
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */
+ opus_int16 NLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */
+ const opus_int16 prev_NLSF_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */
+);
+
+ /* Floating-point Silk NSQ wrapper */
+ void silk_NSQ_wrapper_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ SideInfoIndices *psIndices, /* I/O Quantization indices */
+ silk_nsq_state *psNSQ, /* I/O Noise Shaping Quantization state */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ const silk_float x[] /* I Prefiltered input signal */
+ );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/float/noise_shape_analysis_FLP.c b/drivers/opus/silk/float/noise_shape_analysis_FLP.c
new file mode 100644
index 0000000000..f80e0b3d0e
--- /dev/null
+++ b/drivers/opus/silk/float/noise_shape_analysis_FLP.c
@@ -0,0 +1,365 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Gain that gives warped filter coefficients a zero-mean log frequency response on a */
+/* non-warped frequency scale, so that a minimum-phase monic filter can implement them. */
+/* A monic filter has a first coefficient of 1.0; Silk coefficient arrays leave that */
+/* leading coefficient out. */
+static OPUS_INLINE silk_float warped_gain(
+ const silk_float *coefs,
+ silk_float lambda,
+ opus_int order
+) {
+ opus_int n = order - 1;
+ silk_float acc = coefs[ n ];
+ const silk_float neg_lambda = -lambda;
+
+ /* Horner evaluation at -lambda, from the highest coefficient down to coefs[ 0 ] */
+ while( --n >= 0 ) {
+ acc = neg_lambda * acc + coefs[ n ];
+ }
+ return (silk_float)( 1.0f / ( 1.0f - neg_lambda * acc ) );
+}
+
+/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */
+/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
+static OPUS_INLINE void warped_true2monic_coefs(
+ silk_float *coefs_syn, /* I/O Synthesis filter coefficients, converted in place */
+ silk_float *coefs_ana, /* I/O Analysis filter coefficients, converted in place */
+ silk_float lambda, /* I Warping coefficient */
+ silk_float limit, /* I Largest absolute coefficient value accepted */
+ opus_int order /* I Number of coefficients in each array */
+) {
+ opus_int i, iter, ind = 0;
+ silk_float tmp, maxabs, chirp, gain_syn, gain_ana;
+
+ /* Convert to monic coefficients */
+ for( i = order - 1; i > 0; i-- ) {
+ coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ];
+ coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ];
+ }
+ gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] );
+ gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] );
+ for( i = 0; i < order; i++ ) {
+ coefs_syn[ i ] *= gain_syn;
+ coefs_ana[ i ] *= gain_ana;
+ }
+
+ /* Limit: try at most 10 rounds of bandwidth expansion */
+ for( iter = 0; iter < 10; iter++ ) {
+ /* Find maximum absolute value over both coefficient sets, and remember its index */
+ maxabs = -1.0f;
+ for( i = 0; i < order; i++ ) {
+ tmp = silk_max( silk_abs_float( coefs_syn[ i ] ), silk_abs_float( coefs_ana[ i ] ) );
+ if( tmp > maxabs ) {
+ maxabs = tmp;
+ ind = i;
+ }
+ }
+ if( maxabs <= limit ) {
+ /* Coefficients are within range - done */
+ return;
+ }
+
+ /* Convert back to true warped coefficients (undoes the monic conversion and its gain) */
+ for( i = 1; i < order; i++ ) {
+ coefs_syn[ i - 1 ] += lambda * coefs_syn[ i ];
+ coefs_ana[ i - 1 ] += lambda * coefs_ana[ i ];
+ }
+ gain_syn = 1.0f / gain_syn;
+ gain_ana = 1.0f / gain_ana;
+ for( i = 0; i < order; i++ ) {
+ coefs_syn[ i ] *= gain_syn;
+ coefs_ana[ i ] *= gain_ana;
+ }
+
+ /* Apply bandwidth expansion; chirp drops further for later iterations, larger overshoot and a lower peak index */
+ chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) );
+ silk_bwexpander_FLP( coefs_syn, order, chirp );
+ silk_bwexpander_FLP( coefs_ana, order, chirp );
+
+ /* Convert to monic warped coefficients */
+ for( i = order - 1; i > 0; i-- ) {
+ coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ];
+ coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ];
+ }
+ gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] );
+ gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] );
+ for( i = 0; i < order; i++ ) {
+ coefs_syn[ i ] *= gain_syn;
+ coefs_ana[ i ] *= gain_ana;
+ }
+ }
+ silk_assert( 0 ); /* Reached only when the limit was still exceeded after all 10 iterations */
+}
+
+/* Compute noise shaping coefficients and initial gain values; results go to psEncCtrl, smoothing state to psEnc->sShape */
+void silk_noise_shape_analysis_FLP(
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ const silk_float *pitch_res, /* I LPC residual from pitch analysis */
+ const silk_float *x /* I Input signal [frame_length + la_shape] */
+)
+{
+ silk_shape_state_FLP *psShapeSt = &psEnc->sShape;
+ opus_int k, nSamples;
+ silk_float SNR_adj_dB, HarmBoost, HarmShapeGain, Tilt;
+ silk_float nrg, pre_nrg, log_energy, log_energy_prev, energy_variation;
+ silk_float delta, BWExp1, BWExp2, gain_mult, gain_add, strength, b, warping;
+ silk_float x_windowed[ SHAPE_LPC_WIN_MAX ];
+ silk_float auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ];
+ const silk_float *x_ptr, *pitch_res_ptr;
+
+ /* Point to start of first LPC analysis block, la_shape samples before x */
+ x_ptr = x - psEnc->sCmn.la_shape;
+
+ /****************/
+ /* GAIN CONTROL */
+ /****************/
+ SNR_adj_dB = psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ); /* Q7 value scaled to float */
+
+ /* Input quality is the average of the quality in the lowest two VAD bands */
+ psEncCtrl->input_quality = 0.5f * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] + psEnc->sCmn.input_quality_bands_Q15[ 1 ] ) * ( 1.0f / 32768.0f );
+
+ /* Coding quality level, between 0.0 and 1.0 */
+ psEncCtrl->coding_quality = silk_sigmoid( 0.25f * ( SNR_adj_dB - 20.0f ) );
+
+ if( psEnc->sCmn.useCBR == 0 ) {
+ /* Reduce coding SNR during low speech activity */
+ b = 1.0f - psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f );
+ SNR_adj_dB -= BG_SNR_DECR_dB * psEncCtrl->coding_quality * ( 0.5f + 0.5f * psEncCtrl->input_quality ) * b * b;
+ }
+
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Reduce gains for periodic signals (a higher SNR_adj_dB lowers gain_mult in the gain tweaking below) */
+ SNR_adj_dB += HARM_SNR_INCR_dB * psEnc->LTPCorr;
+ } else {
+ /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
+ SNR_adj_dB += ( -0.4f * psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) + 6.0f ) * ( 1.0f - psEncCtrl->input_quality );
+ }
+
+ /*************************/
+ /* SPARSENESS PROCESSING */
+ /*************************/
+ /* Set quantizer offset */
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Initially set to 0; may be overruled in process_gains(..) */
+ psEnc->sCmn.indices.quantOffsetType = 0;
+ psEncCtrl->sparseness = 0.0f;
+ } else {
+ /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
+ nSamples = 2 * psEnc->sCmn.fs_kHz;
+ energy_variation = 0.0f;
+ log_energy_prev = 0.0f;
+ pitch_res_ptr = pitch_res;
+ for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
+ nrg = ( silk_float )nSamples + ( silk_float )silk_energy_FLP( pitch_res_ptr, nSamples );
+ log_energy = silk_log2( nrg );
+ if( k > 0 ) {
+ energy_variation += silk_abs_float( log_energy - log_energy_prev );
+ }
+ log_energy_prev = log_energy;
+ pitch_res_ptr += nSamples;
+ }
+ psEncCtrl->sparseness = silk_sigmoid( 0.4f * ( energy_variation - 5.0f ) );
+
+ /* Set quantization offset depending on sparseness measure */
+ if( psEncCtrl->sparseness > SPARSENESS_THRESHOLD_QNT_OFFSET ) {
+ psEnc->sCmn.indices.quantOffsetType = 0;
+ } else {
+ psEnc->sCmn.indices.quantOffsetType = 1;
+ }
+
+ /* Increase coding SNR for sparse signals */
+ SNR_adj_dB += SPARSE_SNR_INCR_dB * ( psEncCtrl->sparseness - 0.5f );
+ }
+
+ /*******************************/
+ /* Control bandwidth expansion */
+ /*******************************/
+ /* More BWE for signals with high prediction gain */
+ strength = FIND_PITCH_WHITE_NOISE_FRACTION * psEncCtrl->predGain; /* between 0.0 and 1.0 */
+ BWExp1 = BWExp2 = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength );
+ delta = LOW_RATE_BANDWIDTH_EXPANSION_DELTA * ( 1.0f - 0.75f * psEncCtrl->coding_quality );
+ BWExp1 -= delta;
+ BWExp2 += delta;
+ /* BWExp1 will be applied after BWExp2, so make it relative */
+ BWExp1 /= BWExp2;
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
+ warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality;
+ } else {
+ warping = 0.0f;
+ }
+
+ /********************************************/
+ /* Compute noise shaping AR coefs and gains */
+ /********************************************/
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ /* Apply window: sine slope followed by flat part followed by cosine slope */
+ opus_int shift, slope_part, flat_part;
+ flat_part = psEnc->sCmn.fs_kHz * 3;
+ slope_part = ( psEnc->sCmn.shapeWinLength - flat_part ) / 2;
+
+ silk_apply_sine_window_FLP( x_windowed, x_ptr, 1, slope_part );
+ shift = slope_part;
+ silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(silk_float) );
+ shift += flat_part;
+ silk_apply_sine_window_FLP( x_windowed + shift, x_ptr + shift, 2, slope_part );
+
+ /* Update pointer: next LPC analysis block */
+ x_ptr += psEnc->sCmn.subfr_length;
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Calculate warped auto correlation */
+ silk_warped_autocorrelation_FLP( auto_corr, x_windowed, warping,
+ psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
+ } else {
+ /* Calculate regular auto correlation */
+ silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );
+ }
+
+ /* Add white noise, as a fraction of energy */
+ auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION;
+
+ /* Convert correlations to prediction coefficients, and compute residual energy; its square root is the initial gain */
+ nrg = silk_levinsondurbin_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], auto_corr, psEnc->sCmn.shapingLPCOrder );
+ psEncCtrl->Gains[ k ] = ( silk_float )sqrt( nrg );
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Adjust gain for warping */
+ psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder );
+ }
+
+ /* Bandwidth expansion for synthesis filter shaping */
+ silk_bwexpander_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp2 );
+
+ /* Compute noise shaping filter coefficients: AR1 starts as a copy of the expanded AR2 */
+ silk_memcpy(
+ &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ],
+ &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ],
+ psEnc->sCmn.shapingLPCOrder * sizeof( silk_float ) );
+
+ /* Bandwidth expansion for analysis filter shaping */
+ silk_bwexpander_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp1 );
+
+ /* Ratio of prediction gains, in energy domain */
+ pre_nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder );
+ nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder );
+ psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg );
+
+ /* Convert to monic warped prediction coefficients and limit absolute values */
+ warped_true2monic_coefs( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ],
+ warping, 3.999f, psEnc->sCmn.shapingLPCOrder );
+ }
+
+ /*****************/
+ /* Gain tweaking */
+ /*****************/
+ /* Increase gains during low speech activity */
+ gain_mult = (silk_float)pow( 2.0f, -0.16f * SNR_adj_dB );
+ gain_add = (silk_float)pow( 2.0f, 0.16f * MIN_QGAIN_DB );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->Gains[ k ] *= gain_mult;
+ psEncCtrl->Gains[ k ] += gain_add;
+ }
+
+ gain_mult = 1.0f + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT;
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->GainsPre[ k ] *= gain_mult;
+ }
+
+ /************************************************/
+ /* Control low-frequency shaping and noise tilt */
+ /************************************************/
+ /* Less low frequency shaping for noisy inputs */
+ strength = LOW_FREQ_SHAPING * ( 1.0f + LOW_QUALITY_LOW_FREQ_SHAPING_DECR * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] * ( 1.0f / 32768.0f ) - 1.0f ) );
+ strength *= psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f );
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
+ /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ b = 0.2f / psEnc->sCmn.fs_kHz + 3.0f / psEncCtrl->pitchL[ k ];
+ psEncCtrl->LF_MA_shp[ k ] = -1.0f + b;
+ psEncCtrl->LF_AR_shp[ k ] = 1.0f - b - b * strength;
+ }
+ Tilt = - HP_NOISE_COEF -
+ (1 - HP_NOISE_COEF) * HARM_HP_NOISE_COEF * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f );
+ } else {
+ b = 1.3f / psEnc->sCmn.fs_kHz;
+ psEncCtrl->LF_MA_shp[ 0 ] = -1.0f + b;
+ psEncCtrl->LF_AR_shp[ 0 ] = 1.0f - b - b * strength * 0.6f;
+ for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->LF_MA_shp[ k ] = psEncCtrl->LF_MA_shp[ 0 ];
+ psEncCtrl->LF_AR_shp[ k ] = psEncCtrl->LF_AR_shp[ 0 ];
+ }
+ Tilt = -HP_NOISE_COEF;
+ }
+
+ /****************************/
+ /* HARMONIC SHAPING CONTROL */
+ /****************************/
+ /* Control boosting of harmonic frequencies */
+ HarmBoost = LOW_RATE_HARMONIC_BOOST * ( 1.0f - psEncCtrl->coding_quality ) * psEnc->LTPCorr;
+
+ /* More harmonic boost for noisy input signals */
+ HarmBoost += LOW_INPUT_QUALITY_HARMONIC_BOOST * ( 1.0f - psEncCtrl->input_quality );
+
+ if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Harmonic noise shaping */
+ HarmShapeGain = HARMONIC_SHAPING;
+
+ /* More harmonic noise shaping for high bitrates or noisy input */
+ HarmShapeGain += HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING *
+ ( 1.0f - ( 1.0f - psEncCtrl->coding_quality ) * psEncCtrl->input_quality );
+
+ /* Less harmonic noise shaping for less periodic signals */
+ HarmShapeGain *= ( silk_float )sqrt( psEnc->LTPCorr );
+ } else {
+ HarmShapeGain = 0.0f;
+ }
+
+ /*************************/
+ /* Smooth over subframes */
+ /*************************/
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { /* Running values are kept in psEnc->sShape; each subframe gets the updated value */
+ psShapeSt->HarmBoost_smth += SUBFR_SMTH_COEF * ( HarmBoost - psShapeSt->HarmBoost_smth );
+ psEncCtrl->HarmBoost[ k ] = psShapeSt->HarmBoost_smth;
+ psShapeSt->HarmShapeGain_smth += SUBFR_SMTH_COEF * ( HarmShapeGain - psShapeSt->HarmShapeGain_smth );
+ psEncCtrl->HarmShapeGain[ k ] = psShapeSt->HarmShapeGain_smth;
+ psShapeSt->Tilt_smth += SUBFR_SMTH_COEF * ( Tilt - psShapeSt->Tilt_smth );
+ psEncCtrl->Tilt[ k ] = psShapeSt->Tilt_smth;
+ }
+}
diff --git a/drivers/opus/silk/float/pitch_analysis_core_FLP.c b/drivers/opus/silk/float/pitch_analysis_core_FLP.c
new file mode 100644
index 0000000000..2588094c49
--- /dev/null
+++ b/drivers/opus/silk/float/pitch_analysis_core_FLP.c
@@ -0,0 +1,630 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/*****************************************************************************
+* Pitch analyser function
+******************************************************************************/
+#include "SigProc_FLP.h"
+#include "SigProc_FIX.h"
+#include "pitch_est_defines.h"
+#include "pitch.h"
+
+#define SCRATCH_SIZE 22
+
+/************************************************************/
+/* Internally used functions */
+/************************************************************/
+static void silk_P_Ana_calc_corr_st3( /* Forward declaration; fills cross_corr_st3 for the stage 3 search */
+ silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+ const silk_float frame[], /* I vector to correlate */
+ opus_int start_lag, /* I start lag */
+ opus_int sf_length, /* I sub frame length */
+ opus_int nb_subfr, /* I number of subframes */
+ opus_int complexity, /* I Complexity setting */
+ int arch /* I Run-time architecture */
+);
+
+static void silk_P_Ana_calc_energy_st3( /* Forward declaration; fills energies_st3 for the stage 3 search */
+ silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM energy array */
+ const silk_float frame[], /* I vector to correlate */
+ opus_int start_lag, /* I start lag */
+ opus_int sf_length, /* I sub frame length */
+ opus_int nb_subfr, /* I number of subframes */
+ opus_int complexity /* I Complexity setting */
+);
+
+/************************************************************/
+/* CORE PITCH ANALYSIS FUNCTION */
+/************************************************************/
+opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, 1 unvoiced */
+ const silk_float *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
+ opus_int *pitch_out, /* O Pitch lag values [nb_subfr] */
+ opus_int16 *lagIndex, /* O Lag Index */
+ opus_int8 *contourIndex, /* O Pitch contour Index */
+ silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */
+ opus_int prevLag, /* I Last lag of previous frame; set to zero if unvoiced */
+ const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */
+ const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */
+ const opus_int Fs_kHz, /* I sample frequency (kHz) */
+ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
+ const opus_int nb_subfr, /* I Number of 5 ms subframes */
+ int arch /* I Run-time architecture */
+)
+{
+ opus_int i, k, d, j;
+ silk_float frame_8kHz[ PE_MAX_FRAME_LENGTH_MS * 8 ];
+ silk_float frame_4kHz[ PE_MAX_FRAME_LENGTH_MS * 4 ];
+ opus_int16 frame_8_FIX[ PE_MAX_FRAME_LENGTH_MS * 8 ];
+ opus_int16 frame_4_FIX[ PE_MAX_FRAME_LENGTH_MS * 4 ];
+ opus_int32 filt_state[ 6 ];
+ silk_float threshold, contour_bias;
+ silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ];
+ opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ];
+ silk_float CC[ PE_NB_CBKS_STAGE2_EXT ];
+ const silk_float *target_ptr, *basis_ptr;
+ double cross_corr, normalizer, energy, energy_tmp;
+ opus_int d_srch[ PE_D_SRCH_LENGTH ];
+ opus_int16 d_comp[ (PE_MAX_LAG >> 1) + 5 ];
+ opus_int length_d_srch, length_d_comp;
+ silk_float Cmax, CCmax, CCmax_b, CCmax_new_b, CCmax_new;
+ opus_int CBimax, CBimax_new, lag, start_lag, end_lag, lag_new;
+ opus_int cbk_size;
+ silk_float lag_log2, prevLag_log2, delta_lag_log2_sqr;
+ silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
+ silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
+ opus_int lag_counter;
+ opus_int frame_length, frame_length_8kHz, frame_length_4kHz;
+ opus_int sf_length, sf_length_8kHz, sf_length_4kHz;
+ opus_int min_lag, min_lag_8kHz, min_lag_4kHz;
+ opus_int max_lag, max_lag_8kHz, max_lag_4kHz;
+ opus_int nb_cbk_search;
+ const opus_int8 *Lag_CB_ptr;
+
+ /* Check for valid sampling frequency */
+ silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
+
+ /* Check for valid complexity setting */
+ silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+ silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+ silk_assert( search_thres1 >= 0.0f && search_thres1 <= 1.0f );
+ silk_assert( search_thres2 >= 0.0f && search_thres2 <= 1.0f );
+
+ /* Set up frame lengths max / min lag for the sampling frequency */
+ frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
+ frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
+ frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
+ sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz;
+ sf_length_4kHz = PE_SUBFR_LENGTH_MS * 4;
+ sf_length_8kHz = PE_SUBFR_LENGTH_MS * 8;
+ min_lag = PE_MIN_LAG_MS * Fs_kHz;
+ min_lag_4kHz = PE_MIN_LAG_MS * 4;
+ min_lag_8kHz = PE_MIN_LAG_MS * 8;
+ max_lag = PE_MAX_LAG_MS * Fs_kHz - 1;
+ max_lag_4kHz = PE_MAX_LAG_MS * 4;
+ max_lag_8kHz = PE_MAX_LAG_MS * 8 - 1;
+
+ /* Resample from input sampled at Fs_kHz to 8 kHz */
+ if( Fs_kHz == 16 ) {
+ /* Resample to 16 -> 8 khz */
+ opus_int16 frame_16_FIX[ 16 * PE_MAX_FRAME_LENGTH_MS ];
+ silk_float2short_array( frame_16_FIX, frame, frame_length );
+ silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
+ silk_resampler_down2( filt_state, frame_8_FIX, frame_16_FIX, frame_length );
+ silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz );
+ } else if( Fs_kHz == 12 ) {
+ /* Resample to 12 -> 8 khz */
+ opus_int16 frame_12_FIX[ 12 * PE_MAX_FRAME_LENGTH_MS ];
+ silk_float2short_array( frame_12_FIX, frame, frame_length );
+ silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
+ silk_resampler_down2_3( filt_state, frame_8_FIX, frame_12_FIX, frame_length );
+ silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz );
+ } else {
+ silk_assert( Fs_kHz == 8 );
+ silk_float2short_array( frame_8_FIX, frame, frame_length_8kHz );
+ }
+
+ /* Decimate again to 4 kHz */
+ silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
+ silk_resampler_down2( filt_state, frame_4_FIX, frame_8_FIX, frame_length_8kHz );
+ silk_short2float_array( frame_4kHz, frame_4_FIX, frame_length_4kHz );
+
+ /* Low-pass filter */
+ for( i = frame_length_4kHz - 1; i > 0; i-- ) {
+ frame_4kHz[ i ] += frame_4kHz[ i - 1 ];
+ }
+
+ /******************************************************************************
+ * FIRST STAGE, operating in 4 khz, on blocks of sf_length_8kHz samples (two 4 kHz subframes each)
+ ******************************************************************************/
+ silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
+ target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
+ for( k = 0; k < nb_subfr >> 1; k++ ) {
+ /* Check that we are within range of the array */
+ silk_assert( target_ptr >= frame_4kHz );
+ silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+
+ basis_ptr = target_ptr - min_lag_4kHz;
+
+ /* Check that we are within range of the array */
+ silk_assert( basis_ptr >= frame_4kHz );
+ silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+
+ celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch );
+
+ /* Calculate first vector products before loop */
+ cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
+ normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) +
+ silk_energy_FLP( basis_ptr, sf_length_8kHz ) +
+ sf_length_8kHz * 4000.0f;
+
+ C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer );
+
+ /* From now on normalizer is computed recursively */
+ for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {
+ basis_ptr--;
+
+ /* Check that we are within range of the array */
+ silk_assert( basis_ptr >= frame_4kHz );
+ silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+
+ cross_corr = xcorr[ max_lag_4kHz - d ];
+
+ /* Add contribution of new sample and remove contribution from oldest sample */
+ normalizer +=
+ basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] -
+ basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ];
+ C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer );
+ }
+ /* Update target pointer */
+ target_ptr += sf_length_8kHz;
+ }
+
+ /* Apply short-lag bias */
+ for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
+ C[ 0 ][ i ] -= C[ 0 ][ i ] * i / 4096.0f;
+ }
+
+ /* Sort */
+ length_d_srch = 4 + 2 * complexity;
+ silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
+ silk_insertion_sort_decreasing_FLP( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch );
+
+ /* Escape if correlation is very low already here */
+ Cmax = C[ 0 ][ min_lag_4kHz ];
+ if( Cmax < 0.2f ) {
+ silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
+ *LTPCorr = 0.0f;
+ *lagIndex = 0;
+ *contourIndex = 0;
+ return 1;
+ }
+
+ threshold = search_thres1 * Cmax;
+ for( i = 0; i < length_d_srch; i++ ) {
+ /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
+ if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) {
+ d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 );
+ } else {
+ length_d_srch = i;
+ break;
+ }
+ }
+ silk_assert( length_d_srch > 0 );
+
+ for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) {
+ d_comp[ i ] = 0;
+ }
+ for( i = 0; i < length_d_srch; i++ ) {
+ d_comp[ d_srch[ i ] ] = 1;
+ }
+
+ /* Convolution */
+ for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
+ d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ];
+ }
+
+ length_d_srch = 0;
+ for( i = min_lag_8kHz; i < max_lag_8kHz + 1; i++ ) {
+ if( d_comp[ i + 1 ] > 0 ) {
+ d_srch[ length_d_srch ] = i;
+ length_d_srch++;
+ }
+ }
+
+ /* Convolution */
+ for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
+ d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ] + d_comp[ i - 3 ];
+ }
+
+ length_d_comp = 0;
+ for( i = min_lag_8kHz; i < max_lag_8kHz + 4; i++ ) {
+ if( d_comp[ i ] > 0 ) {
+ d_comp[ length_d_comp ] = (opus_int16)( i - 2 );
+ length_d_comp++;
+ }
+ }
+
+ /**********************************************************************************
+ ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
+ *************************************************************************************/
+ /*********************************************************************************
+ * Find energy of each subframe projected onto its history, for a range of delays
+ *********************************************************************************/
+ silk_memset( C, 0, PE_MAX_NB_SUBFR*((PE_MAX_LAG >> 1) + 5) * sizeof(silk_float));
+
+ if( Fs_kHz == 8 ) {
+ target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * 8 ];
+ } else {
+ target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
+ }
+ for( k = 0; k < nb_subfr; k++ ) {
+ energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0;
+ for( j = 0; j < length_d_comp; j++ ) {
+ d = d_comp[ j ];
+ basis_ptr = target_ptr - d;
+ cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz );
+ if( cross_corr > 0.0f ) {
+ energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );
+ C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) );
+ } else {
+ C[ k ][ d ] = 0.0f;
+ }
+ }
+ target_ptr += sf_length_8kHz;
+ }
+
+ /* search over lag range and lags codebook */
+ /* scale factor for lag codebook, as a function of center lag */
+
+ CCmax = 0.0f; /* This value doesn't matter */
+ CCmax_b = -1000.0f;
+
+ CBimax = 0; /* To avoid returning undefined lag values */
+ lag = -1; /* To check if lag with strong enough correlation has been found */
+
+ if( prevLag > 0 ) {
+ if( Fs_kHz == 12 ) {
+ prevLag = silk_LSHIFT( prevLag, 1 ) / 3;
+ } else if( Fs_kHz == 16 ) {
+ prevLag = silk_RSHIFT( prevLag, 1 );
+ }
+ prevLag_log2 = silk_log2( (silk_float)prevLag );
+ } else {
+ prevLag_log2 = 0;
+ }
+
+ /* Set up stage 2 codebook based on number of subframes */
+ if( nb_subfr == PE_MAX_NB_SUBFR ) {
+ cbk_size = PE_NB_CBKS_STAGE2_EXT;
+ Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
+ if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
+ /* If input is 8 khz use a larger codebook here because it is last stage */
+ nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
+ } else {
+ nb_cbk_search = PE_NB_CBKS_STAGE2;
+ }
+ } else {
+ cbk_size = PE_NB_CBKS_STAGE2_10MS;
+ Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
+ nb_cbk_search = PE_NB_CBKS_STAGE2_10MS;
+ }
+
+ for( k = 0; k < length_d_srch; k++ ) {
+ d = d_srch[ k ];
+ for( j = 0; j < nb_cbk_search; j++ ) {
+ CC[j] = 0.0f;
+ for( i = 0; i < nb_subfr; i++ ) {
+ /* Try all codebooks */
+ CC[ j ] += C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size )];
+ }
+ }
+ /* Find best codebook */
+ CCmax_new = -1000.0f;
+ CBimax_new = 0;
+ for( i = 0; i < nb_cbk_search; i++ ) {
+ if( CC[ i ] > CCmax_new ) {
+ CCmax_new = CC[ i ];
+ CBimax_new = i;
+ }
+ }
+
+ /* Bias towards shorter lags */
+ lag_log2 = silk_log2( (silk_float)d );
+ CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2;
+
+ /* Bias towards previous lag */
+ if( prevLag > 0 ) {
+ delta_lag_log2_sqr = lag_log2 - prevLag_log2;
+ delta_lag_log2_sqr *= delta_lag_log2_sqr;
+ CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f );
+ }
+
+ if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
+ CCmax_new > nb_subfr * search_thres2 /* Correlation needs to be high enough to be voiced */
+ ) {
+ CCmax_b = CCmax_new_b;
+ CCmax = CCmax_new;
+ lag = d;
+ CBimax = CBimax_new;
+ }
+ }
+
+ if( lag == -1 ) {
+ /* No suitable candidate found; clear only the nb_subfr entries pitch_out is documented to hold */
+ silk_memset( pitch_out, 0, nb_subfr * sizeof(opus_int) );
+ *LTPCorr = 0.0f;
+ *lagIndex = 0;
+ *contourIndex = 0;
+ return 1;
+ }
+
+ /* Output normalized correlation */
+ *LTPCorr = (silk_float)( CCmax / nb_subfr );
+ silk_assert( *LTPCorr >= 0.0f );
+
+ if( Fs_kHz > 8 ) {
+ /* Search in original signal */
+
+ /* Compensate for decimation */
+ silk_assert( lag == silk_SAT16( lag ) );
+ if( Fs_kHz == 12 ) {
+ lag = silk_RSHIFT_ROUND( silk_SMULBB( lag, 3 ), 1 );
+ } else { /* Fs_kHz == 16 */
+ lag = silk_LSHIFT( lag, 1 );
+ }
+
+ lag = silk_LIMIT_int( lag, min_lag, max_lag );
+ start_lag = silk_max_int( lag - 2, min_lag );
+ end_lag = silk_min_int( lag + 2, max_lag );
+ lag_new = lag; /* to avoid undefined lag */
+ CBimax = 0; /* to avoid undefined lag */
+
+ CCmax = -1000.0f;
+
+ /* Calculate the correlations and energies needed in stage 3 */
+ silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
+ silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity );
+
+ lag_counter = 0;
+ silk_assert( lag == silk_SAT16( lag ) );
+ contour_bias = PE_FLATCONTOUR_BIAS / lag;
+
+ /* Set up cbk parameters according to complexity setting and frame length */
+ if( nb_subfr == PE_MAX_NB_SUBFR ) {
+ nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
+ cbk_size = PE_NB_CBKS_STAGE3_MAX;
+ Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
+ } else {
+ nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+ cbk_size = PE_NB_CBKS_STAGE3_10MS;
+ Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+ }
+
+ target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
+ energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0;
+ for( d = start_lag; d <= end_lag; d++ ) {
+ for( j = 0; j < nb_cbk_search; j++ ) {
+ cross_corr = 0.0;
+ energy = energy_tmp;
+ for( k = 0; k < nb_subfr; k++ ) {
+ cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ];
+ energy += energies_st3[ k ][ j ][ lag_counter ];
+ }
+ if( cross_corr > 0.0 ) {
+ CCmax_new = (silk_float)( 2 * cross_corr / energy );
+ /* Reduce depending on flatness of contour */
+ CCmax_new *= 1.0f - contour_bias * j;
+ } else {
+ CCmax_new = 0.0f;
+ }
+
+ if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { /* NOTE(review): always indexes silk_CB_lags_stage3, even when Lag_CB_ptr selects silk_CB_lags_stage3_10_ms - confirm intended */
+ CCmax = CCmax_new;
+ lag_new = d;
+ CBimax = j;
+ }
+ }
+ lag_counter++;
+ }
+
+ for( k = 0; k < nb_subfr; k++ ) {
+ pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
+ }
+ *lagIndex = (opus_int16)( lag_new - min_lag );
+ *contourIndex = (opus_int8)CBimax;
+ } else { /* Fs_kHz == 8 */
+ /* Save Lags */
+ for( k = 0; k < nb_subfr; k++ ) {
+ pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+ pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );
+ }
+ *lagIndex = (opus_int16)( lag - min_lag_8kHz );
+ *contourIndex = (opus_int8)CBimax;
+ }
+ silk_assert( *lagIndex >= 0 );
+ /* return as voiced */
+ return 0;
+}
+
+/***********************************************************************
+ * Calculates the correlations used in stage 3 search. In order to cover
+ * the whole lag codebook for all the searched offset lags (lag +- 2),
+ * the following correlations are needed in each sub frame:
+ *
+ * sf1: lag range [-8,...,7] total 16 correlations
+ * sf2: lag range [-4,...,4] total 9 correlations
+ * sf3: lag range [-3,....4] total 8 correlations
+ * sf4: lag range [-6,....8] total 15 correlations
+ *
+ * In total 48 correlations. The direct implementation computed in worst
+ * case 4*12*5 = 240 correlations, but more likely around 120.
+ ***********************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+    silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+    const silk_float frame[], /* I vector to correlate */
+    opus_int start_lag, /* I start lag */
+    opus_int sf_length, /* I sub frame length */
+    opus_int nb_subfr, /* I number of subframes */
+    opus_int complexity, /* I Complexity setting */
+    int arch /* I Run-time architecture */
+)
+{
+    const silk_float *subfr_ptr;
+    const opus_int8 *range_tab, *cb_tab;
+    opus_int sf, cb, n, n_stored, first_lag, last_lag, n_lags, offset;
+    opus_int n_cb_search, cb_stride;
+    silk_float corr_by_lag[ SCRATCH_SIZE ];
+    opus_val32 xcorr[ SCRATCH_SIZE ];
+
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    /* Choose the lag-range table, codebook table and codebook search size: */
+    /* the 10 ms set when fewer than PE_MAX_NB_SUBFR subframes are given,   */
+    /* otherwise the complexity-dependent full-frame set                    */
+    if( nb_subfr != PE_MAX_NB_SUBFR ) {
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        range_tab = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+        cb_tab = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        n_cb_search = PE_NB_CBKS_STAGE3_10MS;
+        cb_stride = PE_NB_CBKS_STAGE3_10MS;
+    } else {
+        range_tab = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+        cb_tab = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        n_cb_search = silk_nb_cbk_searchs_stage3[ complexity ];
+        cb_stride = PE_NB_CBKS_STAGE3_MAX;
+    }
+
+    for( sf = 0; sf < nb_subfr; sf++ ) {
+        /* Target subframe: starts 4 * sf_length samples into frame, then advances one subframe per pass */
+        subfr_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ] + sf * sf_length;
+
+        /* Lag offsets that have to be covered for this subframe */
+        first_lag = matrix_ptr( range_tab, sf, 0, 2 );
+        last_lag = matrix_ptr( range_tab, sf, 1, 2 );
+        n_lags = last_lag - first_lag + 1;
+        silk_assert( n_lags <= SCRATCH_SIZE );
+        celt_pitch_xcorr( subfr_ptr, subfr_ptr - start_lag - last_lag, xcorr, sf_length, n_lags, arch );
+
+        /* The value for lag offset j sits at xcorr[ last_lag - j ]; re-store it in ascending lag order */
+        for( n_stored = 0; n_stored < n_lags; n_stored++ ) {
+            silk_assert( n_stored < SCRATCH_SIZE );
+            corr_by_lag[ n_stored ] = xcorr[ n_lags - 1 - n_stored ];
+        }
+
+        /* For every searched codebook vector, copy the run of PE_NB_STAGE3_LAGS */
+        /* correlations that begins at that vector's lag offset                   */
+        for( cb = 0; cb < n_cb_search; cb++ ) {
+            offset = matrix_ptr( cb_tab, sf, cb, cb_stride ) - first_lag;
+            for( n = 0; n < PE_NB_STAGE3_LAGS; n++ ) {
+                silk_assert( offset + n < SCRATCH_SIZE );
+                silk_assert( offset + n < n_stored );
+                cross_corr_st3[ sf ][ cb ][ n ] = corr_by_lag[ offset + n ];
+            }
+        }
+    }
+}
+
+/********************************************************************/
+/* Calculate the energies used in stage 3 search, for each subframe. */
+/* The energies are calculated recursively. */
+/********************************************************************/
+static void silk_P_Ana_calc_energy_st3(
+ silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM energy array */
+ const silk_float frame[], /* I vector to measure energies of */
+ opus_int start_lag, /* I start lag */
+ opus_int sf_length, /* I sub frame length */
+ opus_int nb_subfr, /* I number of subframes */
+ opus_int complexity /* I Complexity setting */
+)
+{
+ const silk_float *target_ptr, *basis_ptr;
+ double energy;
+ opus_int k, i, j, lag_counter;
+ opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff;
+ silk_float scratch_mem[ SCRATCH_SIZE ];
+ const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+
+ silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+ silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+ /* Select lag-range table, codebook table and number of codebook vectors to fill: */
+ /* complexity-dependent set for PE_MAX_NB_SUBFR subframes, 10 ms set otherwise */
+ if( nb_subfr == PE_MAX_NB_SUBFR ) {
+ Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+ Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
+ nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+ cbk_size = PE_NB_CBKS_STAGE3_MAX;
+ } else {
+ silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+ Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+ Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+ nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+ cbk_size = PE_NB_CBKS_STAGE3_10MS;
+ }
+
+ /* First target subframe starts 4 * sf_length samples into frame */
+ target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
+ for( k = 0; k < nb_subfr; k++ ) {
+ lag_counter = 0;
+
+ /* Calculate the energy for first lag */
+ basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
+ /* Small constant added on top of the window energy; accumulated in double */
+ energy = silk_energy_FLP( basis_ptr, sf_length ) + 1e-3;
+ silk_assert( energy >= 0.0 );
+ scratch_mem[lag_counter] = (silk_float)energy;
+ lag_counter++;
+
+ /* Number of lags in this subframe's range; each further lag moves the */
+ /* sf_length-sample window one sample earlier, so only two samples change */
+ lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
+ for( i = 1; i < lag_diff; i++ ) {
+ /* remove part outside new window */
+ energy -= basis_ptr[sf_length - i] * (double)basis_ptr[sf_length - i];
+ silk_assert( energy >= 0.0 );
+
+ /* add part that comes into window */
+ energy += basis_ptr[ -i ] * (double)basis_ptr[ -i ];
+ silk_assert( energy >= 0.0 );
+ silk_assert( lag_counter < SCRATCH_SIZE );
+ scratch_mem[lag_counter] = (silk_float)energy;
+ lag_counter++;
+ }
+
+ /* delta is the lowest lag offset of the range, i.e. the lag stored at scratch_mem[ 0 ] */
+ delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+ for( i = 0; i < nb_cbk_search; i++ ) {
+ /* Fill out the 3 dim array that stores the energies for */
+ /* each code_book vector for each start lag */
+ idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+ for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+ silk_assert( idx + j < SCRATCH_SIZE );
+ silk_assert( idx + j < lag_counter );
+ energies_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];
+ silk_assert( energies_st3[ k ][ i ][ j ] >= 0.0f );
+ }
+ }
+ /* Advance to the next subframe */
+ target_ptr += sf_length;
+ }
+}
diff --git a/drivers/opus/silk/float/prefilter_FLP.c b/drivers/opus/silk/float/prefilter_FLP.c
new file mode 100644
index 0000000000..aa43852ff1
--- /dev/null
+++ b/drivers/opus/silk/float/prefilter_FLP.c
@@ -0,0 +1,206 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/*
+* Prefilter for finding Quantizer input signal
+*/
+/* Forward declaration: the definition follows silk_prefilter_FLP(), which calls it */
+static OPUS_INLINE void silk_prefilt_FLP(
+ silk_prefilter_state_FLP *P, /* I/O state */
+ silk_float st_res[], /* I Input samples [length] */
+ silk_float xw[], /* O Output samples [length] */
+ silk_float *HarmShapeFIR, /* I Harmonic shaping FIR taps [3] */
+ silk_float Tilt, /* I Multiplier for the AR state in the tilt term */
+ silk_float LF_MA_shp, /* I Multiplier for the MA state in the low-freq term */
+ silk_float LF_AR_shp, /* I Multiplier for the AR state in the low-freq term */
+ opus_int lag, /* I Lag for harmonic shaping; taps are skipped if <= 0 */
+ opus_int length /* I Number of samples to process */
+);
+
+/* Warped LPC analysis filter: each input sample is pushed through a chain of */
+/* first-order sections controlled by lambda; the section outputs are weighted */
+/* by coef[] and summed, and res[ n ] is the input sample minus that sum. */
+/* state[ 2 ], coef[ 0 ] and coef[ order - 1 ] are accessed unconditionally, */
+/* so order is expected to be at least 2 (and even, see the assert). */
+static void silk_warped_LPC_analysis_filter_FLP(
+ silk_float state[], /* I/O State [order + 1] */
+ silk_float res[], /* O Residual signal [length] */
+ const silk_float coef[], /* I Coefficients [order] */
+ const silk_float input[], /* I Input signal [length] */
+ const silk_float lambda, /* I Warping factor */
+ const opus_int length, /* I Length of input signal */
+ const opus_int order /* I Filter order (even) */
+)
+{
+ opus_int n, i;
+ silk_float acc, tmp1, tmp2;
+
+ /* Order must be even */
+ silk_assert( ( order & 1 ) == 0 );
+
+ for( n = 0; n < length; n++ ) {
+ /* Output of lowpass section */
+ tmp2 = state[ 0 ] + lambda * state[ 1 ];
+ /* The new input sample becomes the first state element */
+ state[ 0 ] = input[ n ];
+ /* Output of allpass section */
+ tmp1 = state[ 1 ] + lambda * ( state[ 2 ] - tmp2 );
+ state[ 1 ] = tmp2;
+ /* Start the weighted sum with the first section output */
+ acc = coef[ 0 ] * tmp2;
+ /* Loop over allpass sections */
+ /* Two sections per pass; tmp1 and tmp2 alternate between holding the */
+ /* previous section's output and the one being computed */
+ for( i = 2; i < order; i += 2 ) {
+ /* Output of allpass section */
+ tmp2 = state[ i ] + lambda * ( state[ i + 1 ] - tmp1 );
+ state[ i ] = tmp1;
+ acc += coef[ i - 1 ] * tmp1;
+ /* Output of allpass section */
+ tmp1 = state[ i + 1 ] + lambda * ( state[ i + 2 ] - tmp2 );
+ state[ i + 1 ] = tmp2;
+ acc += coef[ i ] * tmp2;
+ }
+ /* Last section output is stored in the final state element and weighted by the last coefficient */
+ state[ order ] = tmp1;
+ acc += coef[ order - 1 ] * tmp1;
+ res[ n ] = input[ n ] - acc;
+ }
+}
+
+/*
+* silk_prefilter. Main prefilter function.
+* Processes the input one subframe at a time: warped LPC analysis filtering,
+* a two-tap gain/tilt stage, then silk_prefilt_FLP() applied in place on the output.
+*/
+void silk_prefilter_FLP(
+    silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+    const silk_encoder_control_FLP *psEncCtrl, /* I Encoder control FLP */
+    silk_float xw[], /* O Weighted signal */
+    const silk_float x[] /* I Speech signal */
+)
+{
+    silk_prefilter_state_FLP *state = &psEnc->sPrefilt;
+    const opus_int subfr_len = psEnc->sCmn.subfr_length;
+    const opus_int n_subfr = psEnc->sCmn.nb_subfr;
+    const silk_float warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f;
+    opus_int sf, n, pitch_lag;
+    silk_float harm_gain, gain_cur, gain_prev;
+    silk_float harm_fir[ 3 ];
+    silk_float resid[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
+    const silk_float *in;
+    silk_float *out;
+
+    /* Lag carried over from the previous call; replaced per subframe for voiced frames */
+    pitch_lag = state->lagPrev;
+
+    for( sf = 0; sf < n_subfr; sf++ ) {
+        in = x + sf * subfr_len;
+        out = xw + sf * subfr_len;
+
+        if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+            pitch_lag = psEncCtrl->pitchL[ sf ];
+        }
+
+        /* Harmonic shaping taps for this subframe */
+        harm_gain = psEncCtrl->HarmShapeGain[ sf ] * ( 1.0f - psEncCtrl->HarmBoost[ sf ] );
+        harm_fir[ 0 ] = 0.25f * harm_gain;
+        harm_fir[ 1 ] = 32767.0f / 65536.0f * harm_gain;
+        harm_fir[ 2 ] = 0.25f * harm_gain;
+
+        /* Short term filtering of the input subframe into resid[] */
+        silk_warped_LPC_analysis_filter_FLP( state->sAR_shp, resid, &psEncCtrl->AR1[ sf * MAX_SHAPE_LPC_ORDER ], in,
+            warping, subfr_len, psEnc->sCmn.shapingLPCOrder );
+
+        /* Two-tap stage: gain on the current sample, negative weighted gain on the previous one */
+        gain_cur = psEncCtrl->GainsPre[ sf ];
+        gain_prev = -psEncCtrl->GainsPre[ sf ] *
+            ( psEncCtrl->HarmBoost[ sf ] * harm_gain + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT );
+        /* First sample uses the last residual sample saved from the previous subframe */
+        out[ 0 ] = gain_cur * resid[ 0 ] + gain_prev * state->sHarmHP;
+        for( n = 1; n < subfr_len; n++ ) {
+            out[ n ] = gain_cur * resid[ n ] + gain_prev * resid[ n - 1 ];
+        }
+        state->sHarmHP = resid[ subfr_len - 1 ];
+
+        /* Final shaping, reading and writing the same output buffer */
+        silk_prefilt_FLP( state, out, out, harm_fir, psEncCtrl->Tilt[ sf ], psEncCtrl->LF_MA_shp[ sf ],
+            psEncCtrl->LF_AR_shp[ sf ], pitch_lag, subfr_len );
+    }
+
+    /* Remember the last subframe's pitch lag for the next call */
+    state->lagPrev = psEncCtrl->pitchL[ n_subfr - 1 ];
+}
+
+/*
+* Prefilter for finding Quantizer input signal.
+* Per sample: subtracts a tilt term and a low-frequency term built from the two
+* filter states, pushes the result into the circular history buffer, and
+* subtracts a 3-tap harmonic term read from that buffer around the given lag.
+*/
+static OPUS_INLINE void silk_prefilt_FLP(
+    silk_prefilter_state_FLP *P, /* I/O state */
+    silk_float st_res[], /* I */
+    silk_float xw[], /* O */
+    silk_float *HarmShapeFIR, /* I */
+    silk_float Tilt, /* I */
+    silk_float LF_MA_shp, /* I */
+    silk_float LF_AR_shp, /* I */
+    opus_int lag, /* I */
+    opus_int length /* I */
+)
+{
+    opus_int n, tap_center, write_idx;
+    silk_float tilt_term, lf_term, harm_term;
+    silk_float ar_state, ma_state;
+    silk_float *hist;
+
+    /* Local copies of the state; written back once after the loop */
+    hist = P->sLTP_shp;
+    write_idx = P->sLTP_shp_buf_idx;
+    ar_state = P->sLF_AR_shp;
+    ma_state = P->sLF_MA_shp;
+
+    for( n = 0; n < length; n++ ) {
+        if( lag <= 0 ) {
+            /* No harmonic contribution without a positive lag */
+            harm_term = 0;
+        } else {
+            silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+            /* Middle tap position in the circular buffer; neighbours are at -1 and +1 */
+            tap_center = lag + write_idx - HARM_SHAPE_FIR_TAPS / 2;
+            harm_term = hist[ ( tap_center - 1 ) & LTP_MASK ] * HarmShapeFIR[ 0 ];
+            harm_term += hist[ ( tap_center ) & LTP_MASK ] * HarmShapeFIR[ 1 ];
+            harm_term += hist[ ( tap_center + 1 ) & LTP_MASK ] * HarmShapeFIR[ 2 ];
+        }
+
+        /* Both terms use the states from the previous sample */
+        tilt_term = ar_state * Tilt;
+        lf_term = ar_state * LF_AR_shp + ma_state * LF_MA_shp;
+
+        ar_state = st_res[ n ] - tilt_term;
+        ma_state = ar_state - lf_term;
+
+        /* The buffer is filled backwards, wrapping with LTP_MASK */
+        write_idx = ( write_idx - 1 ) & LTP_MASK;
+        hist[ write_idx ] = ma_state;
+
+        xw[ n ] = ma_state - harm_term;
+    }
+
+    /* Store the updated state */
+    P->sLTP_shp_buf_idx = write_idx;
+    P->sLF_AR_shp = ar_state;
+    P->sLF_MA_shp = ma_state;
+}
diff --git a/drivers/opus/silk/float/process_gains_FLP.c b/drivers/opus/silk/float/process_gains_FLP.c
new file mode 100644
index 0000000000..e83d05552a
--- /dev/null
+++ b/drivers/opus/silk/float/process_gains_FLP.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Processing of gains: reduces gains for voiced frames, soft-limits them against */
+/* the residual energy, quantizes them, and derives the quantizer offset type and Lambda */
+void silk_process_gains_FLP(
+    silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+    silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+    opus_int condCoding /* I The type of conditional coding to use */
+)
+{
+    silk_shape_state_FLP *shape_state = &psEnc->sShape;
+    const opus_int n_subfr = psEnc->sCmn.nb_subfr;
+    const opus_int is_voiced = ( psEnc->sCmn.indices.signalType == TYPE_VOICED );
+    opus_int sf;
+    opus_int32 gains_Q16[ MAX_NB_SUBFR ];
+    silk_float reduction, inv_max_sqr, limited, q_offset;
+
+    /* Voiced frames: scale all gains down as the LTP coding gain grows */
+    if( is_voiced ) {
+        reduction = 1.0f - 0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) );
+        for( sf = 0; sf < n_subfr; sf++ ) {
+            psEncCtrl->Gains[ sf ] *= reduction;
+        }
+    }
+
+    /* Factor that bounds the quantized signal, derived from the SNR setting and subframe length */
+    inv_max_sqr = ( silk_float )( pow( 2.0f, 0.33f * ( 21.0f - psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) ) ) / psEnc->sCmn.subfr_length );
+
+    for( sf = 0; sf < n_subfr; sf++ ) {
+        /* Soft limit on the ratio of residual energy and squared gain, hard cap at 32767 */
+        limited = psEncCtrl->Gains[ sf ];
+        limited = ( silk_float )sqrt( limited * limited + psEncCtrl->ResNrg[ sf ] * inv_max_sqr );
+        psEncCtrl->Gains[ sf ] = silk_min_float( limited, 32767.0f );
+
+        /* Q16 copy of the limited gain, input to the gain quantizer */
+        gains_Q16[ sf ] = (opus_int32)( psEncCtrl->Gains[ sf ] * 65536.0f );
+    }
+
+    /* Keep the unquantized gains and the gain index as they were before quantization */
+    silk_memcpy( psEncCtrl->GainsUnq_Q16, gains_Q16, n_subfr * sizeof( opus_int32 ) );
+    psEncCtrl->lastGainIndexPrev = shape_state->LastGainIndex;
+
+    /* Quantize; gains_Q16 is overwritten with the quantized values */
+    silk_gains_quant( psEnc->sCmn.indices.GainsIndices, gains_Q16,
+        &shape_state->LastGainIndex, condCoding == CODE_CONDITIONALLY, n_subfr );
+
+    /* Back from Q16 to Q0, replacing the unquantized gains */
+    for( sf = 0; sf < n_subfr; sf++ ) {
+        psEncCtrl->Gains[ sf ] = gains_Q16[ sf ] / 65536.0f;
+    }
+
+    /* Voiced frames: offset type 0 when LTP coding gain plus input tilt exceeds 1, else type 1 */
+    if( is_voiced ) {
+        psEnc->sCmn.indices.quantOffsetType =
+            ( psEncCtrl->LTPredCodGain + psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ) > 1.0f ) ? 0 : 1;
+    }
+
+    /* Quantizer boundary adjustment */
+    q_offset = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ] / 1024.0f;
+    psEncCtrl->Lambda = LAMBDA_OFFSET
+        + LAMBDA_DELAYED_DECISIONS * psEnc->sCmn.nStatesDelayedDecision
+        + LAMBDA_SPEECH_ACT * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f )
+        + LAMBDA_INPUT_QUALITY * psEncCtrl->input_quality
+        + LAMBDA_CODING_QUALITY * psEncCtrl->coding_quality
+        + LAMBDA_QUANT_OFFSET * q_offset;
+
+    silk_assert( psEncCtrl->Lambda > 0.0f );
+    silk_assert( psEncCtrl->Lambda < 2.0f );
+}
diff --git a/drivers/opus/silk/float/regularize_correlations_FLP.c b/drivers/opus/silk/float/regularize_correlations_FLP.c
new file mode 100644
index 0000000000..f056eadc57
--- /dev/null
+++ b/drivers/opus/silk/float/regularize_correlations_FLP.c
@@ -0,0 +1,48 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Add noise to the matrix diagonal and to the first correlation value */
+void silk_regularize_correlations_FLP(
+    silk_float *XX, /* I/O Correlation matrices */
+    silk_float *xx, /* I/O Correlation values */
+    const silk_float noise, /* I Noise energy to add */
+    const opus_int D /* I Dimension of XX */
+)
+{
+    opus_int diag = D;
+
+    /* Walk the D diagonal entries from last to first */
+    while( diag-- > 0 ) {
+        matrix_ptr( XX, diag, diag, D ) += noise;
+    }
+
+    /* Only element 0 of xx receives the noise term */
+    xx[ 0 ] += noise;
+}
diff --git a/drivers/opus/silk/float/residual_energy_FLP.c b/drivers/opus/silk/float/residual_energy_FLP.c
new file mode 100644
index 0000000000..011efcef04
--- /dev/null
+++ b/drivers/opus/silk/float/residual_energy_FLP.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+#define MAX_ITERATIONS_RESIDUAL_NRG 10
+#define REGULARIZATION_FACTOR 1e-8f
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+/* If the result is not positive, white noise is added to the diagonal of wXX */
+/* (the amount doubles each round) and the energy is recomputed, for at most */
+/* MAX_ITERATIONS_RESIDUAL_NRG rounds; if it is still not positive, 1.0f is returned. */
+silk_float silk_residual_energy_covar_FLP( /* O Weighted residual energy */
+    const silk_float *c, /* I Filter coefficients */
+    silk_float *wXX, /* I/O Weighted correlation matrix, reg. out */
+    const silk_float *wXx, /* I Weighted correlation vector */
+    const silk_float wxx, /* I Weighted correlation value */
+    const opus_int D /* I Dimension */
+)
+{
+    opus_int i, j, k;
+    silk_float tmp, nrg = 0.0f, regularization;
+
+    /* Safety checks */
+    silk_assert( D >= 0 );
+
+    /* Initial noise level is scaled by the first and last diagonal elements. */
+    /* D == 0 passes the check above, but then wXX[ D * D - 1 ] is wXX[ -1 ]; */
+    /* the value is never used in that case, so the matrix is not touched. */
+    regularization = 0.0f;
+    if( D > 0 ) {
+        regularization = REGULARIZATION_FACTOR * ( wXX[ 0 ] + wXX[ D * D - 1 ] );
+    }
+
+    for( k = 0; k < MAX_ITERATIONS_RESIDUAL_NRG; k++ ) {
+        nrg = wxx;
+
+        /* Subtract 2 * wXx' * c */
+        tmp = 0.0f;
+        for( i = 0; i < D; i++ ) {
+            tmp += wXx[ i ] * c[ i ];
+        }
+        nrg -= 2.0f * tmp;
+
+        /* compute c' * wXX * c, assuming wXX is symmetric */
+        /* (each off-diagonal pair is visited once and counted twice) */
+        for( i = 0; i < D; i++ ) {
+            tmp = 0.0f;
+            for( j = i + 1; j < D; j++ ) {
+                tmp += matrix_c_ptr( wXX, i, j, D ) * c[ j ];
+            }
+            nrg += c[ i ] * ( 2.0f * tmp + matrix_c_ptr( wXX, i, i, D ) * c[ i ] );
+        }
+        if( nrg > 0 ) {
+            break;
+        } else {
+            /* Add white noise */
+            for( i = 0; i < D; i++ ) {
+                matrix_c_ptr( wXX, i, i, D ) += regularization;
+            }
+            /* Increase noise for next run */
+            regularization *= 2.0f;
+        }
+    }
+    /* All rounds used without reaching a positive energy: fall back to 1.0f */
+    if( k == MAX_ITERATIONS_RESIDUAL_NRG ) {
+        silk_assert( nrg == 0 );
+        nrg = 1.0f;
+    }
+
+    return nrg;
+}
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order */
+/* of preceding samples. Each frame half (two subframes) is filtered with its own set */
+/* of AR coefficients; the second half is only processed when nb_subfr == MAX_NB_SUBFR. */
+void silk_residual_energy_FLP(
+    silk_float nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */
+    const silk_float x[], /* I Input signal */
+    silk_float a[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */
+    const silk_float gains[], /* I Quantization gains */
+    const opus_int subfr_length, /* I Subframe length */
+    const opus_int nb_subfr, /* I number of subframes */
+    const opus_int LPC_order /* I LPC order */
+)
+{
+    opus_int half, sf, out_idx, n_halves, stride;
+    silk_float residual[ ( MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ) / 2 ];
+
+    /* One subframe together with its LPC_order preceding samples */
+    stride = LPC_order + subfr_length;
+    n_halves = ( nb_subfr == MAX_NB_SUBFR ) ? 2 : 1;
+
+    for( half = 0; half < n_halves; half++ ) {
+        /* Filter input to create the LPC residual for this frame half */
+        silk_LPC_analysis_filter_FLP( residual, a[ half ], x + 2 * half * stride, 2 * stride, LPC_order );
+
+        /* Measure both subframes of the half, skipping the LPC_order leading samples of each */
+        for( sf = 0; sf < 2; sf++ ) {
+            out_idx = 2 * half + sf;
+            nrgs[ out_idx ] = ( silk_float )( gains[ out_idx ] * gains[ out_idx ] * silk_energy_FLP( residual + LPC_order + sf * stride, subfr_length ) );
+        }
+    }
+}
diff --git a/drivers/opus/silk/float/scale_copy_vector_FLP.c b/drivers/opus/silk/float/scale_copy_vector_FLP.c
new file mode 100644
index 0000000000..7578d44894
--- /dev/null
+++ b/drivers/opus/silk/float/scale_copy_vector_FLP.c
@@ -0,0 +1,57 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* copy and multiply a vector by a constant: */
+/* data_out[ i ] = gain * data_in[ i ] for 0 <= i < dataSize. */
+/* Nothing is written when dataSize <= 0. */
+void silk_scale_copy_vector_FLP(
+    silk_float *data_out,
+    const silk_float *data_in,
+    silk_float gain,
+    opus_int dataSize
+)
+{
+    opus_int i, dataSize4;
+
+    /* 4x unrolled loop over the largest multiple of 4 not exceeding dataSize. */
+    /* The two low bits are cleared instead of masking with 0xFFFC, so sizes of */
+    /* 65536 and above stay in the unrolled loop and a non-positive size gives 0 */
+    /* (a 16-bit mask would turn a negative size into a positive count). */
+    dataSize4 = ( dataSize > 0 ) ? ( dataSize & ~3 ) : 0;
+    for( i = 0; i < dataSize4; i += 4 ) {
+        data_out[ i + 0 ] = gain * data_in[ i + 0 ];
+        data_out[ i + 1 ] = gain * data_in[ i + 1 ];
+        data_out[ i + 2 ] = gain * data_in[ i + 2 ];
+        data_out[ i + 3 ] = gain * data_in[ i + 3 ];
+    }
+
+    /* any remaining elements */
+    for( ; i < dataSize; i++ ) {
+        data_out[ i ] = gain * data_in[ i ];
+    }
+}
diff --git a/drivers/opus/silk/float/scale_vector_FLP.c b/drivers/opus/silk/float/scale_vector_FLP.c
new file mode 100644
index 0000000000..03345d519d
--- /dev/null
+++ b/drivers/opus/silk/float/scale_vector_FLP.c
@@ -0,0 +1,56 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* Multiply a vector by a constant, in place: data1[ i ] *= gain */
+void silk_scale_vector_FLP(
+ silk_float *data1, /* I/O Vector scaled in place; elements 0 .. dataSize-1 */
+ silk_float gain, /* I Constant multiplier */
+ opus_int dataSize /* I Number of elements to scale */
+)
+{
+ opus_int i, dataSize4;
+
+ /* 4x unrolled loop */
+ dataSize4 = dataSize & 0xFFFC; /* clears the two low bits (multiple of 4); the mask also drops bits above bit 15, the tail loop below handles whatever is left */
+ for( i = 0; i < dataSize4; i += 4 ) {
+ data1[ i + 0 ] *= gain;
+ data1[ i + 1 ] *= gain;
+ data1[ i + 2 ] *= gain;
+ data1[ i + 3 ] *= gain;
+ }
+
+ /* any remaining elements */
+ for( ; i < dataSize; i++ ) { /* continues from the index where the unrolled loop stopped */
+ data1[ i ] *= gain;
+ }
+}
diff --git a/drivers/opus/silk/float/schur_FLP.c b/drivers/opus/silk/float/schur_FLP.c
new file mode 100644
index 0000000000..76b87f1304
--- /dev/null
+++ b/drivers/opus/silk/float/schur_FLP.c
@@ -0,0 +1,70 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+silk_float silk_schur_FLP( /* O returns residual energy; derives `order` reflection coefficients from an autocorrelation sequence */
+ silk_float refl_coef[], /* O reflection coefficients (length order) */
+ const silk_float auto_corr[], /* I autocorrelation sequence (length order+1) */
+ opus_int order /* I order */
+)
+{
+ opus_int k, n;
+ silk_float C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; /* two working copies of the correlations, both updated in place below */
+ silk_float Ctmp1, Ctmp2, rc_tmp;
+
+ silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); /* only these even orders are expected */
+
+ /* Copy correlations */
+ for( k = 0; k < order+1; k++ ) {
+ C[ k ][ 0 ] = C[ k ][ 1 ] = auto_corr[ k ];
+ }
+
+ for( k = 0; k < order; k++ ) {
+ /* Get reflection coefficient */
+ rc_tmp = -C[ k + 1 ][ 0 ] / silk_max_float( C[ 0 ][ 1 ], 1e-9f ); /* NOTE(review): silk_max_float presumably floors the divisor at 1e-9f to avoid dividing by ~0 - confirm */
+
+ /* Save the output */
+ refl_coef[ k ] = rc_tmp;
+
+ /* Update correlations */
+ for( n = 0; n < order - k; n++ ) { /* one fewer pair is updated on each pass of k */
+ Ctmp1 = C[ n + k + 1 ][ 0 ]; /* both old values are read before either is overwritten */
+ Ctmp2 = C[ n ][ 1 ];
+ C[ n + k + 1 ][ 0 ] = Ctmp1 + Ctmp2 * rc_tmp;
+ C[ n ][ 1 ] = Ctmp2 + Ctmp1 * rc_tmp;
+ }
+ }
+
+ /* Return residual energy */
+ return C[ 0 ][ 1 ]; /* value left in C[ 0 ][ 1 ] after the last update */
+}
+
diff --git a/drivers/opus/silk/float/solve_LS_FLP.c b/drivers/opus/silk/float/solve_LS_FLP.c
new file mode 100644
index 0000000000..9fd962b33d
--- /dev/null
+++ b/drivers/opus/silk/float/solve_LS_FLP.c
@@ -0,0 +1,207 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/**********************************************************************
+ * LDL Factorisation. Finds the lower triangular matrix L and the diagonal
+ * matrix D (only the diagonal elements are returned in a vector) such that
+ * the symmetric matrix A is given by A = L*D*L'.
+ **********************************************************************/
+static OPUS_INLINE void silk_LDL_FLP(
+ silk_float *A, /* I/O Pointer to Symmetric Square Matrix */
+ opus_int M, /* I Size of Matrix */
+ silk_float *L, /* I/O Pointer to Square triangular Matrix (silk_solve_LDL_FLP describes it as lower triangular with ones on the diagonal) */
+ silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */
+);
+
+/**********************************************************************
+ * Function to solve linear equation Ax = b, when A is a MxM lower
+ * triangular matrix, with ones on the diagonal.
+ **********************************************************************/
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+ const silk_float *L, /* I Pointer to Lower Triangular Matrix (the diagonal is not read; it is taken to be one) */
+ opus_int M, /* I Dim of Matrix equation */
+ const silk_float *b, /* I b Vector (elements 0 .. M-1 are read) */
+ silk_float *x /* O x Vector (elements 0 .. M-1 are written) */
+);
+
+/**********************************************************************
+ * Function to solve linear equation (A^T)x = b, when A is a MxM lower
+ * triangular, with ones on the diagonal. (ie then A^T is upper triangular)
+ **********************************************************************/
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+ const silk_float *L, /* I Pointer to Lower Triangular Matrix (the diagonal is not read; it is taken to be one) */
+ opus_int M, /* I Dim of Matrix equation */
+ const silk_float *b, /* I b Vector (elements 0 .. M-1 are read) */
+ silk_float *x /* O x Vector (elements 0 .. M-1 are written) */
+);
+
+/**********************************************************************
+ * Function to solve linear equation Ax = b, when A is a MxM
+ * symmetric square matrix - using LDL factorisation
+ **********************************************************************/
+void silk_solve_LDL_FLP(
+ silk_float *A, /* I/O Symmetric square matrix, out: regularized (silk_LDL_FLP may add to its diagonal) */
+ const opus_int M, /* I Size of matrix */
+ const silk_float *b, /* I Pointer to b vector */
+ silk_float *x /* O Pointer to x solution vector */
+)
+{
+ opus_int i;
+ silk_float L[ MAX_MATRIX_SIZE ][ MAX_MATRIX_SIZE ]; /* triangular factor, filled in by silk_LDL_FLP */
+ silk_float T[ MAX_MATRIX_SIZE ]; /* intermediate solution vector */
+ silk_float Dinv[ MAX_MATRIX_SIZE ]; /* inverse diagonal elements of D*/
+
+ silk_assert( M <= MAX_MATRIX_SIZE ); /* the local arrays above hold at most MAX_MATRIX_SIZE rows */
+
+ /***************************************************
+ Factorize A by LDL such that A = L*D*(L^T),
+ where L is lower triangular with ones on diagonal
+ ****************************************************/
+ silk_LDL_FLP( A, M, &L[ 0 ][ 0 ], Dinv ); /* L is passed as a flat pointer and indexed there with dimension M */
+
+ /****************************************************
+ * substitute D*(L^T) = T. ie:
+ L*D*(L^T)*x = b => L*T = b <=> T = inv(L)*b
+ ******************************************************/
+ silk_SolveWithLowerTriangularWdiagOnes_FLP( &L[ 0 ][ 0 ], M, b, T );
+
+ /****************************************************
+ D*(L^T)*x = T <=> (L^T)*x = inv(D)*T, because D is
+ diagonal just multiply with 1/d_i
+ ****************************************************/
+ for( i = 0; i < M; i++ ) {
+ T[ i ] = T[ i ] * Dinv[ i ];
+ }
+ /****************************************************
+ x = inv(L') * T, where T now holds inv(D) * inv(L) * b
+ *****************************************************/
+ silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x );
+}
+
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+ const silk_float *L, /* I Pointer to Lower Triangular Matrix */
+ opus_int M, /* I Dim of Matrix equation */
+ const silk_float *b, /* I b Vector */
+ silk_float *x /* O x Vector */
+)
+{
+ opus_int i, j;
+ silk_float temp;
+ const silk_float *ptr1;
+
+ for( i = M - 1; i >= 0; i-- ) { /* back substitution: x[ M-1 ] is solved first, x[ 0 ] last */
+ ptr1 = matrix_adr( L, 0, i, M ); /* NOTE(review): presumably the address of L(0,i); matrix_adr is defined outside this file - confirm */
+ temp = 0;
+ for( j = M - 1; j > i ; j-- ) {
+ temp += ptr1[ j * M ] * x[ j ]; /* stride of M between consecutive j; only already-solved x[ j ], j > i, are used */
+ }
+ temp = b[ i ] - temp;
+ x[ i ] = temp; /* no division: the diagonal is taken to be one */
+ }
+}
+
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+ const silk_float *L, /* I Pointer to Lower Triangular Matrix */
+ opus_int M, /* I Dim of Matrix equation */
+ const silk_float *b, /* I b Vector */
+ silk_float *x /* O x Vector */
+)
+{
+ opus_int i, j;
+ silk_float temp;
+ const silk_float *ptr1;
+
+ for( i = 0; i < M; i++ ) { /* forward substitution: x[ 0 ] is solved first, x[ M-1 ] last */
+ ptr1 = matrix_adr( L, i, 0, M ); /* NOTE(review): presumably the address of L(i,0); matrix_adr is defined outside this file - confirm */
+ temp = 0;
+ for( j = 0; j < i; j++ ) {
+ temp += ptr1[ j ] * x[ j ]; /* unit stride; only already-solved x[ j ], j < i, are used */
+ }
+ temp = b[ i ] - temp;
+ x[ i ] = temp; /* no division: the diagonal is taken to be one */
+ }
+}
+
+static OPUS_INLINE void silk_LDL_FLP(
+ silk_float *A, /* I/O Pointer to Symmetric Square Matrix (its diagonal is increased when a pivot is too small) */
+ opus_int M, /* I Size of Matrix */
+ silk_float *L, /* I/O Pointer to Square triangular Matrix (ones are written on its diagonal) */
+ silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */
+)
+{
+ opus_int i, j, k, loop_count, err = 1; /* err starts at 1 so that the first attempt runs */
+ silk_float *ptr1, *ptr2;
+ double temp, diag_min_value; /* accumulations are carried in double */
+ silk_float v[ MAX_MATRIX_SIZE ], D[ MAX_MATRIX_SIZE ]; /* temp arrays*/
+
+ silk_assert( M <= MAX_MATRIX_SIZE );
+
+ diag_min_value = FIND_LTP_COND_FAC * 0.5f * ( A[ 0 ] + A[ M * M - 1 ] ); /* smallest accepted pivot, scaled from the mean of the first and last elements of A */
+ for( loop_count = 0; loop_count < M && err == 1; loop_count++ ) { /* retry up to M times while the previous attempt failed */
+ err = 0;
+ for( j = 0; j < M; j++ ) {
+ ptr1 = matrix_adr( L, j, 0, M );
+ temp = matrix_ptr( A, j, j, M ); /* element in row j column j*/
+ for( i = 0; i < j; i++ ) {
+ v[ i ] = ptr1[ i ] * D[ i ];
+ temp -= ptr1[ i ] * v[ i ];
+ }
+ if( temp < diag_min_value ) {
+ /* Badly conditioned matrix: add white noise and run again */
+ temp = ( loop_count + 1 ) * diag_min_value - temp; /* amount added to the diagonal grows with each retry */
+ for( i = 0; i < M; i++ ) {
+ matrix_ptr( A, i, i, M ) += ( silk_float )temp;
+ }
+ err = 1;
+ break; /* abandon this attempt; the outer loop restarts from j = 0 */
+ }
+ D[ j ] = ( silk_float )temp;
+ Dinv[ j ] = ( silk_float )( 1.0f / temp );
+ matrix_ptr( L, j, j, M ) = 1.0f;
+
+ ptr1 = matrix_adr( A, j, 0, M );
+ ptr2 = matrix_adr( L, j + 1, 0, M);
+ for( i = j + 1; i < M; i++ ) {
+ temp = 0.0;
+ for( k = 0; k < j; k++ ) {
+ temp += ptr2[ k ] * v[ k ];
+ }
+ matrix_ptr( L, i, j, M ) = ( silk_float )( ( ptr1[ i ] - temp ) * Dinv[ j ] );
+ ptr2 += M; /* advance by M elements to the L entries used for the next i */
+ }
+ }
+ }
+ silk_assert( err == 0 ); /* the last attempt is expected to have succeeded */
+}
+
diff --git a/drivers/opus/silk/float/sort_FLP.c b/drivers/opus/silk/float/sort_FLP.c
new file mode 100644
index 0000000000..58ea485116
--- /dev/null
+++ b/drivers/opus/silk/float/sort_FLP.c
@@ -0,0 +1,83 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* Insertion sort (fast for already almost sorted arrays): */
+/* Best case: O(n) for an already sorted array */
+/* Worst case: O(n^2) for an inversely sorted array */
+
+#include "typedef.h"
+#include "SigProc_FLP.h"
+
+void silk_insertion_sort_decreasing_FLP( /* Partial insertion sort: leaves the K largest values of a[ 0 .. L-1 ] in a[ 0 .. K-1 ], in decreasing order */
+ silk_float *a, /* I/O Unsorted / Sorted vector */
+ opus_int *idx, /* O Index vector for the sorted elements */
+ const opus_int L, /* I Vector length */
+ const opus_int K /* I Number of correctly sorted positions */
+)
+{
+ silk_float value;
+ opus_int i, j;
+
+ /* Safety checks */
+ silk_assert( K > 0 );
+ silk_assert( L > 0 );
+ silk_assert( L >= K );
+
+ /* Write start indices in index vector */
+ for( i = 0; i < K; i++ ) {
+ idx[ i ] = i;
+ }
+
+ /* Sort vector elements by value, decreasing order */
+ for( i = 1; i < K; i++ ) {
+ value = a[ i ];
+ for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { /* strict '>' : equal values are not moved past each other */
+ a[ j + 1 ] = a[ j ]; /* Shift value */
+ idx[ j + 1 ] = idx[ j ]; /* Shift index */
+ }
+ a[ j + 1 ] = value; /* Write value */
+ idx[ j + 1 ] = i; /* Write index */
+ }
+
+ /* If less than L values are asked check the remaining values, */
+ /* but only spend CPU to ensure that the K first values are correct */
+ for( i = K; i < L; i++ ) {
+ value = a[ i ];
+ if( value > a[ K - 1 ] ) { /* only candidates larger than the current K-th value are inserted */
+ for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { /* starting at K-2 means the old a[ K-1 ] is overwritten, i.e. dropped */
+ a[ j + 1 ] = a[ j ]; /* Shift value */
+ idx[ j + 1 ] = idx[ j ]; /* Shift index */
+ }
+ a[ j + 1 ] = value; /* Write value */
+ idx[ j + 1 ] = i; /* Write index */
+ }
+ }
+}
diff --git a/drivers/opus/silk/float/structs_FLP.h b/drivers/opus/silk/float/structs_FLP.h
new file mode 100644
index 0000000000..4082914d93
--- /dev/null
+++ b/drivers/opus/silk/float/structs_FLP.h
@@ -0,0 +1,131 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_STRUCTS_FLP_H
+#define SILK_STRUCTS_FLP_H
+
+#include "typedef.h"
+#include "silk_main.h"
+#include "structs.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/********************************/
+/* Noise shaping analysis state */
+/********************************/
+typedef struct { /* Held as the sShape member of silk_encoder_state_FLP */
+ opus_int8 LastGainIndex;
+ silk_float HarmBoost_smth; /* NOTE(review): the "_smth" suffix presumably marks a smoothed running value - confirm against the noise shaping code */
+ silk_float HarmShapeGain_smth;
+ silk_float Tilt_smth;
+} silk_shape_state_FLP;
+
+/********************************/
+/* Prefilter state */
+/********************************/
+typedef struct { /* Held as the sPrefilt member of silk_encoder_state_FLP */
+ silk_float sLTP_shp[ LTP_BUF_LENGTH ];
+ silk_float sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ];
+ opus_int sLTP_shp_buf_idx; /* NOTE(review): presumably an index into sLTP_shp - confirm against the prefilter code */
+ silk_float sLF_AR_shp;
+ silk_float sLF_MA_shp;
+ silk_float sHarmHP;
+ opus_int32 rand_seed;
+ opus_int lagPrev;
+} silk_prefilter_state_FLP;
+
+/********************************/
+/* Encoder state FLP */
+/********************************/
+typedef struct { /* One instance per channel inside silk_encoder.state_Fxx */
+ silk_encoder_state sCmn; /* Common struct, shared with fixed-point code */
+ silk_shape_state_FLP sShape; /* Noise shaping state */
+ silk_prefilter_state_FLP sPrefilt; /* Prefilter State */
+
+ /* Floating-point signal buffer and pitch analysis result */
+ silk_float x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */
+ silk_float LTPCorr; /* Normalized correlation from pitch lag estimator */
+} silk_encoder_state_FLP;
+
+/************************/
+/* Encoder control FLP */
+/************************/
+typedef struct {
+ /* Prediction and coding parameters */
+ silk_float Gains[ MAX_NB_SUBFR ]; /* one gain per subframe; silk_NSQ_wrapper_FLP scales these by 65536 into Gains_Q16 */
+ silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ]; /* holds interpolated and final coefficients */
+ silk_float LTPCoef[LTP_ORDER * MAX_NB_SUBFR]; /* LTP_ORDER coefficients per subframe */
+ silk_float LTP_scale;
+ opus_int pitchL[ MAX_NB_SUBFR ]; /* passed unchanged to the NSQ routines by silk_NSQ_wrapper_FLP */
+
+ /* Noise shaping parameters */
+ silk_float AR1[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+ silk_float AR2[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; /* subframe i starts at index i * MAX_SHAPE_LPC_ORDER (see silk_NSQ_wrapper_FLP) */
+ silk_float LF_MA_shp[ MAX_NB_SUBFR ];
+ silk_float LF_AR_shp[ MAX_NB_SUBFR ];
+ silk_float GainsPre[ MAX_NB_SUBFR ];
+ silk_float HarmBoost[ MAX_NB_SUBFR ];
+ silk_float Tilt[ MAX_NB_SUBFR ];
+ silk_float HarmShapeGain[ MAX_NB_SUBFR ];
+ silk_float Lambda;
+ silk_float input_quality;
+ silk_float coding_quality;
+
+ /* Measures */
+ silk_float sparseness;
+ silk_float predGain;
+ silk_float LTPredCodGain;
+ silk_float ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */
+
+ /* Parameters for CBR mode */
+ opus_int32 GainsUnq_Q16[ MAX_NB_SUBFR ];
+ opus_int8 lastGainIndexPrev;
+} silk_encoder_control_FLP;
+
+/************************/
+/* Encoder Super Struct */
+/************************/
+typedef struct {
+ silk_encoder_state_FLP state_Fxx[ ENCODER_NUM_CHANNELS ]; /* one floating-point encoder state per channel */
+ stereo_enc_state sStereo;
+ opus_int32 nBitsExceeded;
+ opus_int nChannelsAPI;
+ opus_int nChannelsInternal;
+ opus_int nPrevChannelsInternal;
+ opus_int timeSinceSwitchAllowed_ms; /* NOTE(review): the "_ms" suffix suggests milliseconds - confirm against the code that updates it */
+ opus_int allowBandwidthSwitch;
+ opus_int prev_decode_only_middle;
+} silk_encoder;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/float/warped_autocorrelation_FLP.c b/drivers/opus/silk/float/warped_autocorrelation_FLP.c
new file mode 100644
index 0000000000..6075dfe8d3
--- /dev/null
+++ b/drivers/opus/silk/float/warped_autocorrelation_FLP.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Autocorrelations for a warped frequency axis: writes order + 1 correlation values, accumulated in double */
+void silk_warped_autocorrelation_FLP(
+ silk_float *corr, /* O Result [order + 1] */
+ const silk_float *input, /* I Input data to correlate */
+ const silk_float warping, /* I Warping coefficient */
+ const opus_int length, /* I Length of input */
+ const opus_int order /* I Correlation order (even) */
+)
+{
+ opus_int n, i;
+ double tmp1, tmp2;
+ double state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; /* allpass section memories, all starting at zero */
+ double C[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; /* correlation accumulators, kept in double */
+
+ /* Order must be even */
+ silk_assert( ( order & 1 ) == 0 ); /* the section loop below advances two sections per pass */
+
+ /* Loop over samples */
+ for( n = 0; n < length; n++ ) {
+ tmp1 = input[ n ];
+ /* Loop over allpass sections */
+ for( i = 0; i < order; i += 2 ) {
+ /* Output of allpass section */
+ tmp2 = state[ i ] + warping * ( state[ i + 1 ] - tmp1 );
+ state[ i ] = tmp1;
+ C[ i ] += state[ 0 ] * tmp1; /* state[ 0 ] already holds the current input sample at this point */
+ /* Output of allpass section */
+ tmp1 = state[ i + 1 ] + warping * ( state[ i + 2 ] - tmp2 );
+ state[ i + 1 ] = tmp2;
+ C[ i + 1 ] += state[ 0 ] * tmp2;
+ }
+ state[ order ] = tmp1; /* output of the last section */
+ C[ order ] += state[ 0 ] * tmp1;
+ }
+
+ /* Copy correlations in silk_float output format */
+ for( i = 0; i < order + 1; i++ ) {
+ corr[ i ] = ( silk_float )C[ i ];
+ }
+}
diff --git a/drivers/opus/silk/float/wrappers_FLP.c b/drivers/opus/silk/float/wrappers_FLP.c
new file mode 100644
index 0000000000..c4e34e5578
--- /dev/null
+++ b/drivers/opus/silk/float/wrappers_FLP.c
@@ -0,0 +1,201 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Wrappers. Calls flp / fix code */
+
+/* Convert AR filter coefficients to NLSF parameters (float front end that calls silk_A2NLSF) */
+void silk_A2NLSF_FLP(
+ opus_int16 *NLSF_Q15, /* O NLSF vector [ LPC_order ] */
+ const silk_float *pAR, /* I LPC coefficients [ LPC_order ] */
+ const opus_int LPC_order /* I LPC order */
+)
+{
+ opus_int i;
+ opus_int32 a_fix_Q16[ MAX_LPC_ORDER ]; /* integer copy of pAR handed to silk_A2NLSF */
+
+ for( i = 0; i < LPC_order; i++ ) {
+ a_fix_Q16[ i ] = silk_float2int( pAR[ i ] * 65536.0f ); /* 65536 = 2^16: float -> Q16 */
+ }
+
+ silk_A2NLSF( NLSF_Q15, a_fix_Q16, LPC_order ); /* the conversion itself is done by this routine */
+}
+
+/* Convert LSF parameters to AR prediction filter coefficients (float front end that calls silk_NLSF2A) */
+void silk_NLSF2A_FLP(
+ silk_float *pAR, /* O LPC coefficients [ LPC_order ] */
+ const opus_int16 *NLSF_Q15, /* I NLSF vector [ LPC_order ] */
+ const opus_int LPC_order /* I LPC order */
+)
+{
+ opus_int i;
+ opus_int16 a_fix_Q12[ MAX_LPC_ORDER ]; /* integer result of silk_NLSF2A */
+
+ silk_NLSF2A( a_fix_Q12, NLSF_Q15, LPC_order );
+
+ for( i = 0; i < LPC_order; i++ ) {
+ pAR[ i ] = ( silk_float )a_fix_Q12[ i ] * ( 1.0f / 4096.0f ); /* 4096 = 2^12: Q12 -> float */
+ }
+}
+
+/******************************************/
+/* Floating-point NLSF processing wrapper */
+/******************************************/
+void silk_process_NLSFs_FLP( /* Calls silk_process_NLSFs and converts its Q12 prediction coefficients to float */
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */
+ opus_int16 NLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */
+ const opus_int16 prev_NLSF_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */
+)
+{
+ opus_int i, j;
+ opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; /* integer output of silk_process_NLSFs */
+
+ silk_process_NLSFs( psEncC, PredCoef_Q12, NLSF_Q15, prev_NLSF_Q15);
+
+ for( j = 0; j < 2; j++ ) { /* both coefficient sets are converted */
+ for( i = 0; i < psEncC->predictLPCOrder; i++ ) { /* only the first predictLPCOrder entries of each set are written */
+ PredCoef[ j ][ i ] = ( silk_float )PredCoef_Q12[ j ][ i ] * ( 1.0f / 4096.0f ); /* 4096 = 2^12: Q12 -> float */
+ }
+ }
+}
+
+/****************************************/
+/* Floating-point Silk NSQ wrapper */
+/****************************************/
+void silk_NSQ_wrapper_FLP( /* Converts the float control data and input to integer Q formats, then calls silk_NSQ or silk_NSQ_del_dec */
+ silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
+ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
+ SideInfoIndices *psIndices, /* I/O Quantization indices */
+ silk_nsq_state *psNSQ, /* I/O Noise Shaping Quantization state */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ const silk_float x[] /* I Prefiltered input signal */
+)
+{
+ opus_int i, j;
+ opus_int32 x_Q3[ MAX_FRAME_LENGTH ];
+ opus_int32 Gains_Q16[ MAX_NB_SUBFR ];
+ silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+ opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ];
+ opus_int LTP_scale_Q14;
+
+ /* Noise shaping parameters */
+ opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+ opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */
+ opus_int Lambda_Q10;
+ opus_int Tilt_Q14[ MAX_NB_SUBFR ];
+ opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ];
+
+ /* Convert control struct to fix control struct */
+ /* Noise shape parameters */
+ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+ for( j = 0; j < psEnc->sCmn.shapingLPCOrder; j++ ) { /* entries at j >= shapingLPCOrder are left unset */
+ AR2_Q13[ i * MAX_SHAPE_LPC_ORDER + j ] = silk_float2int( psEncCtrl->AR2[ i * MAX_SHAPE_LPC_ORDER + j ] * 8192.0f ); /* 8192 = 2^13: float -> Q13 */
+ }
+ }
+
+ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+ LF_shp_Q14[ i ] = silk_LSHIFT32( silk_float2int( psEncCtrl->LF_AR_shp[ i ] * 16384.0f ), 16 ) | /* LF_AR_shp goes in the upper 16 bits */
+ (opus_uint16)silk_float2int( psEncCtrl->LF_MA_shp[ i ] * 16384.0f ); /* LF_MA_shp goes in the lower 16 bits */
+ Tilt_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->Tilt[ i ] * 16384.0f ); /* 16384 = 2^14: float -> Q14 */
+ HarmShapeGain_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->HarmShapeGain[ i ] * 16384.0f );
+ }
+ Lambda_Q10 = ( opus_int )silk_float2int( psEncCtrl->Lambda * 1024.0f ); /* 1024 = 2^10: float -> Q10 */
+
+ /* prediction and coding parameters */
+ for( i = 0; i < psEnc->sCmn.nb_subfr * LTP_ORDER; i++ ) {
+ LTPCoef_Q14[ i ] = (opus_int16)silk_float2int( psEncCtrl->LTPCoef[ i ] * 16384.0f );
+ }
+
+ for( j = 0; j < 2; j++ ) {
+ for( i = 0; i < psEnc->sCmn.predictLPCOrder; i++ ) {
+ PredCoef_Q12[ j ][ i ] = (opus_int16)silk_float2int( psEncCtrl->PredCoef[ j ][ i ] * 4096.0f ); /* 4096 = 2^12: float -> Q12 */
+ }
+ }
+
+ for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+ Gains_Q16[ i ] = silk_float2int( psEncCtrl->Gains[ i ] * 65536.0f ); /* 65536 = 2^16: float -> Q16 */
+ silk_assert( Gains_Q16[ i ] > 0 ); /* converted gains are expected to be strictly positive */
+ }
+
+ if( psIndices->signalType == TYPE_VOICED ) {
+ LTP_scale_Q14 = silk_LTPScales_table_Q14[ psIndices->LTP_scaleIndex ]; /* table lookup by index; psEncCtrl->LTP_scale is not used here */
+ } else {
+ LTP_scale_Q14 = 0;
+ }
+
+ /* Convert input to fix */
+ for( i = 0; i < psEnc->sCmn.frame_length; i++ ) {
+ x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] ); /* 8 = 2^3: float -> Q3 */
+ }
+
+ /* Call NSQ */
+ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { /* both routines receive the same argument list */
+ silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
+ AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 );
+ } else {
+ silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
+ AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 );
+ }
+}
+
+/***********************************************/
+/* Floating-point Silk LTP quantization wrapper */
+/***********************************************/
+void silk_quant_LTP_gains_FLP( /* Converts B and W to integer Q formats, calls silk_quant_LTP_gains, then writes the quantized gains back to B as float */
+ silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */
+ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */
+ opus_int8 *periodicity_index, /* O Periodicity index */
+ opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */
+ const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */
+ const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */
+ const opus_int lowComplexity, /* I Flag for low complexity */
+ const opus_int nb_subfr /* I number of subframes */
+)
+{
+ opus_int i;
+ opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ]; /* integer copy of B; also receives the quantized gains */
+ opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ]; /* integer copy of W */
+
+ for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
+ B_Q14[ i ] = (opus_int16)silk_float2int( B[ i ] * 16384.0f ); /* 16384 = 2^14: float -> Q14 */
+ }
+ for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) {
+ W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f ); /* 262144 = 2^18: float -> Q18 */
+ }
+
+ silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr );
+
+ for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
+ B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f ); /* Q14 -> float */
+ }
+}
diff --git a/drivers/opus/silk/gain_quant.c b/drivers/opus/silk/gain_quant.c
new file mode 100644
index 0000000000..e9467198eb
--- /dev/null
+++ b/drivers/opus/silk/gain_quant.c
@@ -0,0 +1,141 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+#define OFFSET ( ( MIN_QGAIN_DB * 128 ) / 6 + 16 * 128 )
+#define SCALE_Q16 ( ( 65536 * ( N_LEVELS_QGAIN - 1 ) ) / ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) )
+#define INV_SCALE_Q16 ( ( 65536 * ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) ) / ( N_LEVELS_QGAIN - 1 ) )
+
+/* Gain scalar quantization with hysteresis, uniform on log scale. For each of the nb_subfr subframes this writes a coded index to ind[], overwrites gain_Q16[] with the quantized gain, and carries the running absolute index in *prev_ind. */
+void silk_gains_quant(
+ opus_int8 ind[ MAX_NB_SUBFR ], /* O gain indices: full index for k == 0 when conditional == 0, otherwise delta index shifted by -MIN_DELTA_GAIN_QUANT */
+ opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* I/O gains (quantized out) */
+ opus_int8 *prev_ind, /* I/O last index in previous frame; updated for every subframe */
+ const opus_int conditional, /* I first gain is delta coded if 1 */
+ const opus_int nb_subfr /* I number of subframes */
+)
+{
+ opus_int k, double_step_size_threshold; /* subframe counter; delta value above which each coded step counts double */
+
+ for( k = 0; k < nb_subfr; k++ ) {
+ /* Convert to log scale, scale, floor(): silk_lin2log() output minus OFFSET, multiplied by SCALE_Q16 */
+ ind[ k ] = silk_SMULWB( SCALE_Q16, silk_lin2log( gain_Q16[ k ] ) - OFFSET );
+
+ /* Round towards previous quantized gain (hysteresis): the floored index is bumped up by one when it lies below *prev_ind */
+ if( ind[ k ] < *prev_ind ) {
+ ind[ k ]++;
+ }
+ ind[ k ] = silk_LIMIT_int( ind[ k ], 0, N_LEVELS_QGAIN - 1 ); /* keep inside the valid index range */
+
+ /* Compute delta indices and limit */
+ if( k == 0 && conditional == 0 ) {
+ /* Full index: bounded from below by *prev_ind + MIN_DELTA_GAIN_QUANT */
+ ind[ k ] = silk_LIMIT_int( ind[ k ], *prev_ind + MIN_DELTA_GAIN_QUANT, N_LEVELS_QGAIN - 1 );
+ *prev_ind = ind[ k ];
+ } else {
+ /* Delta index: difference to the previous absolute index */
+ ind[ k ] = ind[ k ] - *prev_ind;
+
+ /* Double the quantization step size for large gain increases, so that the max gain level can be reached */
+ double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind;
+ if( ind[ k ] > double_step_size_threshold ) {
+ ind[ k ] = double_step_size_threshold + silk_RSHIFT( ind[ k ] - double_step_size_threshold + 1, 1 ); /* halve, rounding up, the part of the delta above the threshold */
+ }
+
+ ind[ k ] = silk_LIMIT_int( ind[ k ], MIN_DELTA_GAIN_QUANT, MAX_DELTA_GAIN_QUANT );
+
+ /* Accumulate deltas: same update as in silk_gains_dequant(), expanding the doubled steps again */
+ if( ind[ k ] > double_step_size_threshold ) {
+ *prev_ind += silk_LSHIFT( ind[ k ], 1 ) - double_step_size_threshold;
+ } else {
+ *prev_ind += ind[ k ];
+ }
+
+ /* Shift to make non-negative */
+ ind[ k ] -= MIN_DELTA_GAIN_QUANT;
+ }
+
+ /* Scale and convert to linear scale; the log-domain value is capped at 3967 before silk_log2lin() */
+ gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */
+ }
+}
+
+/* Gains scalar dequantization, uniform on log scale. Rebuilds the running absolute index in *prev_ind from the coded indices and writes one linear-domain gain per subframe. */
+void silk_gains_dequant(
+ opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* O quantized gains */
+ const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */
+ opus_int8 *prev_ind, /* I/O last index in previous frame; updated for every subframe */
+ const opus_int conditional, /* I first gain is delta coded if 1 */
+ const opus_int nb_subfr /* I number of subframes */
+)
+{
+ opus_int k, ind_tmp, double_step_size_threshold; /* subframe counter; signed delta; delta value above which each coded step counts double */
+
+ for( k = 0; k < nb_subfr; k++ ) {
+ if( k == 0 && conditional == 0 ) {
+ /* Gain index is not allowed to go down more than 16 steps (~21.8 dB) */
+ *prev_ind = silk_max_int( ind[ k ], *prev_ind - 16 );
+ } else {
+ /* Delta index: adding MIN_DELTA_GAIN_QUANT undoes the non-negative shift applied in silk_gains_quant() */
+ ind_tmp = ind[ k ] + MIN_DELTA_GAIN_QUANT;
+
+ /* Accumulate deltas; deltas above the threshold are expanded to twice the step size */
+ double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind;
+ if( ind_tmp > double_step_size_threshold ) {
+ *prev_ind += silk_LSHIFT( ind_tmp, 1 ) - double_step_size_threshold;
+ } else {
+ *prev_ind += ind_tmp;
+ }
+ }
+ *prev_ind = silk_LIMIT_int( *prev_ind, 0, N_LEVELS_QGAIN - 1 ); /* keep inside the valid index range */
+
+ /* Scale and convert to linear scale; the log-domain value is capped at 3967 before silk_log2lin() */
+ gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */
+ }
+}
+
+/* Fold the gain indices into one 32-bit identifier: every step shifts the running value left by 8 bits and adds the next index */
+opus_int32 silk_gains_ID( /* O returns unique identifier of gains */
+ const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */
+ const opus_int nb_subfr /* I number of subframes */
+)
+{
+ opus_int32 id = 0;
+ opus_int sf;
+
+ for( sf = 0; sf < nb_subfr; sf++ ) {
+ /* id = ind[ sf ] + ( id << 8 ) */
+ id = silk_ADD_LSHIFT32( ind[ sf ], id, 8 );
+ }
+
+ return id;
+}
diff --git a/drivers/opus/silk/init_decoder.c b/drivers/opus/silk/init_decoder.c
new file mode 100644
index 0000000000..88c1ff7b43
--- /dev/null
+++ b/drivers/opus/silk/init_decoder.c
@@ -0,0 +1,56 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/************************/
+/* Init Decoder State */
+/************************/
+opus_int silk_init_decoder(
+ silk_decoder_state *psDec /* I/O Decoder state pointer */
+)
+{
+ /* Start from an all-zero decoder state */
+ silk_memset( psDec, 0, sizeof( *psDec ) );
+
+ /* Non-zero defaults; first_frame_after_reset is used to deactivate LSF interpolation */
+ psDec->prev_gain_Q16 = 65536;
+ psDec->first_frame_after_reset = 1;
+
+ /* Reset the CNG state, then the PLC state */
+ silk_CNG_Reset( psDec );
+ silk_PLC_Reset( psDec );
+
+ /* This routine always returns 0 */
+ return 0;
+}
+
diff --git a/drivers/opus/silk/init_encoder.c b/drivers/opus/silk/init_encoder.c
new file mode 100644
index 0000000000..baf97d49e7
--- /dev/null
+++ b/drivers/opus/silk/init_encoder.c
@@ -0,0 +1,64 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#ifdef OPUS_FIXED_POINT
+#include "main_FIX.h"
+#else
+#include "main_FLP.h"
+#endif
+#include "tuning_parameters.h"
+#include "cpu_support.h"
+
+/*********************************/
+/* Initialize Silk Encoder state */
+/*********************************/
+opus_int silk_init_encoder(
+ silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */
+ int arch /* I Run-time architecture */
+)
+{
+ /* silk_lin2log() of the minimum high-pass cutoff, given as a Q16 constant */
+ const opus_int32 min_cutoff_log = silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) );
+
+ /* Wipe the whole encoder state before filling in the non-zero defaults */
+ silk_memset( psEnc, 0, sizeof( *psEnc ) );
+
+ psEnc->sCmn.arch = arch;
+
+ /* Both high-pass smoother states start from the same value */
+ psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( min_cutoff_log - ( 16 << 7 ), 8 );
+ psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15;
+
+ /* Used to deactivate LSF interpolation, pitch prediction */
+ psEnc->sCmn.first_frame_after_reset = 1;
+
+ /* Initialize Silk VAD; its return code is the result of this function */
+ return silk_VAD_Init( &psEnc->sCmn.sVAD );
+}
diff --git a/drivers/opus/silk/inner_prod_aligned.c b/drivers/opus/silk/inner_prod_aligned.c
new file mode 100644
index 0000000000..d625001db7
--- /dev/null
+++ b/drivers/opus/silk/inner_prod_aligned.c
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+opus_int32 silk_inner_prod_aligned_scale(
+ const opus_int16 *const inVec1, /* I input vector 1 */
+ const opus_int16 *const inVec2, /* I input vector 2 */
+ const opus_int scale, /* I number of bits to shift */
+ const opus_int len /* I vector lengths */
+)
+{
+ opus_int32 acc = 0;
+ opus_int32 prod;
+ opus_int n;
+
+ for( n = 0; n < len; n++ ) {
+ /* Each 16x16-bit product is shifted right by 'scale' before it is accumulated */
+ prod = silk_SMULBB( inVec1[ n ], inVec2[ n ] );
+ acc = silk_ADD_RSHIFT32( acc, prod, scale );
+ }
+ return acc;
+}
diff --git a/drivers/opus/silk/interpolate.c b/drivers/opus/silk/interpolate.c
new file mode 100644
index 0000000000..d5df0feddb
--- /dev/null
+++ b/drivers/opus/silk/interpolate.c
@@ -0,0 +1,51 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Interpolate two vectors */
+void silk_interpolate(
+ opus_int16 xi[ MAX_LPC_ORDER ], /* O interpolated vector */
+ const opus_int16 x0[ MAX_LPC_ORDER ], /* I first vector */
+ const opus_int16 x1[ MAX_LPC_ORDER ], /* I second vector */
+ const opus_int ifact_Q2, /* I interp. factor, weight on 2nd vector */
+ const opus_int d /* I number of parameters */
+)
+{
+ opus_int n;
+ opus_int32 weighted_diff;
+
+ /* The weight is a Q2 value and must lie in [0, 4] */
+ silk_assert( ifact_Q2 >= 0 );
+ silk_assert( ifact_Q2 <= 4 );
+
+ for( n = 0; n < d; n++ ) {
+ /* xi = x0 + ( ( x1 - x0 ) * ifact_Q2 >> 2 ) */
+ weighted_diff = silk_SMULBB( x1[ n ] - x0[ n ], ifact_Q2 );
+ xi[ n ] = (opus_int16)silk_ADD_RSHIFT( x0[ n ], weighted_diff, 2 );
+ }
+}
diff --git a/drivers/opus/silk/lin2log.c b/drivers/opus/silk/lin2log.c
new file mode 100644
index 0000000000..77bfc8c8ab
--- /dev/null
+++ b/drivers/opus/silk/lin2log.c
@@ -0,0 +1,46 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+/* Approximation of 128 * log2() (very close inverse of silk_log2lin()) */
+/* Convert input to a log scale */
+opus_int32 silk_lin2log(
+ const opus_int32 inLin /* I input in linear scale */
+)
+{
+ opus_int32 lz, frac_Q7;
+ opus_int32 int_part_Q7, frac_part_Q7;
+
+ /* silk_CLZ_FRAC() fills in lz and frac_Q7 for inLin */
+ silk_CLZ_FRAC( inLin, &lz, &frac_Q7 );
+
+ /* Integer part of the result, placed above the 7 fractional bits */
+ int_part_Q7 = silk_LSHIFT( 31 - lz, 7 );
+
+ /* Fractional part: piece-wise parabolic approximation */
+ frac_part_Q7 = silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 );
+
+ return int_part_Q7 + frac_part_Q7;
+}
+
diff --git a/drivers/opus/silk/log2lin.c b/drivers/opus/silk/log2lin.c
new file mode 100644
index 0000000000..0ed2a12efd
--- /dev/null
+++ b/drivers/opus/silk/log2lin.c
@@ -0,0 +1,58 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Approximation of 2^() (very close inverse of silk_lin2log()) */
+/* Convert input to a linear scale; the input is a Q7 log-domain value (7 fractional bits) */
+opus_int32 silk_log2lin(
+ const opus_int32 inLog_Q7 /* I input on log scale */
+)
+{
+ opus_int32 out, frac_Q7; /* result under construction; low 7 bits of the input */
+
+ if( inLog_Q7 < 0 ) {
+ return 0; /* negative inputs map to 0 */
+ } else if ( inLog_Q7 >= 3967 ) {
+ return silk_int32_MAX; /* inputs of 3967 and above saturate */
+ }
+
+ out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) ); /* 1 shifted left by the integer part of the input */
+ frac_Q7 = inLog_Q7 & 0x7F; /* fractional part of the input */
+ if( inLog_Q7 < 2048 ) { /* 2048 = 16 in Q7, so out is below 2^16 in this branch */
+ /* Piece-wise parabolic approximation: multiply out by the correction term first, then shift the product right by 7 */
+ out = silk_ADD_RSHIFT32( out, silk_MUL( out, silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ), 7 );
+ } else {
+ /* Piece-wise parabolic approximation: shift out right by 7 first, then multiply. NOTE(review): presumably ordered this way to keep the product within 32 bits for large out - confirm */
+ out = silk_MLA( out, silk_RSHIFT( out, 7 ), silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) );
+ }
+ return out;
+}
diff --git a/drivers/opus/silk/macros.h b/drivers/opus/silk/macros.h
new file mode 100644
index 0000000000..6cf2e93dbc
--- /dev/null
+++ b/drivers/opus/silk/macros.h
@@ -0,0 +1,113 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_H
+#define SILK_MACROS_H
+
+#include "opus_config.h"
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+/* This is an inline header file for general platforms. */
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16; output has to be a 32-bit int. Evaluated as two partial products (upper and lower 16 bits of a32) instead of one wide multiply */
+#define silk_SMULWB(a32, b32) ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
+
+/* a32 + ((b32 * (opus_int32)((opus_int16)(c32))) >> 16); output has to be a 32-bit int */
+#define silk_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
+
+/* (a32 * (b32 >> 16)) >> 16 */
+#define silk_SMULWT(a32, b32) (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
+
+/* a32 + ((b32 * (c32 >> 16)) >> 16) */
+#define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
+
+/* (opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)); output has to be a 32-bit int */
+#define silk_SMULBB(a32, b32) ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)); output has to be a 32-bit int */
+#define silk_SMLABB(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32)))
+
+/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
+#define silk_SMULBT(a32, b32) ((opus_int32)((opus_int16)(a32)) * ((b32) >> 16))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
+#define silk_SMLABT(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * ((c32) >> 16))
+
+/* a64 + (b32 * c32), with the product formed in 64 bits */
+#define silk_SMLAL(a64, b32, c32) (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32))))
+
+/* (a32 * b32) >> 16, built from silk_SMULWB on the low half of b32 plus a32 times the rounded high half of b32 */
+#define silk_SMULWW(a32, b32) silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
+
+/* a32 + ((b32 * c32) >> 16) */
+#define silk_SMLAWW(a32, b32, c32) silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16))
+
+/* add/subtract with output saturated: the sign bit of the wrapped unsigned result is compared with the operand sign bits, and silk_int32_MIN or silk_int32_MAX is returned when they show an overflow. Arguments are evaluated more than once */
+#define silk_ADD_SAT32(a, b) ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ? \
+ ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) : \
+ ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) )
+
+#define silk_SUB_SAT32(a, b) ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ? \
+ (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \
+ ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
+
+#include "ecintrin.h"
+
+static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16) /* leading-zero count of a 16-bit value; NOTE(review): relies on EC_ILOG() from ecintrin.h, presumably the bit length of its argument - confirm */
+{
+ return 32 - EC_ILOG(in16<<16|0x8000); /* parsed as (in16<<16)|0x8000: in16 is moved to the upper half and bit 15 is set so the argument is never zero */
+}
+
+static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
+{
+ /* Zero is handled explicitly and reported as 32, without calling EC_ILOG() */
+ if( in32 == 0 ) {
+ return 32;
+ }
+ return 32 - EC_ILOG(in32);
+}
+
+/* Row based: element (row, column) lives at offset row*N + column; matrix_ptr yields the element itself, matrix_adr its address */
+#define matrix_ptr(Matrix_base_adr, row, column, N) \
+ (*((Matrix_base_adr) + ((row)*(N)+(column))))
+#define matrix_adr(Matrix_base_adr, row, column, N) \
+ ((Matrix_base_adr) + ((row)*(N)+(column)))
+
+/* Column based: element (row, column) lives at offset row + M*column; only defined here if not already defined */
+#ifndef matrix_c_ptr
+# define matrix_c_ptr(Matrix_base_adr, row, column, M) \
+ (*((Matrix_base_adr) + ((row)+(M)*(column))))
+#endif
+
+#ifdef OPUS_ARM_INLINE_ASM
+#include "arm/macros_armv4.h"
+#endif
+
+#ifdef OPUS_ARM_INLINE_EDSP
+#include "arm/macros_armv5e.h"
+#endif
+
+#endif /* SILK_MACROS_H */
+
diff --git a/drivers/opus/silk/pitch_est_defines.h b/drivers/opus/silk/pitch_est_defines.h
new file mode 100644
index 0000000000..e1e4b5d768
--- /dev/null
+++ b/drivers/opus/silk/pitch_est_defines.h
@@ -0,0 +1,88 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_PE_DEFINES_H
+#define SILK_PE_DEFINES_H
+
+#include "SigProc_FIX.h"
+
+/********************************************************/
+/* Definitions for pitch estimator */
+/********************************************************/
+
+#define PE_MAX_FS_KHZ 16 /* Maximum sampling frequency used, in kHz */
+
+#define PE_MAX_NB_SUBFR 4
+#define PE_SUBFR_LENGTH_MS 5 /* 5 ms */
+
+#define PE_LTP_MEM_LENGTH_MS ( 4 * PE_SUBFR_LENGTH_MS ) /* 20 ms */
+
+#define PE_MAX_FRAME_LENGTH_MS ( PE_LTP_MEM_LENGTH_MS + PE_MAX_NB_SUBFR * PE_SUBFR_LENGTH_MS ) /* 20 + 4 * 5 = 40 ms */
+#define PE_MAX_FRAME_LENGTH ( PE_MAX_FRAME_LENGTH_MS * PE_MAX_FS_KHZ ) /* 40 * 16 = 640 samples */
+#define PE_MAX_FRAME_LENGTH_ST_1 ( PE_MAX_FRAME_LENGTH >> 2 ) /* 160 */
+#define PE_MAX_FRAME_LENGTH_ST_2 ( PE_MAX_FRAME_LENGTH >> 1 ) /* 320 */
+
+#define PE_MAX_LAG_MS 18 /* 18 ms -> 56 Hz */
+#define PE_MIN_LAG_MS 2 /* 2 ms -> 500 Hz */
+#define PE_MAX_LAG ( PE_MAX_LAG_MS * PE_MAX_FS_KHZ ) /* 18 * 16 = 288 samples */
+#define PE_MIN_LAG ( PE_MIN_LAG_MS * PE_MAX_FS_KHZ ) /* 2 * 16 = 32 samples */
+
+#define PE_D_SRCH_LENGTH 24
+
+#define PE_NB_STAGE3_LAGS 5
+
+#define PE_NB_CBKS_STAGE2 3
+#define PE_NB_CBKS_STAGE2_EXT 11 /* second dimension of silk_CB_lags_stage2 */
+
+#define PE_NB_CBKS_STAGE3_MAX 34 /* second dimension of silk_CB_lags_stage3 */
+#define PE_NB_CBKS_STAGE3_MID 24
+#define PE_NB_CBKS_STAGE3_MIN 16
+
+#define PE_NB_CBKS_STAGE3_10MS 12 /* second dimension of silk_CB_lags_stage3_10_ms as defined in pitch_est_tables.c */
+#define PE_NB_CBKS_STAGE2_10MS 3 /* second dimension of silk_CB_lags_stage2_10_ms as defined in pitch_est_tables.c */
+
+#define PE_SHORTLAG_BIAS 0.2f /* for logarithmic weighting */
+#define PE_PREVLAG_BIAS 0.2f /* for logarithmic weighting */
+#define PE_FLATCONTOUR_BIAS 0.05f
+
+#define SILK_PE_MIN_COMPLEX 0 /* complexity settings; silk_Lag_range_stage3 and silk_nb_cbk_searchs_stage3 are sized SILK_PE_MAX_COMPLEX + 1 */
+#define SILK_PE_MID_COMPLEX 1
+#define SILK_PE_MAX_COMPLEX 2
+
+/* Tables for 20 ms frames */
+extern const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ];
+extern const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ];
+extern const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ];
+extern const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ];
+
+/* Tables for 10 ms frames; the dimensions use the same named constants as the definitions in pitch_est_tables.c, so declaration and definition cannot drift apart */
+extern const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ PE_NB_CBKS_STAGE2_10MS ];
+extern const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ PE_NB_CBKS_STAGE3_10MS ];
+extern const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ];
+
+#endif
+
diff --git a/drivers/opus/silk/pitch_est_tables.c b/drivers/opus/silk/pitch_est_tables.c
new file mode 100644
index 0000000000..97ddbab010
--- /dev/null
+++ b/drivers/opus/silk/pitch_est_tables.c
@@ -0,0 +1,99 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "typedef.h"
+#include "pitch_est_defines.h"
+
+const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ PE_NB_CBKS_STAGE2_10MS ] = /* 10 ms frames, stage 2: [ subframe ][ codebook entry ] per the array bounds */
+{
+ {0, 1, 0},
+ {0, 0, 1}
+};
+
+const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ PE_NB_CBKS_STAGE3_10MS ] = /* 10 ms frames, stage 3: [ subframe ][ codebook entry ] per the array bounds */
+{
+ { 0, 0, 1,-1, 1,-1, 2,-2, 2,-2, 3,-3},
+ { 0, 1, 0, 1,-1, 2,-1, 2,-2, 3,-2, 3}
+};
+
+const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ] = /* 10 ms frames: one pair per subframe, presumably { lowest, highest } lag offset -- confirm against the stage-3 search */
+{
+ {-3, 7},
+ {-2, 7}
+};
+
+const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ] = /* 20 ms frames, stage 2: [ subframe ][ codebook entry ] per the array bounds */
+{
+ {0, 2,-1,-1,-1, 0, 0, 1, 1, 0, 1},
+ {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0},
+ {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0},
+ {0,-1, 2, 1, 0, 1, 1, 0, 0,-1,-1}
+};
+
+const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ] = /* 20 ms frames, stage 3: [ subframe ][ codebook entry ] per the array bounds */
+{
+ {0, 0, 1,-1, 0, 1,-1, 0,-1, 1,-2, 2,-2,-2, 2,-3, 2, 3,-3,-4, 3,-4, 4, 4,-5, 5,-6,-5, 6,-7, 6, 5, 8,-9},
+ {0, 0, 1, 0, 0, 0, 0, 0, 0, 0,-1, 1, 0, 0, 1,-1, 0, 1,-1,-1, 1,-1, 2, 1,-1, 2,-2,-2, 2,-2, 2, 2, 3,-3},
+ {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,-1, 1, 0, 0, 2, 1,-1, 2,-1,-1, 2,-1, 2, 2,-1, 3,-2,-2,-2, 3},
+ {0, 1, 0, 0, 1, 0, 1,-1, 2,-1, 2,-1, 2, 3,-2, 3,-2,-2, 4, 4,-3, 5,-3,-4, 6,-4, 6, 5,-5, 8,-6,-5,-7, 9}
+};
+
+const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ] = /* 20 ms frames: [ complexity ][ subframe ][ 2 ]; the three groups follow SILK_PE_MIN/MID/MAX_COMPLEX order */
+{
+ /* Lags to search for low number of stage3 cbks */
+ {
+ {-5,8},
+ {-1,6},
+ {-1,6},
+ {-4,10}
+ },
+ /* Lags to search for middle number of stage3 cbks */
+ {
+ {-6,10},
+ {-2,6},
+ {-1,6},
+ {-5,10}
+ },
+ /* Lags to search for max number of stage3 cbks */
+ {
+ {-9,12},
+ {-3,7},
+ {-2,7},
+ {-7,13}
+ }
+};
+
+const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ] = /* stage-3 codebook count per complexity level: index 0 = MIN, 1 = MID, 2 = MAX */
+{
+ PE_NB_CBKS_STAGE3_MIN,
+ PE_NB_CBKS_STAGE3_MID,
+ PE_NB_CBKS_STAGE3_MAX
+};
diff --git a/drivers/opus/silk/process_NLSFs.c b/drivers/opus/silk/process_NLSFs.c
new file mode 100644
index 0000000000..0193fda1f1
--- /dev/null
+++ b/drivers/opus/silk/process_NLSFs.c
@@ -0,0 +1,105 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Limit, stabilize, convert and quantize NLSFs; also writes the LPC coefficients for both frame halves */
+void silk_process_NLSFs(
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients ([ 0 ]: first half, [ 1 ]: second half) */
+ opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */
+ const opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */
+)
+{
+ opus_int i, doInterpolate;
+ opus_int NLSF_mu_Q20;
+ opus_int32 i_sqr_Q15;
+ opus_int16 pNLSF0_temp_Q15[ MAX_LPC_ORDER ];
+ opus_int16 pNLSFW_QW[ MAX_LPC_ORDER ];
+ opus_int16 pNLSFW0_temp_QW[ MAX_LPC_ORDER ];
+
+ silk_assert( psEncC->speech_activity_Q8 >= 0 );
+ silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) );
+ silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) ); /* interpolation disabled => factor must be 4, i.e. 1.0 in Q2 */
+
+ /***********************/
+ /* Calculate mu values */
+ /***********************/
+ /* NLSF_mu = 0.003 - 0.001 * psEnc->speech_activity; (matches the constants below, assuming silk_SMLAWB( a, b, c ) = a + ( ( b * c ) >> 16 ) -- confirm) */
+ NLSF_mu_Q20 = silk_SMLAWB( SILK_FIX_CONST( 0.003, 20 ), SILK_FIX_CONST( -0.001, 28 ), psEncC->speech_activity_Q8 );
+ if( psEncC->nb_subfr == 2 ) {
+ /* Multiply by 1.5 for 10 ms packets (mu + mu/2) */
+ NLSF_mu_Q20 = silk_ADD_RSHIFT( NLSF_mu_Q20, NLSF_mu_Q20, 1 );
+ }
+
+ silk_assert( NLSF_mu_Q20 > 0 );
+ silk_assert( NLSF_mu_Q20 <= SILK_FIX_CONST( 0.005, 20 ) );
+
+ /* Calculate NLSF weights */
+ silk_NLSF_VQ_weights_laroia( pNLSFW_QW, pNLSF_Q15, psEncC->predictLPCOrder );
+
+ /* Update NLSF weights for interpolated NLSFs (only when interpolation is enabled and the factor is below 4) */
+ doInterpolate = ( psEncC->useInterpolatedNLSFs == 1 ) && ( psEncC->indices.NLSFInterpCoef_Q2 < 4 );
+ if( doInterpolate ) {
+ /* Calculate the interpolated NLSF vector for the first half (from the unquantized NLSFs) */
+ silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15,
+ psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
+
+ /* Calculate first half NLSF weights for the interpolated NLSFs */
+ silk_NLSF_VQ_weights_laroia( pNLSFW0_temp_QW, pNLSF0_temp_Q15, psEncC->predictLPCOrder );
+
+ /* Update NLSF weights with contribution from first half: half the second-half weight plus the first-half weight scaled by the squared interpolation factor */
+ i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 ); /* Q2 * Q2 = Q4, << 11 -> Q15 */
+ for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
+ pNLSFW_QW[ i ] = silk_SMLAWB( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), (opus_int32)pNLSFW0_temp_QW[ i ], i_sqr_Q15 );
+ silk_assert( pNLSFW_QW[ i ] >= 1 );
+ }
+ }
+
+ silk_NLSF_encode( psEncC->indices.NLSFIndices, pNLSF_Q15, psEncC->psNLSF_CB, pNLSFW_QW,
+ NLSF_mu_Q20, psEncC->NLSF_MSVQ_Survivors, psEncC->indices.signalType );
+
+ /* Convert quantized NLSFs back to LPC coefficients (second half) */
+ silk_NLSF2A( PredCoef_Q12[ 1 ], pNLSF_Q15, psEncC->predictLPCOrder );
+
+ if( doInterpolate ) {
+ /* Calculate the interpolated, quantized LSF vector for the first half */
+ silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15,
+ psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
+
+ /* Convert back to LPC coefficients */
+ silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder );
+
+ } else {
+ /* Copy LPC coefficients for first half from second half */
+ silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) );
+ }
+}
diff --git a/drivers/opus/silk/quant_LTP_gains.c b/drivers/opus/silk/quant_LTP_gains.c
new file mode 100644
index 0000000000..34bcd3acdb
--- /dev/null
+++ b/drivers/opus/silk/quant_LTP_gains.c
@@ -0,0 +1,128 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "tuning_parameters.h"
+
+void silk_quant_LTP_gains(
+ opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */
+ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index (one per subframe, into the selected codebook) */
+ opus_int8 *periodicity_index, /* O Periodicity Index (index of the selected codebook, 0..2) */
+ opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */
+ const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */
+ opus_int mu_Q9, /* I Mu value (R/D tradeoff) */
+ opus_int lowComplexity, /* I Flag for low complexity */
+ const opus_int nb_subfr /* I number of subframes */
+)
+{
+ opus_int j, k, cbk_size;
+ opus_int8 temp_idx[ MAX_NB_SUBFR ];
+ const opus_uint8 *cl_ptr_Q5;
+ const opus_int8 *cbk_ptr_Q7;
+ const opus_uint8 *cbk_gain_ptr_Q7;
+ const opus_int16 *b_Q14_ptr;
+ const opus_int32 *W_Q18_ptr;
+ opus_int32 rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14;
+ opus_int32 sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7;
+
+ /*************************************************************************/
+ /* Try each of the 3 LTP codebooks (different rates/distortions), keep   */
+ /* the one with the lowest total rate-distortion cost over all subframes */
+ /*************************************************************************/
+ min_rate_dist_Q14 = silk_int32_MAX;
+ best_sum_log_gain_Q7 = 0;
+ for( k = 0; k < 3; k++ ) {
+ /* Safety margin for pitch gain control, to take into account factors
+ such as state rescaling/rewhitening. */
+ opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 );
+
+ cl_ptr_Q5 = silk_LTP_gain_BITS_Q5_ptrs[ k ];
+ cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ k ];
+ cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ];
+ cbk_size = silk_LTP_vq_sizes[ k ];
+
+ /* Set up pointer to first subframe */
+ W_Q18_ptr = W_Q18;
+ b_Q14_ptr = B_Q14;
+
+ rate_dist_Q14 = 0;
+ sum_log_gain_tmp_Q7 = *sum_log_gain_Q7; /* every codebook starts from the caller's running sum */
+ for( j = 0; j < nb_subfr; j++ ) {
+ max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 )
+ + SILK_FIX_CONST( 7, 7 ) ) - gain_safety;
+
+ silk_VQ_WMat_EC(
+ &temp_idx[ j ], /* O index of best codebook vector */
+ &rate_dist_Q14_subfr, /* O best weighted quantization error + mu * rate */
+ &gain_Q7, /* O sum of absolute LTP coefficients */
+ b_Q14_ptr, /* I input vector to be quantized */
+ W_Q18_ptr, /* I weighting matrix */
+ cbk_ptr_Q7, /* I codebook */
+ cbk_gain_ptr_Q7, /* I codebook effective gains */
+ cl_ptr_Q5, /* I code length for each codebook vector */
+ mu_Q9, /* I tradeoff between weighted error and rate */
+ max_gain_Q7, /* I maximum sum of absolute LTP coefficients */
+ cbk_size /* I number of vectors in codebook */
+ );
+
+ rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr );
+ sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7
+ + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 )); /* running sum is clamped at 0 from below */
+
+ b_Q14_ptr += LTP_ORDER;
+ W_Q18_ptr += LTP_ORDER * LTP_ORDER;
+ }
+
+ /* Keep the cost strictly below silk_int32_MAX so the first codebook always beats the initial minimum (avoid never finding a codebook) */
+ rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 );
+
+ if( rate_dist_Q14 < min_rate_dist_Q14 ) {
+ min_rate_dist_Q14 = rate_dist_Q14;
+ *periodicity_index = (opus_int8)k;
+ silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) );
+ best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7;
+ }
+
+ /* Break early in low-complexity mode if rate distortion is below threshold */
+ if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) {
+ break;
+ }
+ }
+
+ cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ *periodicity_index ]; /* overwrite B_Q14 with the selected codebook vectors */
+ for( j = 0; j < nb_subfr; j++ ) {
+ for( k = 0; k < LTP_ORDER; k++ ) {
+ B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 ); /* Q7 -> Q14 */
+ }
+ }
+ *sum_log_gain_Q7 = best_sum_log_gain_Q7; /* hand back the running sum that belongs to the selected codebook */
+}
+
diff --git a/drivers/opus/silk/resampler.c b/drivers/opus/silk/resampler.c
new file mode 100644
index 0000000000..14b185c45e
--- /dev/null
+++ b/drivers/opus/silk/resampler.c
@@ -0,0 +1,215 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/*
+ * Matrix of resampling methods used:
+ *                                 Fs_out (kHz)
+ *                        8      12     16     24     48
+ *
+ *               8        C      UF     U      UF     UF
+ *              12        AF     C      UF     U      UF
+ * Fs_in (kHz)  16        D      AF     C      UF     UF
+ *              24        AF     D      AF     C      U
+ *              48        AF     AF     AF     D      C
+ *
+ * C   -> Copy (no resampling)
+ * D   -> Allpass-based 2x downsampling (NOTE: silk_resampler_init() below selects the down_FIR path for 2:1 ratios as well)
+ * U   -> Allpass-based 2x upsampling
+ * UF  -> Allpass-based 2x upsampling followed by FIR interpolation
+ * AF  -> AR2 filter followed by FIR interpolation
+ */
+
+#include "resampler_private.h"
+
+/* Tables with delay compensation values (in input samples) to equalize total delay for different modes; indexed [ rateID( Fs_in ) ][ rateID( Fs_out ) ] and stored in S->inputDelay by silk_resampler_init() */
+static const opus_int8 delay_matrix_enc[ 5 ][ 3 ] = {
+/* in \ out (kHz) 8 12 16 */
+/* 8 */ { 6, 0, 3 },
+/* 12 */ { 0, 7, 3 },
+/* 16 */ { 0, 1, 10 },
+/* 24 */ { 0, 2, 6 },
+/* 48 */ { 18, 10, 12 }
+};
+
+static const opus_int8 delay_matrix_dec[ 3 ][ 5 ] = {
+/* in \ out (kHz) 8 12 16 24 48 */
+/* 8 */ { 4, 0, 2, 0, 0 },
+/* 12 */ { 0, 9, 4, 7, 4 },
+/* 16 */ { 0, 3, 12, 7, 7 }
+};
+
+/* Map a sampling rate R of exactly 8000, 12000, 16000, 24000 or 48000 Hz to the index 0, 1, 2, 3 or 4; callers validate R before using it */
+#define rateID(R) ( ( ( ((R)>>12) - ((R)>16000) ) >> ((R)>24000) ) - 1 )
+
+#define USE_silk_resampler_copy (0) /* USE_* values are stored in S->resampler_function and dispatched on in silk_resampler() */
+#define USE_silk_resampler_private_up2_HQ_wrapper (1)
+#define USE_silk_resampler_private_IIR_FIR (2)
+#define USE_silk_resampler_private_down_FIR (3)
+
+/* Initialize/reset the resampler state for a given pair of input/output sampling rates. Returns 0 on success, -1 if the rate combination is not supported */
+opus_int silk_resampler_init(
+ silk_resampler_state_struct *S, /* I/O Resampler state */
+ opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */
+ opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */
+ opus_int forEnc /* I If 1: encoder; if 0: decoder */
+)
+{
+ opus_int up2x;
+
+ /* Clear state (this happens even when the rates are rejected below) */
+ silk_memset( S, 0, sizeof( silk_resampler_state_struct ) );
+
+ /* Input checking: encoder takes 8/12/16/24/48 kHz in and 8/12/16 kHz out, decoder the reverse */
+ if( forEnc ) {
+ if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 && Fs_Hz_in != 24000 && Fs_Hz_in != 48000 ) ||
+ ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 ) ) {
+ silk_assert( 0 );
+ return -1;
+ }
+ S->inputDelay = delay_matrix_enc[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
+ } else {
+ if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 ) ||
+ ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 && Fs_Hz_out != 24000 && Fs_Hz_out != 48000 ) ) {
+ silk_assert( 0 );
+ return -1;
+ }
+ S->inputDelay = delay_matrix_dec[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
+ }
+
+ S->Fs_in_kHz = silk_DIV32_16( Fs_Hz_in, 1000 );
+ S->Fs_out_kHz = silk_DIV32_16( Fs_Hz_out, 1000 );
+
+ /* Number of input samples processed per batch */
+ S->batchSize = S->Fs_in_kHz * RESAMPLER_MAX_BATCH_SIZE_MS;
+
+ /* Find resampler with the right sampling ratio */
+ up2x = 0;
+ if( Fs_Hz_out > Fs_Hz_in ) {
+ /* Upsample */
+ if( Fs_Hz_out == silk_MUL( Fs_Hz_in, 2 ) ) { /* Fs_out : Fs_in = 2 : 1 */
+ /* Special case: directly use 2x upsampler */
+ S->resampler_function = USE_silk_resampler_private_up2_HQ_wrapper;
+ } else {
+ /* Default resampler; up2x doubles the input rate used for invRatio_Q16 below */
+ S->resampler_function = USE_silk_resampler_private_IIR_FIR;
+ up2x = 1;
+ }
+ } else if ( Fs_Hz_out < Fs_Hz_in ) {
+ /* Downsample: every ratio uses the down_FIR path, only the FIR fractions/order/coefficients differ */
+ S->resampler_function = USE_silk_resampler_private_down_FIR;
+ if( silk_MUL( Fs_Hz_out, 4 ) == silk_MUL( Fs_Hz_in, 3 ) ) { /* Fs_out : Fs_in = 3 : 4 */
+ S->FIR_Fracs = 3;
+ S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0;
+ S->Coefs = silk_Resampler_3_4_COEFS;
+ } else if( silk_MUL( Fs_Hz_out, 3 ) == silk_MUL( Fs_Hz_in, 2 ) ) { /* Fs_out : Fs_in = 2 : 3 */
+ S->FIR_Fracs = 2;
+ S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0;
+ S->Coefs = silk_Resampler_2_3_COEFS;
+ } else if( silk_MUL( Fs_Hz_out, 2 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 2 */
+ S->FIR_Fracs = 1;
+ S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR1;
+ S->Coefs = silk_Resampler_1_2_COEFS;
+ } else if( silk_MUL( Fs_Hz_out, 3 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 3 */
+ S->FIR_Fracs = 1;
+ S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
+ S->Coefs = silk_Resampler_1_3_COEFS;
+ } else if( silk_MUL( Fs_Hz_out, 4 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 4 */
+ S->FIR_Fracs = 1;
+ S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
+ S->Coefs = silk_Resampler_1_4_COEFS;
+ } else if( silk_MUL( Fs_Hz_out, 6 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 6 */
+ S->FIR_Fracs = 1;
+ S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
+ S->Coefs = silk_Resampler_1_6_COEFS;
+ } else {
+ /* None available */
+ silk_assert( 0 );
+ return -1;
+ }
+ } else {
+ /* Input and output sampling rates are equal: copy */
+ S->resampler_function = USE_silk_resampler_copy;
+ }
+
+ /* Ratio of input/output samples in Q16 (input rate is doubled first when up2x is set) */
+ S->invRatio_Q16 = silk_LSHIFT32( silk_DIV32( silk_LSHIFT32( Fs_Hz_in, 14 + up2x ), Fs_Hz_out ), 2 );
+ /* Make sure the ratio is rounded up */
+ while( silk_SMULWW( S->invRatio_Q16, Fs_Hz_out ) < silk_LSHIFT32( Fs_Hz_in, up2x ) ) {
+ S->invRatio_Q16++;
+ }
+
+ return 0;
+}
+
+/* Resampler: convert from one sampling rate to another; the first 1 ms is processed via the delay buffer, the rest directly from in[]. Always returns 0 */
+/* Input and output sampling rate are at most 48000 Hz */
+opus_int silk_resampler(
+ silk_resampler_state_struct *S, /* I/O Resampler state */
+ opus_int16 out[], /* O Output signal */
+ const opus_int16 in[], /* I Input signal */
+ opus_int32 inLen /* I Number of input samples */
+)
+{
+ opus_int nSamples;
+
+ /* Need at least 1 ms of input data */
+ silk_assert( inLen >= S->Fs_in_kHz );
+ /* Delay can't exceed the 1 ms of buffering */
+ silk_assert( S->inputDelay <= S->Fs_in_kHz );
+
+ nSamples = S->Fs_in_kHz - S->inputDelay; /* input samples needed to complete 1 ms behind the buffered delay samples */
+
+ /* Fill the rest of the 1 ms delay buffer with the first nSamples input samples */
+ silk_memcpy( &S->delayBuf[ S->inputDelay ], in, nSamples * sizeof( opus_int16 ) );
+
+ switch( S->resampler_function ) { /* each case: first the 1 ms delay buffer, then the remaining input starting at in[ nSamples ] */
+ case USE_silk_resampler_private_up2_HQ_wrapper:
+ silk_resampler_private_up2_HQ_wrapper( S, out, S->delayBuf, S->Fs_in_kHz );
+ silk_resampler_private_up2_HQ_wrapper( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz );
+ break;
+ case USE_silk_resampler_private_IIR_FIR:
+ silk_resampler_private_IIR_FIR( S, out, S->delayBuf, S->Fs_in_kHz );
+ silk_resampler_private_IIR_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz );
+ break;
+ case USE_silk_resampler_private_down_FIR:
+ silk_resampler_private_down_FIR( S, out, S->delayBuf, S->Fs_in_kHz );
+ silk_resampler_private_down_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz );
+ break;
+ default: /* USE_silk_resampler_copy (and any other value): plain copy */
+ silk_memcpy( out, S->delayBuf, S->Fs_in_kHz * sizeof( opus_int16 ) );
+ silk_memcpy( &out[ S->Fs_out_kHz ], &in[ nSamples ], ( inLen - S->Fs_in_kHz ) * sizeof( opus_int16 ) );
+ }
+
+ /* Save the last inputDelay input samples in the delay buffer for the next call */
+ silk_memcpy( S->delayBuf, &in[ inLen - S->inputDelay ], S->inputDelay * sizeof( opus_int16 ) );
+
+ return 0;
+}
diff --git a/drivers/opus/silk/resampler_down2.c b/drivers/opus/silk/resampler_down2.c
new file mode 100644
index 0000000000..5c4b27759a
--- /dev/null
+++ b/drivers/opus/silk/resampler_down2.c
@@ -0,0 +1,74 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_rom.h"
+
+/* Downsample by a factor 2: even and odd input samples each go through their own all-pass section (one state word each) and the two results are summed */
+void silk_resampler_down2(
+ opus_int32 *S, /* I/O State vector [ 2 ] */
+ opus_int16 *out, /* O Output signal [ floor(inLen/2) ] */
+ const opus_int16 *in, /* I Input signal [ inLen ] */
+ opus_int32 inLen /* I Number of input samples */
+)
+{
+ opus_int32 k, len2 = silk_RSHIFT32( inLen, 1 ); /* number of output samples; a trailing odd input sample is not consumed */
+ opus_int32 in32, out32, Y, X;
+
+ silk_assert( silk_resampler_down2_0 > 0 );
+ silk_assert( silk_resampler_down2_1 < 0 );
+
+ /* Internal variables and state are in Q10 format */
+ for( k = 0; k < len2; k++ ) {
+ /* Convert even input sample to Q10 */
+ in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 );
+
+ /* All-pass section for even input sample */
+ Y = silk_SUB32( in32, S[ 0 ] );
+ X = silk_SMLAWB( Y, Y, silk_resampler_down2_1 );
+ out32 = silk_ADD32( S[ 0 ], X );
+ S[ 0 ] = silk_ADD32( in32, X );
+
+ /* Convert odd input sample to Q10 */
+ in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 );
+
+ /* All-pass section for odd input sample, and add to output of previous section */
+ Y = silk_SUB32( in32, S[ 1 ] );
+ X = silk_SMULWB( Y, silk_resampler_down2_0 );
+ out32 = silk_ADD32( out32, S[ 1 ] );
+ out32 = silk_ADD32( out32, X );
+ S[ 1 ] = silk_ADD32( in32, X );
+
+ /* Round and shift right by 11 (Q10 plus, presumably, one bit to average the two sections), saturate to int16 and store to output */
+ out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32, 11 ) );
+ }
+}
+
diff --git a/drivers/opus/silk/resampler_down2_3.c b/drivers/opus/silk/resampler_down2_3.c
new file mode 100644
index 0000000000..2733072fe6
--- /dev/null
+++ b/drivers/opus/silk/resampler_down2_3.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+#include "stack_alloc.h"
+
+#define ORDER_FIR 4 /* number of filtered samples carried over between batches and between calls */
+
+/* Downsample by a factor 2/3, low quality: AR2-filter the input, then produce 2 output samples for every 3 filtered samples */
+void silk_resampler_down2_3(
+ opus_int32 *S, /* I/O State vector [ 6 ]: S[ 0..3 ] FIR history, S[ 4..5 ] passed to the AR2 filter */
+ opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */
+ const opus_int16 *in, /* I Input signal [ inLen ] */
+ opus_int32 inLen /* I Number of input samples */
+)
+{
+ opus_int32 nSamplesIn, counter, res_Q6;
+ VARDECL( opus_int32, buf );
+ opus_int32 *buf_ptr;
+ SAVE_STACK;
+
+ ALLOC( buf, RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR, opus_int32 ); /* ORDER_FIR history samples followed by one batch of filtered samples */
+
+ /* Copy buffered samples to start of buffer */
+ silk_memcpy( buf, S, ORDER_FIR * sizeof( opus_int32 ) );
+
+ /* Iterate over blocks of at most RESAMPLER_MAX_BATCH_SIZE_IN input samples */
+ while( 1 ) {
+ nSamplesIn = silk_min( inLen, RESAMPLER_MAX_BATCH_SIZE_IN );
+
+ /* Second-order AR filter (output in Q8), written behind the ORDER_FIR history samples */
+ silk_resampler_private_AR2( &S[ ORDER_FIR ], &buf[ ORDER_FIR ], in,
+ silk_Resampler_2_3_COEFS_LQ, nSamplesIn );
+
+ /* Interpolate filtered signal: each pass consumes 3 filtered samples and emits 2 outputs */
+ buf_ptr = buf;
+ counter = nSamplesIn;
+ while( counter > 2 ) {
+ /* Inner product for the first output (taps 2, 3, 5, 4 on buf_ptr[ 0..3 ]) */
+ res_Q6 = silk_SMULWB( buf_ptr[ 0 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] );
+
+ /* Scale down, saturate and store in output array */
+ *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+
+ res_Q6 = silk_SMULWB( buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] ); /* second output: same taps in reverse order on buf_ptr[ 1..4 ] */
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] );
+
+ /* Scale down, saturate and store in output array */
+ *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+
+ buf_ptr += 3;
+ counter -= 3;
+ }
+
+ in += nSamplesIn;
+ inLen -= nSamplesIn;
+
+ if( inLen > 0 ) {
+ /* More iterations to do; copy last part of filtered signal to beginning of buffer */
+ silk_memcpy( buf, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) );
+ } else {
+ break;
+ }
+ }
+
+ /* Copy last part of filtered signal to the state for the next call */
+ silk_memcpy( S, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) );
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/resampler_private.h b/drivers/opus/silk/resampler_private.h
new file mode 100644
index 0000000000..422a7d9d95
--- /dev/null
+++ b/drivers/opus/silk/resampler_private.h
@@ -0,0 +1,88 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_RESAMPLER_PRIVATE_H
+#define SILK_RESAMPLER_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_structs.h"
+#include "resampler_rom.h"
+
+/* Upper bound on the number of input samples processed per batch: 10 ms at a 48 kHz sampling rate */
+#define RESAMPLER_MAX_BATCH_SIZE_MS 10
+#define RESAMPLER_MAX_FS_KHZ 48
+#define RESAMPLER_MAX_BATCH_SIZE_IN ( RESAMPLER_MAX_BATCH_SIZE_MS * RESAMPLER_MAX_FS_KHZ )
+
+/* Upsample using a combination of allpass-based 2x upsampling and FIR interpolation */
+void silk_resampler_private_IIR_FIR(
+ void *SS, /* I/O Resampler state */
+ opus_int16 out[], /* O Output signal */
+ const opus_int16 in[], /* I Input signal */
+ opus_int32 inLen /* I Number of input samples */
+);
+
+/* Resample with a 2nd order AR filter followed by FIR interpolation */
+void silk_resampler_private_down_FIR(
+ void *SS, /* I/O Resampler state */
+ opus_int16 out[], /* O Output signal */
+ const opus_int16 in[], /* I Input signal */
+ opus_int32 inLen /* I Number of input samples */
+);
+
+/* Wrapper for silk_resampler_private_up2_HQ() that takes the resampler state as a void pointer */
+void silk_resampler_private_up2_HQ_wrapper(
+ void *SS, /* I/O Resampler state (its sIIR member is passed on) */
+ opus_int16 *out, /* O Output signal [ 2 * len ] */
+ const opus_int16 *in, /* I Input signal [ len ] */
+ opus_int32 len /* I Number of input samples */
+);
+
+/* Upsample by a factor 2, high quality */
+void silk_resampler_private_up2_HQ(
+ opus_int32 *S, /* I/O Resampler state [ 6 ] */
+ opus_int16 *out, /* O Output signal [ 2 * len ] */
+ const opus_int16 *in, /* I Input signal [ len ] */
+ opus_int32 len /* I Number of input samples */
+);
+
+/* Second order AR filter with single delay elements */
+void silk_resampler_private_AR2(
+ opus_int32 S[], /* I/O State vector [ 2 ] */
+ opus_int32 out_Q8[], /* O Output signal */
+ const opus_int16 in[], /* I Input signal */
+ const opus_int16 A_Q14[], /* I AR coefficients, Q14 */
+ opus_int32 len /* I Signal length */
+);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* SILK_RESAMPLER_PRIVATE_H */
diff --git a/drivers/opus/silk/resampler_private_AR2.c b/drivers/opus/silk/resampler_private_AR2.c
new file mode 100644
index 0000000000..84157d17ba
--- /dev/null
+++ b/drivers/opus/silk/resampler_private_AR2.c
@@ -0,0 +1,55 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+
+/* Second order AR filter; the two state words S[ 0 ] and S[ 1 ] serve as single delay elements */
+void silk_resampler_private_AR2(
+ opus_int32 S[], /* I/O State vector [ 2 ] */
+ opus_int32 out_Q8[], /* O Output signal */
+ const opus_int16 in[], /* I Input signal */
+ const opus_int16 A_Q14[], /* I AR coefficients, Q14 */
+ opus_int32 len /* I Signal length */
+)
+{
+ opus_int32 n, y_Q8, y_scaled;
+
+ /* Per sample: form the Q8 output from S[ 0 ] and the input, then refresh both state words from it */
+ for( n = 0; n < len; n++ ) {
+ y_Q8 = silk_ADD_LSHIFT32( S[ 0 ], (opus_int32)in[ n ], 8 );
+ out_Q8[ n ] = y_Q8;
+ y_scaled = silk_LSHIFT( y_Q8, 2 ); /* output shifted left by 2 before it feeds the state update */
+ S[ 0 ] = silk_SMLAWB( S[ 1 ], y_scaled, A_Q14[ 0 ] );
+ S[ 1 ] = silk_SMULWB( y_scaled, A_Q14[ 1 ] );
+ }
+}
+
diff --git a/drivers/opus/silk/resampler_private_IIR_FIR.c b/drivers/opus/silk/resampler_private_IIR_FIR.c
new file mode 100644
index 0000000000..f45c3e7413
--- /dev/null
+++ b/drivers/opus/silk/resampler_private_IIR_FIR.c
@@ -0,0 +1,107 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
+ opus_int16 *out, /* O Output signal; one sample is written per loop step */
+ opus_int16 *buf, /* I Buffered signal to interpolate; 8 consecutive samples are read per step */
+ opus_int32 max_index_Q16, /* I Exclusive upper bound for the Q16 read position */
+ opus_int32 index_increment_Q16 /* I Q16 read-position increment per output sample */
+)
+{
+ opus_int32 index_Q16, res_Q15;
+ opus_int16 *buf_ptr;
+ opus_int32 table_index;
+
+ /* Interpolate upsampled signal and store in output array; the read position always starts at 0 */
+ for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+ table_index = silk_SMULWB( index_Q16 & 0xFFFF, 12 ); /* low 16 (fractional) bits scaled by 12: row of the 12-row table */
+ buf_ptr = &buf[ index_Q16 >> 16 ]; /* integer part of the position is the offset into buf */
+ /* 8-tap inner product: taps 0-3 use row table_index forwards, taps 4-7 use row 11 - table_index backwards */
+ res_Q15 = silk_SMULBB( buf_ptr[ 0 ], silk_resampler_frac_FIR_12[ table_index ][ 0 ] );
+ res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 1 ], silk_resampler_frac_FIR_12[ table_index ][ 1 ] );
+ res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 2 ], silk_resampler_frac_FIR_12[ table_index ][ 2 ] );
+ res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 3 ], silk_resampler_frac_FIR_12[ table_index ][ 3 ] );
+ res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 4 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 3 ] );
+ res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 5 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 2 ] );
+ res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 6 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 1 ] );
+ res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 7 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 0 ] );
+ *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q15, 15 ) ); /* scale down, saturate and store */
+ }
+ return out; /* points just past the last output sample written */
+}
+/* Upsampler: allpass-based 2x upsampling of each batch, followed by FIR interpolation of the 2x signal */
+void silk_resampler_private_IIR_FIR(
+ void *SS, /* I/O Resampler state */
+ opus_int16 out[], /* O Output signal */
+ const opus_int16 in[], /* I Input signal */
+ opus_int32 inLen /* I Number of input samples */
+)
+{
+ silk_resampler_state_struct *state = (silk_resampler_state_struct *)SS;
+ opus_int32 batchLen, tailStart, step_Q16, limit_Q16;
+ VARDECL( opus_int16, work );
+ SAVE_STACK;
+
+ ALLOC( work, 2 * state->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 );
+
+ /* Seed the head of the work buffer with the samples buffered in the state */
+ silk_memcpy( work, state->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
+
+ /* Consume the input in batches of at most batchSize samples */
+ step_Q16 = state->invRatio_Q16;
+ for( ;; ) {
+ batchLen = silk_min( inLen, state->batchSize );
+ tailStart = batchLen << 1; /* the 2x upsampler writes two samples per input sample */
+
+ /* Upsample 2x, writing just behind the buffered samples */
+ silk_resampler_private_up2_HQ( state->sIIR, &work[ RESAMPLER_ORDER_FIR_12 ], in, batchLen );
+
+ /* Interpolate; the bound uses 16 + 1 because of the 2x upsampling */
+ limit_Q16 = silk_LSHIFT32( batchLen, 16 + 1 );
+ out = silk_resampler_private_IIR_FIR_INTERPOL( out, work, limit_Q16, step_Q16 );
+ in += batchLen;
+ inLen -= batchLen;
+
+ if( inLen <= 0 ) {
+ break;
+ }
+ /* More iterations to do; move the last part of the filtered signal to the start of the buffer */
+ silk_memcpy( work, &work[ tailStart ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
+ }
+
+ /* Keep the last part of the filtered signal in the state for the next call */
+ silk_memcpy( state->sFIR.i16, &work[ tailStart ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/resampler_private_down_FIR.c b/drivers/opus/silk/resampler_private_down_FIR.c
new file mode 100644
index 0000000000..f4de303546
--- /dev/null
+++ b/drivers/opus/silk/resampler_private_down_FIR.c
@@ -0,0 +1,194 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
+ opus_int16 *out, /* O Output signal; one sample is written per loop step */
+ opus_int32 *buf, /* I Buffered signal; FIR_Order consecutive samples are read per step */
+ const opus_int16 *FIR_Coefs, /* I FIR coefficients */
+ opus_int FIR_Order, /* I FIR order; selects which unrolled inner product runs */
+ opus_int FIR_Fracs, /* I Number of interpolation fractions (only read in the RESAMPLER_DOWN_ORDER_FIR0 case) */
+ opus_int32 max_index_Q16, /* I Exclusive upper bound for the Q16 read position */
+ opus_int32 index_increment_Q16 /* I Q16 read-position increment per output sample */
+)
+{
+ opus_int32 index_Q16, res_Q6;
+ opus_int32 *buf_ptr;
+ opus_int32 interpol_ind;
+ const opus_int16 *interpol_ptr;
+ /* One fully unrolled inner product per supported FIR order; every loop starts at read position 0 */
+ switch( FIR_Order ) {
+ case RESAMPLER_DOWN_ORDER_FIR0:
+ for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+ /* Integer part gives pointer to buffered input */
+ buf_ptr = buf + silk_RSHIFT( index_Q16, 16 );
+
+ /* Fractional part gives interpolation coefficients */
+ interpol_ind = silk_SMULWB( index_Q16 & 0xFFFF, FIR_Fracs );
+
+ /* Inner product, first half: samples 0..8 with the coefficient row of fraction interpol_ind */
+ interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * interpol_ind ];
+ res_Q6 = silk_SMULWB( buf_ptr[ 0 ], interpol_ptr[ 0 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], interpol_ptr[ 1 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], interpol_ptr[ 2 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], interpol_ptr[ 3 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], interpol_ptr[ 4 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 5 ], interpol_ptr[ 5 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 6 ], interpol_ptr[ 6 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 7 ], interpol_ptr[ 7 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 8 ], interpol_ptr[ 8 ] );
+ interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * ( FIR_Fracs - 1 - interpol_ind ) ]; /* second half: mirrored fraction, samples 17 down to 9 */
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 17 ], interpol_ptr[ 0 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 16 ], interpol_ptr[ 1 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 15 ], interpol_ptr[ 2 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 14 ], interpol_ptr[ 3 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 13 ], interpol_ptr[ 4 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 12 ], interpol_ptr[ 5 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 11 ], interpol_ptr[ 6 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 10 ], interpol_ptr[ 7 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 9 ], interpol_ptr[ 8 ] );
+
+ /* Scale down, saturate and store in output array */
+ *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+ }
+ break;
+ case RESAMPLER_DOWN_ORDER_FIR1:
+ for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+ /* Integer part gives pointer to buffered input */
+ buf_ptr = buf + silk_RSHIFT( index_Q16, 16 );
+
+ /* Inner product; samples k and 23 - k are summed first and share coefficient k */
+ res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 23 ] ), FIR_Coefs[ 0 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 22 ] ), FIR_Coefs[ 1 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 21 ] ), FIR_Coefs[ 2 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 20 ] ), FIR_Coefs[ 3 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 19 ] ), FIR_Coefs[ 4 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 18 ] ), FIR_Coefs[ 5 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 17 ] ), FIR_Coefs[ 6 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 16 ] ), FIR_Coefs[ 7 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 15 ] ), FIR_Coefs[ 8 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 14 ] ), FIR_Coefs[ 9 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 13 ] ), FIR_Coefs[ 10 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 12 ] ), FIR_Coefs[ 11 ] );
+
+ /* Scale down, saturate and store in output array */
+ *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+ }
+ break;
+ case RESAMPLER_DOWN_ORDER_FIR2:
+ for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+ /* Integer part gives pointer to buffered input */
+ buf_ptr = buf + silk_RSHIFT( index_Q16, 16 );
+
+ /* Inner product; samples k and 35 - k are summed first and share coefficient k */
+ res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 35 ] ), FIR_Coefs[ 0 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 34 ] ), FIR_Coefs[ 1 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 33 ] ), FIR_Coefs[ 2 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 32 ] ), FIR_Coefs[ 3 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 31 ] ), FIR_Coefs[ 4 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 30 ] ), FIR_Coefs[ 5 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 29 ] ), FIR_Coefs[ 6 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 28 ] ), FIR_Coefs[ 7 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 27 ] ), FIR_Coefs[ 8 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 26 ] ), FIR_Coefs[ 9 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 25 ] ), FIR_Coefs[ 10 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 24 ] ), FIR_Coefs[ 11 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 12 ], buf_ptr[ 23 ] ), FIR_Coefs[ 12 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 13 ], buf_ptr[ 22 ] ), FIR_Coefs[ 13 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 14 ], buf_ptr[ 21 ] ), FIR_Coefs[ 14 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 15 ], buf_ptr[ 20 ] ), FIR_Coefs[ 15 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 16 ], buf_ptr[ 19 ] ), FIR_Coefs[ 16 ] );
+ res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 17 ], buf_ptr[ 18 ] ), FIR_Coefs[ 17 ] );
+
+ /* Scale down, saturate and store in output array */
+ *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+ }
+ break;
+ default:
+ silk_assert( 0 ); /* FIR_Order is none of the three supported orders */
+ }
+ return out; /* points just past the last output sample written */
+}
+
+/* Resample with a 2nd order AR filter followed by FIR interpolation, one batch of input at a time */
+void silk_resampler_private_down_FIR(
+ void *SS, /* I/O Resampler state */
+ opus_int16 out[], /* O Output signal */
+ const opus_int16 in[], /* I Input signal */
+ opus_int32 inLen /* I Number of input samples */
+)
+{
+ silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
+ opus_int32 nSamplesIn;
+ opus_int32 max_index_Q16, index_increment_Q16;
+ VARDECL( opus_int32, buf );
+ const opus_int16 *FIR_Coefs;
+ SAVE_STACK;
+
+ ALLOC( buf, S->batchSize + S->FIR_Order, opus_int32 );
+
+ /* Copy buffered samples to start of buffer */
+ silk_memcpy( buf, S->sFIR.i32, S->FIR_Order * sizeof( opus_int32 ) );
+ /* S->Coefs[ 0..1 ] go to the AR2 filter below; the FIR coefficients start at index 2 */
+ FIR_Coefs = &S->Coefs[ 2 ];
+
+ /* Iterate over batches of at most S->batchSize input samples */
+ index_increment_Q16 = S->invRatio_Q16;
+ while( 1 ) {
+ nSamplesIn = silk_min( inLen, S->batchSize );
+
+ /* Second-order AR filter (output in Q8), written just behind the buffered samples */
+ silk_resampler_private_AR2( S->sIIR, &buf[ S->FIR_Order ], in, S->Coefs, nSamplesIn );
+
+ max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 );
+
+ /* Interpolate filtered signal */
+ out = silk_resampler_private_down_FIR_INTERPOL( out, buf, FIR_Coefs, S->FIR_Order,
+ S->FIR_Fracs, max_index_Q16, index_increment_Q16 );
+
+ in += nSamplesIn;
+ inLen -= nSamplesIn;
+ /* Any remaining input, even a single sample, gets another pass instead of being dropped */
+ if( inLen > 0 ) {
+ /* More iterations to do; copy last part of filtered signal to beginning of buffer */
+ silk_memcpy( buf, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) );
+ } else {
+ break;
+ }
+ }
+
+ /* Copy last part of filtered signal to the state for the next call */
+ silk_memcpy( S->sFIR.i32, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) );
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/resampler_private_up2_HQ.c b/drivers/opus/silk/resampler_private_up2_HQ.c
new file mode 100644
index 0000000000..39f4818454
--- /dev/null
+++ b/drivers/opus/silk/resampler_private_up2_HQ.c
@@ -0,0 +1,113 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+
+/* Upsample by a factor 2, high quality */
+/* Each input sample yields an even and an odd output sample; each of the two is produced */
+/* by its own chain of three all-pass sections (state S[ 0..2 ] and S[ 3..5 ] respectively). */
+void silk_resampler_private_up2_HQ(
+ opus_int32 *S, /* I/O Resampler state [ 6 ] */
+ opus_int16 *out, /* O Output signal [ 2 * len ] */
+ const opus_int16 *in, /* I Input signal [ len ] */
+ opus_int32 len /* I Number of input samples */
+)
+{
+ opus_int32 k;
+ opus_int32 in32, out32_1, out32_2, Y, X;
+ /* Sanity checks on the coefficient signs; the third sections below use silk_SMLAWB( Y, Y, c ) rather than silk_SMULWB( Y, c ) */
+ silk_assert( silk_resampler_up2_hq_0[ 0 ] > 0 );
+ silk_assert( silk_resampler_up2_hq_0[ 1 ] > 0 );
+ silk_assert( silk_resampler_up2_hq_0[ 2 ] < 0 );
+ silk_assert( silk_resampler_up2_hq_1[ 0 ] > 0 );
+ silk_assert( silk_resampler_up2_hq_1[ 1 ] > 0 );
+ silk_assert( silk_resampler_up2_hq_1[ 2 ] < 0 );
+ /* NOTE(review): an older comment mentioned a notch filter above Nyquist; no such stage exists in this function */
+ /* Internal variables and state are in Q10 format */
+ for( k = 0; k < len; k++ ) {
+ /* Convert to Q10 */
+ in32 = silk_LSHIFT( (opus_int32)in[ k ], 10 );
+
+ /* First all-pass section for even output sample */
+ Y = silk_SUB32( in32, S[ 0 ] );
+ X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 0 ] );
+ out32_1 = silk_ADD32( S[ 0 ], X );
+ S[ 0 ] = silk_ADD32( in32, X );
+
+ /* Second all-pass section for even output sample */
+ Y = silk_SUB32( out32_1, S[ 1 ] );
+ X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 1 ] );
+ out32_2 = silk_ADD32( S[ 1 ], X );
+ S[ 1 ] = silk_ADD32( out32_1, X );
+
+ /* Third all-pass section for even output sample */
+ Y = silk_SUB32( out32_2, S[ 2 ] );
+ X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_0[ 2 ] );
+ out32_1 = silk_ADD32( S[ 2 ], X );
+ S[ 2 ] = silk_ADD32( out32_2, X );
+
+ /* Scale down from Q10 with rounding, saturate to int16 and store to output */
+ out[ 2 * k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) );
+
+ /* First all-pass section for odd output sample */
+ Y = silk_SUB32( in32, S[ 3 ] );
+ X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 0 ] );
+ out32_1 = silk_ADD32( S[ 3 ], X );
+ S[ 3 ] = silk_ADD32( in32, X );
+
+ /* Second all-pass section for odd output sample */
+ Y = silk_SUB32( out32_1, S[ 4 ] );
+ X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 1 ] );
+ out32_2 = silk_ADD32( S[ 4 ], X );
+ S[ 4 ] = silk_ADD32( out32_1, X );
+
+ /* Third all-pass section for odd output sample */
+ Y = silk_SUB32( out32_2, S[ 5 ] );
+ X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_1[ 2 ] );
+ out32_1 = silk_ADD32( S[ 5 ], X );
+ S[ 5 ] = silk_ADD32( out32_2, X );
+
+ /* Scale down from Q10 with rounding, saturate to int16 and store to output */
+ out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) );
+ }
+}
+
+void silk_resampler_private_up2_HQ_wrapper(
+ void *SS, /* I/O Resampler state; only its sIIR member is used */
+ opus_int16 *out, /* O Output signal [ 2 * len ] */
+ const opus_int16 *in, /* I Input signal [ len ] */
+ opus_int32 len /* I Number of input samples */
+)
+{
+ /* Adapter: view SS as the resampler state struct and forward its sIIR state to the 2x upsampler */
+ silk_resampler_private_up2_HQ( ( (silk_resampler_state_struct *)SS )->sIIR, out, in, len );
+}
diff --git a/drivers/opus/silk/resampler_rom.c b/drivers/opus/silk/resampler_rom.c
new file mode 100644
index 0000000000..0098e18ba8
--- /dev/null
+++ b/drivers/opus/silk/resampler_rom.c
@@ -0,0 +1,96 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* Filter coefficients for IIR/FIR polyphase resampling *
+ * Total size: 179 Words (358 Bytes) */
+
+#include "resampler_private.h"
+
+/* Matlab code for the notch filter coefficients: */
+/* B = [1, 0.147, 1]; A = [1, 0.107, 0.89]; G = 0.93; freqz(G * B, A, 2^14, 16e3); axis([0, 8000, -10, 1]) */
+/* fprintf('\t%6d, %6d, %6d, %6d\n', round(B(2)*2^16), round(-A(2)*2^16), round((1-A(3))*2^16), round(G*2^15)) */
+/* const opus_int16 silk_resampler_up2_hq_notch[ 4 ] = { 9634, -7012, 7209, 30474 }; */
+
+/* Tables with IIR and FIR coefficients for fractional downsamplers (123 Words) */
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = {
+ -20694, -13867,
+ -49, 64, 17, -157, 353, -496, 163, 11047, 22205,
+ -39, 6, 91, -170, 186, 23, -896, 6336, 19928,
+ -19, -36, 102, -89, -24, 328, -951, 2568, 15909,
+};
+/* 2 leading entries followed by 2 rows of RESAMPLER_DOWN_ORDER_FIR0 / 2 coefficients */
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = {
+ -14457, -14019,
+ 64, 128, -122, 36, 310, -768, 584, 9267, 17733,
+ 12, 128, 18, -142, 288, -117, -865, 4123, 14459,
+};
+/* 2 leading entries followed by RESAMPLER_DOWN_ORDER_FIR1 / 2 coefficients */
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ] = {
+ 616, -14323,
+ -10, 39, 58, -46, -84, 120, 184, -315, -541, 1284, 5380, 9024,
+};
+/* The three tables below each hold 2 leading entries followed by RESAMPLER_DOWN_ORDER_FIR2 / 2 coefficients */
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
+ 16102, -15162,
+ -13, 0, 20, 26, 5, -31, -43, -4, 65, 90, 7, -157, -248, -44, 593, 1583, 2612, 3271,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
+ 22500, -15099,
+ 3, -14, -20, -15, 2, 25, 37, 25, -16, -71, -107, -79, 50, 292, 623, 982, 1288, 1464,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
+ 27540, -15257,
+ 17, 12, 8, 1, -10, -22, -30, -32, -22, 3, 44, 100, 168, 243, 317, 381, 429, 455,
+};
+/* Low-quality 2/3 table: entries 0-1 are passed as AR coefficients, entries 2-5 are the FIR taps */
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = {
+ -2797, -6507,
+ 4697, 10739,
+ 1567, 8276,
+};
+
+/* Table with interpolation fractions of 1/24, 3/24, 5/24, ... , 23/24 : 23/24 (12 rows x 4 coefficients = 48 Words) */
+silk_DWORD_ALIGN const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ] = {
+ { 189, -600, 617, 30567 },
+ { 117, -159, -1070, 29704 },
+ { 52, 221, -2392, 28276 },
+ { -4, 529, -3350, 26341 },
+ { -48, 758, -3956, 23973 },
+ { -80, 905, -4235, 21254 },
+ { -99, 972, -4222, 18278 },
+ { -107, 967, -3957, 15143 },
+ { -103, 896, -3487, 11950 },
+ { -91, 773, -2865, 8798 },
+ { -71, 611, -2143, 5784 },
+ { -46, 425, -1375, 2996 },
+};
diff --git a/drivers/opus/silk/resampler_rom.h b/drivers/opus/silk/resampler_rom.h
new file mode 100644
index 0000000000..490b3388dc
--- /dev/null
+++ b/drivers/opus/silk/resampler_rom.h
@@ -0,0 +1,68 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_FIX_RESAMPLER_ROM_H
+#define SILK_FIX_RESAMPLER_ROM_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "typedef.h"
+#include "resampler_structs.h"
+
+#define RESAMPLER_DOWN_ORDER_FIR0 18
+#define RESAMPLER_DOWN_ORDER_FIR1 24
+#define RESAMPLER_DOWN_ORDER_FIR2 36
+#define RESAMPLER_ORDER_FIR_12 8
+
+/* Tables for 2x downsampler */
+static const opus_int16 silk_resampler_down2_0 = 9872;
+static const opus_int16 silk_resampler_down2_1 = 39809 - 65536;
+
+/* Tables for 2x upsampler, high quality */
+static const opus_int16 silk_resampler_up2_hq_0[ 3 ] = { 1746, 14986, 39083 - 65536 };
+static const opus_int16 silk_resampler_up2_hq_1[ 3 ] = { 6854, 25769, 55542 - 65536 };
+
+/* Tables with IIR and FIR coefficients for fractional downsamplers */
+extern const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ];
+extern const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ];
+extern const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ];
+extern const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ];
+extern const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ];
+extern const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ];
+extern const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ];
+
+/* Table with interpolation fractions of 1/24, 3/24, ..., 23/24 */
+extern const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ];
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_FIX_RESAMPLER_ROM_H */
diff --git a/drivers/opus/silk/resampler_structs.h b/drivers/opus/silk/resampler_structs.h
new file mode 100644
index 0000000000..9e9457d11c
--- /dev/null
+++ b/drivers/opus/silk/resampler_structs.h
@@ -0,0 +1,60 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_RESAMPLER_STRUCTS_H
+#define SILK_RESAMPLER_STRUCTS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SILK_RESAMPLER_MAX_FIR_ORDER 36
+#define SILK_RESAMPLER_MAX_IIR_ORDER 6
+
+typedef struct _silk_resampler_state_struct{
+ opus_int32 sIIR[ SILK_RESAMPLER_MAX_IIR_ORDER ]; /* this must be the first element of this struct */
+ union{
+ opus_int32 i32[ SILK_RESAMPLER_MAX_FIR_ORDER ];
+ opus_int16 i16[ SILK_RESAMPLER_MAX_FIR_ORDER ];
+ } sFIR;
+ opus_int16 delayBuf[ 48 ];
+ opus_int resampler_function;
+ opus_int batchSize;
+ opus_int32 invRatio_Q16;
+ opus_int FIR_Order;
+ opus_int FIR_Fracs;
+ opus_int Fs_in_kHz;
+ opus_int Fs_out_kHz;
+ opus_int inputDelay;
+ const opus_int16 *Coefs;
+} silk_resampler_state_struct;
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* SILK_RESAMPLER_STRUCTS_H */
+
diff --git a/drivers/opus/silk/shell_coder.c b/drivers/opus/silk/shell_coder.c
new file mode 100644
index 0000000000..79e392bd98
--- /dev/null
+++ b/drivers/opus/silk/shell_coder.c
@@ -0,0 +1,151 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* shell coder; pulse-subframe length is hardcoded */
+
+static OPUS_INLINE void combine_pulses(
+ opus_int *out, /* O combined pulses vector [len] */
+ const opus_int *in, /* I input vector [2 * len] */
+ const opus_int len /* I number of OUTPUT samples */
+)
+{
+ opus_int k;
+ for( k = 0; k < len; k++ ) {
+ out[ k ] = in[ 2 * k ] + in[ 2 * k + 1 ];
+ }
+}
+
+static OPUS_INLINE void encode_split(
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ const opus_int p_child1, /* I pulse amplitude of first child subframe */
+ const opus_int p, /* I pulse amplitude of current subframe */
+ const opus_uint8 *shell_table /* I table of shell cdfs */
+)
+{
+ if( p > 0 ) {
+ ec_enc_icdf( psRangeEnc, p_child1, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 );
+ }
+}
+
+static OPUS_INLINE void decode_split(
+ opus_int *p_child1, /* O pulse amplitude of first child subframe */
+ opus_int *p_child2, /* O pulse amplitude of second child subframe */
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ const opus_int p, /* I pulse amplitude of current subframe */
+ const opus_uint8 *shell_table /* I table of shell cdfs */
+)
+{
+ if( p > 0 ) {
+ p_child1[ 0 ] = ec_dec_icdf( psRangeDec, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 );
+ p_child2[ 0 ] = p - p_child1[ 0 ];
+ } else {
+ p_child1[ 0 ] = 0;
+ p_child2[ 0 ] = 0;
+ }
+}
+
+/* Shell encoder, operates on one shell code frame of 16 pulses */
+void silk_shell_encoder(
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ const opus_int *pulses0 /* I data: nonnegative pulse amplitudes */
+)
+{
+ opus_int pulses1[ 8 ], pulses2[ 4 ], pulses3[ 2 ], pulses4[ 1 ];
+
+ /* this function operates on one shell code frame of 16 pulses */
+ silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
+
+ /* tree representation per pulse-subframe */
+ combine_pulses( pulses1, pulses0, 8 );
+ combine_pulses( pulses2, pulses1, 4 );
+ combine_pulses( pulses3, pulses2, 2 );
+ combine_pulses( pulses4, pulses3, 1 );
+
+ encode_split( psRangeEnc, pulses3[ 0 ], pulses4[ 0 ], silk_shell_code_table3 );
+
+ encode_split( psRangeEnc, pulses2[ 0 ], pulses3[ 0 ], silk_shell_code_table2 );
+
+ encode_split( psRangeEnc, pulses1[ 0 ], pulses2[ 0 ], silk_shell_code_table1 );
+ encode_split( psRangeEnc, pulses0[ 0 ], pulses1[ 0 ], silk_shell_code_table0 );
+ encode_split( psRangeEnc, pulses0[ 2 ], pulses1[ 1 ], silk_shell_code_table0 );
+
+ encode_split( psRangeEnc, pulses1[ 2 ], pulses2[ 1 ], silk_shell_code_table1 );
+ encode_split( psRangeEnc, pulses0[ 4 ], pulses1[ 2 ], silk_shell_code_table0 );
+ encode_split( psRangeEnc, pulses0[ 6 ], pulses1[ 3 ], silk_shell_code_table0 );
+
+ encode_split( psRangeEnc, pulses2[ 2 ], pulses3[ 1 ], silk_shell_code_table2 );
+
+ encode_split( psRangeEnc, pulses1[ 4 ], pulses2[ 2 ], silk_shell_code_table1 );
+ encode_split( psRangeEnc, pulses0[ 8 ], pulses1[ 4 ], silk_shell_code_table0 );
+ encode_split( psRangeEnc, pulses0[ 10 ], pulses1[ 5 ], silk_shell_code_table0 );
+
+ encode_split( psRangeEnc, pulses1[ 6 ], pulses2[ 3 ], silk_shell_code_table1 );
+ encode_split( psRangeEnc, pulses0[ 12 ], pulses1[ 6 ], silk_shell_code_table0 );
+ encode_split( psRangeEnc, pulses0[ 14 ], pulses1[ 7 ], silk_shell_code_table0 );
+}
+
+
+/* Shell decoder, operates on one shell code frame of 16 pulses */
+void silk_shell_decoder(
+ opus_int *pulses0, /* O data: nonnegative pulse amplitudes */
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ const opus_int pulses4 /* I number of pulses per pulse-subframe */
+)
+{
+ opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
+
+ /* this function operates on one shell code frame of 16 pulses */
+ silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
+
+ decode_split( &pulses3[ 0 ], &pulses3[ 1 ], psRangeDec, pulses4, silk_shell_code_table3 );
+
+ decode_split( &pulses2[ 0 ], &pulses2[ 1 ], psRangeDec, pulses3[ 0 ], silk_shell_code_table2 );
+
+ decode_split( &pulses1[ 0 ], &pulses1[ 1 ], psRangeDec, pulses2[ 0 ], silk_shell_code_table1 );
+ decode_split( &pulses0[ 0 ], &pulses0[ 1 ], psRangeDec, pulses1[ 0 ], silk_shell_code_table0 );
+ decode_split( &pulses0[ 2 ], &pulses0[ 3 ], psRangeDec, pulses1[ 1 ], silk_shell_code_table0 );
+
+ decode_split( &pulses1[ 2 ], &pulses1[ 3 ], psRangeDec, pulses2[ 1 ], silk_shell_code_table1 );
+ decode_split( &pulses0[ 4 ], &pulses0[ 5 ], psRangeDec, pulses1[ 2 ], silk_shell_code_table0 );
+ decode_split( &pulses0[ 6 ], &pulses0[ 7 ], psRangeDec, pulses1[ 3 ], silk_shell_code_table0 );
+
+ decode_split( &pulses2[ 2 ], &pulses2[ 3 ], psRangeDec, pulses3[ 1 ], silk_shell_code_table2 );
+
+ decode_split( &pulses1[ 4 ], &pulses1[ 5 ], psRangeDec, pulses2[ 2 ], silk_shell_code_table1 );
+ decode_split( &pulses0[ 8 ], &pulses0[ 9 ], psRangeDec, pulses1[ 4 ], silk_shell_code_table0 );
+ decode_split( &pulses0[ 10 ], &pulses0[ 11 ], psRangeDec, pulses1[ 5 ], silk_shell_code_table0 );
+
+ decode_split( &pulses1[ 6 ], &pulses1[ 7 ], psRangeDec, pulses2[ 3 ], silk_shell_code_table1 );
+ decode_split( &pulses0[ 12 ], &pulses0[ 13 ], psRangeDec, pulses1[ 6 ], silk_shell_code_table0 );
+ decode_split( &pulses0[ 14 ], &pulses0[ 15 ], psRangeDec, pulses1[ 7 ], silk_shell_code_table0 );
+}
diff --git a/drivers/opus/silk/sigm_Q15.c b/drivers/opus/silk/sigm_Q15.c
new file mode 100644
index 0000000000..2df5b9695c
--- /dev/null
+++ b/drivers/opus/silk/sigm_Q15.c
@@ -0,0 +1,76 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* Approximate sigmoid function */
+
+#include "SigProc_FIX.h"
+
+/* fprintf(1, '%d, ', round(1024 * ([1 ./ (1 + exp(-(1:5))), 1] - 1 ./ (1 + exp(-(0:5)))))); */
+static const opus_int32 sigm_LUT_slope_Q10[ 6 ] = {
+ 237, 153, 73, 30, 12, 7
+};
+/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp(-(0:5))))); */
+static const opus_int32 sigm_LUT_pos_Q15[ 6 ] = {
+ 16384, 23955, 28861, 31213, 32178, 32548
+};
+/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp((0:5))))); */
+static const opus_int32 sigm_LUT_neg_Q15[ 6 ] = {
+ 16384, 8812, 3906, 1554, 589, 219
+};
+
+opus_int silk_sigm_Q15(
+ opus_int in_Q5 /* I */
+)
+{
+ opus_int ind;
+
+ if( in_Q5 < 0 ) {
+ /* Negative input */
+ in_Q5 = -in_Q5;
+ if( in_Q5 >= 6 * 32 ) {
+ return 0; /* Clip */
+ } else {
+ /* Linear interpolation of look up table */
+ ind = silk_RSHIFT( in_Q5, 5 );
+ return( sigm_LUT_neg_Q15[ ind ] - silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) );
+ }
+ } else {
+ /* Positive input */
+ if( in_Q5 >= 6 * 32 ) {
+ return 32767; /* clip */
+ } else {
+ /* Linear interpolation of look up table */
+ ind = silk_RSHIFT( in_Q5, 5 );
+ return( sigm_LUT_pos_Q15[ ind ] + silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) );
+ }
+ }
+}
+
diff --git a/drivers/opus/silk/silk_main.h b/drivers/opus/silk/silk_main.h
new file mode 100644
index 0000000000..2bdf89784d
--- /dev/null
+++ b/drivers/opus/silk/silk_main.h
@@ -0,0 +1,438 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MAIN_H
+#define SILK_MAIN_H
+
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "structs.h"
+#include "tables.h"
+#include "PLC.h"
+#include "control.h"
+#include "debug.h"
+#include "entenc.h"
+#include "entdec.h"
+
+/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
+void silk_stereo_LR_to_MS(
+ stereo_enc_state *state, /* I/O State */
+ opus_int16 x1[], /* I/O Left input signal, becomes mid signal */
+ opus_int16 x2[], /* I/O Right input signal, becomes side signal */
+ opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */
+ opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */
+ opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */
+ opus_int32 total_rate_bps, /* I Total bitrate */
+ opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
+ opus_int toMono, /* I Last frame before a stereo->mono transition */
+ opus_int fs_kHz, /* I Sample rate (kHz) */
+ opus_int frame_length /* I Number of samples */
+);
+
+/* Convert adaptive Mid/Side representation to Left/Right stereo signal */
+void silk_stereo_MS_to_LR(
+ stereo_dec_state *state, /* I/O State */
+ opus_int16 x1[], /* I/O Mid input signal, becomes left signal */
+ opus_int16 x2[], /* I/O Side input signal, becomes right signal */
+ const opus_int32 pred_Q13[], /* I Predictors */
+ opus_int fs_kHz, /* I Sample rate (kHz) */
+ opus_int frame_length /* I Number of samples */
+);
+
+/* Find least-squares prediction gain for one signal based on another and quantize it */
+opus_int32 silk_stereo_find_predictor( /* O Returns predictor in Q13 */
+ opus_int32 *ratio_Q14, /* O Ratio of residual and mid energies */
+ const opus_int16 x[], /* I Basis signal */
+ const opus_int16 y[], /* I Target signal */
+ opus_int32 mid_res_amp_Q0[], /* I/O Smoothed mid, residual norms */
+ opus_int length, /* I Number of samples */
+ opus_int smooth_coef_Q16 /* I Smoothing coefficient */
+);
+
+/* Quantize mid/side predictors */
+void silk_stereo_quant_pred(
+ opus_int32 pred_Q13[], /* I/O Predictors (out: quantized) */
+ opus_int8 ix[ 2 ][ 3 ] /* O Quantization indices */
+);
+
+/* Entropy code the mid/side quantization indices */
+void silk_stereo_encode_pred(
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ opus_int8 ix[ 2 ][ 3 ] /* I Quantization indices */
+);
+
+/* Entropy code the mid-only flag */
+void silk_stereo_encode_mid_only(
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ opus_int8 mid_only_flag
+);
+
+/* Decode mid/side predictors */
+void silk_stereo_decode_pred(
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int32 pred_Q13[] /* O Predictors */
+);
+
+/* Decode mid-only flag */
+void silk_stereo_decode_mid_only(
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int *decode_only_mid /* O Flag that only mid channel has been coded */
+);
+
+/* Encodes signs of excitation */
+void silk_encode_signs(
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ const opus_int8 pulses[], /* I pulse signal */
+ opus_int length, /* I length of input */
+ const opus_int signalType, /* I Signal type */
+ const opus_int quantOffsetType, /* I Quantization offset type */
+ const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */
+);
+
+/* Decodes signs of excitation */
+void silk_decode_signs(
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int pulses[], /* I/O pulse signal */
+ opus_int length, /* I length of input */
+ const opus_int signalType, /* I Signal type */
+ const opus_int quantOffsetType, /* I Quantization offset type */
+ const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */
+);
+
+/* Check encoder control struct */
+opus_int check_control_input(
+ silk_EncControlStruct *encControl /* I Control structure */
+);
+
+/* Control internal sampling rate */
+opus_int silk_control_audio_bandwidth(
+ silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */
+ silk_EncControlStruct *encControl /* I Control structure */
+);
+
+/* Control SNR of residual quantizer */
+opus_int silk_control_SNR(
+ silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */
+ opus_int32 TargetRate_bps /* I Target max bitrate (bps) */
+);
+
+/***************/
+/* Shell coder */
+/***************/
+
+/* Encode quantization indices of excitation */
+void silk_encode_pulses(
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ const opus_int signalType, /* I Signal type */
+ const opus_int quantOffsetType, /* I quantOffsetType */
+ opus_int8 pulses[], /* I quantization indices */
+ const opus_int frame_length /* I Frame length */
+);
+
+/* Shell encoder, operates on one shell code frame of 16 pulses */
+void silk_shell_encoder(
+ ec_enc *psRangeEnc, /* I/O compressor data structure */
+ const opus_int *pulses0 /* I data: nonnegative pulse amplitudes */
+);
+
+/* Shell decoder, operates on one shell code frame of 16 pulses */
+void silk_shell_decoder(
+ opus_int *pulses0, /* O data: nonnegative pulse amplitudes */
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ const opus_int pulses4 /* I number of pulses per pulse-subframe */
+);
+
+/* Gain scalar quantization with hysteresis, uniform on log scale */
+void silk_gains_quant(
+ opus_int8 ind[ MAX_NB_SUBFR ], /* O gain indices */
+ opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* I/O gains (quantized out) */
+ opus_int8 *prev_ind, /* I/O last index in previous frame */
+ const opus_int conditional, /* I first gain is delta coded if 1 */
+ const opus_int nb_subfr /* I number of subframes */
+);
+
+/* Gains scalar dequantization, uniform on log scale */
+void silk_gains_dequant(
+ opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* O quantized gains */
+ const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */
+ opus_int8 *prev_ind, /* I/O last index in previous frame */
+ const opus_int conditional, /* I first gain is delta coded if 1 */
+ const opus_int nb_subfr /* I number of subframes */
+);
+
+/* Compute unique identifier of gain indices vector */
+opus_int32 silk_gains_ID( /* O returns unique identifier of gains */
+ const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */
+ const opus_int nb_subfr /* I number of subframes */
+);
+
+/* Interpolate two vectors */
+void silk_interpolate(
+ opus_int16 xi[ MAX_LPC_ORDER ], /* O interpolated vector */
+ const opus_int16 x0[ MAX_LPC_ORDER ], /* I first vector */
+ const opus_int16 x1[ MAX_LPC_ORDER ], /* I second vector */
+ const opus_int ifact_Q2, /* I interp. factor, weight on 2nd vector */
+ const opus_int d /* I number of parameters */
+);
+
+/* LTP tap quantizer */
+void silk_quant_LTP_gains(
+ opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */
+ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */
+ opus_int8 *periodicity_index, /* O Periodicity Index */
+ opus_int32 *sum_gain_dB_Q7, /* I/O Cumulative max prediction gain */
+ const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */
+ opus_int mu_Q9, /* I Mu value (R/D tradeoff) */
+ opus_int lowComplexity, /* I Flag for low complexity */
+ const opus_int nb_subfr /* I number of subframes */
+);
+
+/* Entropy constrained matrix-weighted VQ, for a single input data vector */
+void silk_VQ_WMat_EC(
+ opus_int8 *ind, /* O index of best codebook vector */
+ opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */
+ opus_int *gain_Q7, /* O sum of absolute LTP coefficients */
+ const opus_int16 *in_Q14, /* I input vector to be quantized */
+ const opus_int32 *W_Q18, /* I weighting matrix */
+ const opus_int8 *cb_Q7, /* I codebook */
+ const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */
+ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */
+ const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */
+ const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */
+ opus_int L /* I number of vectors in codebook */
+);
+
+/************************************/
+/* Noise shaping quantization (NSQ) */
+/************************************/
+void silk_NSQ(
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ SideInfoIndices *psIndices, /* I/O Quantization Indices */
+ const opus_int32 x_Q3[], /* I Prefiltered input signal */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
+ const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
+ const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
+ const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
+ const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
+ const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
+ const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ const opus_int LTP_scale_Q14 /* I LTP state scaling */
+);
+
+/* Noise shaping using delayed decision */
+void silk_NSQ_del_dec(
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ SideInfoIndices *psIndices, /* I/O Quantization Indices */
+ const opus_int32 x_Q3[], /* I Prefiltered input signal */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
+ const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
+ const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
+ const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
+ const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
+ const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
+ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
+ const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ const opus_int LTP_scale_Q14 /* I LTP state scaling */
+);
+
+/************/
+/* Silk VAD */
+/************/
+/* Initialize the Silk VAD */
+opus_int silk_VAD_Init( /* O Return value, 0 if success */
+ silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
+);
+
+/* Get speech activity level in Q8 */
+opus_int silk_VAD_GetSA_Q8( /* O Return value, 0 if success */
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ const opus_int16 pIn[] /* I PCM input */
+);
+
+/* Low-pass filter with variable cutoff frequency based on */
+/* piece-wise linear interpolation between elliptic filters */
+/* Start by setting transition_frame_no = 1; */
+void silk_LP_variable_cutoff(
+ silk_LP_state *psLP, /* I/O LP filter state */
+ opus_int16 *frame, /* I/O Low-pass filtered output signal */
+ const opus_int frame_length /* I Frame length */
+);
+
+/******************/
+/* NLSF Quantizer */
+/******************/
+/* Limit, stabilize, convert and quantize NLSFs */
+void silk_process_NLSFs(
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */
+ opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */
+ const opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */
+);
+
+opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */
+ opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */
+ opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */
+ const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */
+ const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */
+ const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */
+ const opus_int nSurvivors, /* I Max survivors after first stage */
+ const opus_int signalType /* I Signal type: 0/1/2 */
+);
+
+/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */
+void silk_NLSF_VQ(
+ opus_int32 err_Q26[], /* O Quantization errors [K] */
+ const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */
+ const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */
+ const opus_int K, /* I Number of codebook vectors */
+ const opus_int LPC_order /* I Number of LPCs */
+);
+
+/* Delayed-decision quantizer for NLSF residuals */
+opus_int32 silk_NLSF_del_dec_quant( /* O Returns RD value in Q25 */
+ opus_int8 indices[], /* O Quantization indices [ order ] */
+ const opus_int16 x_Q10[], /* I Input [ order ] */
+ const opus_int16 w_Q5[], /* I Weights [ order ] */
+ const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */
+ const opus_int16 ec_ix[], /* I Indices to entropy coding tables [ order ] */
+ const opus_uint8 ec_rates_Q5[], /* I Rates [] */
+ const opus_int quant_step_size_Q16, /* I Quantization step size */
+ const opus_int16 inv_quant_step_size_Q6, /* I Inverse quantization step size */
+ const opus_int32 mu_Q20, /* I R/D tradeoff */
+ const opus_int16 order /* I Number of input values */
+);
+
+/* Unpack predictor values and indices for entropy coding tables */
+void silk_NLSF_unpack(
+ opus_int16 ec_ix[], /* O Indices to entropy tables [ LPC_ORDER ] */
+ opus_uint8 pred_Q8[], /* O LSF predictor [ LPC_ORDER ] */
+ const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */
+ const opus_int CB1_index /* I Index of vector in first LSF codebook */
+);
+
+/***********************/
+/* NLSF vector decoder */
+/***********************/
+void silk_NLSF_decode(
+ opus_int16 *pNLSF_Q15, /* O Quantized NLSF vector [ LPC_ORDER ] */
+ opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */
+ const silk_NLSF_CB_struct *psNLSF_CB /* I Codebook object */
+);
+
+/****************************************************/
+/* Decoder Functions */
+/****************************************************/
+opus_int silk_init_decoder(
+ silk_decoder_state *psDec /* I/O Decoder state pointer */
+);
+
+/* Set decoder sampling rate */
+opus_int silk_decoder_set_fs(
+ silk_decoder_state *psDec, /* I/O Decoder state pointer */
+ opus_int fs_kHz, /* I Sampling frequency (kHz) */
+ opus_int32 fs_API_Hz /* I API Sampling frequency (Hz) */
+);
+
+/****************/
+/* Decode frame */
+/****************/
+opus_int silk_decode_frame(
+ silk_decoder_state *psDec, /* I/O Pointer to Silk decoder state */
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int16 pOut[], /* O Pointer to output speech frame */
+ opus_int32 *pN, /* O Pointer to size of output frame */
+ opus_int lostFlag, /* I 0: no loss, 1: loss, 2: decode FEC */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+/* Decode indices from bitstream */
+void silk_decode_indices(
+ silk_decoder_state *psDec, /* I/O State */
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int FrameIndex, /* I Frame number */
+ opus_int decode_LBRR, /* I Flag indicating LBRR data is being decoded */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+/* Decode parameters from payload */
+void silk_decode_parameters(
+ silk_decoder_state *psDec, /* I/O State */
+ silk_decoder_control *psDecCtrl, /* I/O Decoder control */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+/* Core decoder. Performs inverse NSQ operation LTP + LPC */
+void silk_decode_core(
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I Decoder control */
+ opus_int16 xq[], /* O Decoded speech */
+ const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */
+);
+
+/* Decode quantization indices of excitation (Shell coding) */
+void silk_decode_pulses(
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int pulses[], /* O Excitation signal */
+ const opus_int signalType, /* I Sigtype */
+ const opus_int quantOffsetType, /* I quantOffsetType */
+ const opus_int frame_length /* I Frame length */
+);
+
+/******************/
+/* CNG */
+/******************/
+
+/* Reset CNG */
+void silk_CNG_Reset(
+ silk_decoder_state *psDec /* I/O Decoder state */
+);
+
+/* Updates CNG estimate, and applies the CNG when packet was lost */
+void silk_CNG(
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I/O Decoder control */
+ opus_int16 frame[], /* I/O Signal */
+ opus_int length /* I Length of residual */
+);
+
+/* Encoding of various parameters */
+void silk_encode_indices(
+ silk_encoder_state *psEncC, /* I/O Encoder state */
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ opus_int FrameIndex, /* I Frame number */
+ opus_int encode_LBRR, /* I Flag indicating LBRR data is being encoded */
+ opus_int condCoding /* I The type of conditional coding to use */
+);
+
+#endif
diff --git a/drivers/opus/silk/sort.c b/drivers/opus/silk/sort.c
new file mode 100644
index 0000000000..5e9ba08616
--- /dev/null
+++ b/drivers/opus/silk/sort.c
@@ -0,0 +1,154 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* Insertion sort (fast for already almost sorted arrays): */
+/* Best case: O(n) for an already sorted array */
+/* Worst case: O(n^2) for an inversely sorted array */
+/* */
+/* Shell sort: http://en.wikipedia.org/wiki/Shell_sort */
+
+#include "SigProc_FIX.h"
+
+void silk_insertion_sort_increasing(
+ opus_int32 *a, /* I/O Unsorted / Sorted vector (only a[ 0 .. K-1 ] ends up sorted) */
+ opus_int *idx, /* O Index vector for the sorted elements (K entries written) */
+ const opus_int L, /* I Vector length */
+ const opus_int K /* I Number of correctly sorted positions (0 < K <= L) */
+)
+{
+ opus_int32 value;
+ opus_int i, j;
+
+ /* Safety checks: require 0 < K <= L */
+ silk_assert( K > 0 );
+ silk_assert( L > 0 );
+ silk_assert( L >= K );
+
+ /* Write start indices in index vector */
+ for( i = 0; i < K; i++ ) {
+ idx[ i ] = i;
+ }
+
+ /* Insertion-sort the first K vector elements by value, increasing order */
+ for( i = 1; i < K; i++ ) {
+ value = a[ i ];
+ for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
+ a[ j + 1 ] = a[ j ]; /* Shift value */
+ idx[ j + 1 ] = idx[ j ]; /* Shift index */
+ }
+ a[ j + 1 ] = value; /* Write value */
+ idx[ j + 1 ] = i; /* Write index */
+ }
+
+ /* If fewer than L values are asked for (K < L), check the remaining values, */
+ /* but only insert those that are smaller than the current last of the K first values */
+ for( i = K; i < L; i++ ) {
+ value = a[ i ];
+ if( value < a[ K - 1 ] ) { /* old a[ K - 1 ] is overwritten to make room */
+ for( j = K - 2; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
+ a[ j + 1 ] = a[ j ]; /* Shift value */
+ idx[ j + 1 ] = idx[ j ]; /* Shift index */
+ }
+ a[ j + 1 ] = value; /* Write value */
+ idx[ j + 1 ] = i; /* Write index */
+ }
+ }
+}
+
+#ifdef OPUS_FIXED_POINT
+/* This function is only used by the fixed-point build */
+void silk_insertion_sort_decreasing_int16(
+ opus_int16 *a, /* I/O Unsorted / Sorted vector (only a[ 0 .. K-1 ] ends up sorted) */
+ opus_int *idx, /* O Index vector for the sorted elements (K entries written) */
+ const opus_int L, /* I Vector length */
+ const opus_int K /* I Number of correctly sorted positions (0 < K <= L) */
+)
+{
+ opus_int i, j;
+ opus_int value;
+
+ /* Safety checks: require 0 < K <= L */
+ silk_assert( K > 0 );
+ silk_assert( L > 0 );
+ silk_assert( L >= K );
+
+ /* Write start indices in index vector */
+ for( i = 0; i < K; i++ ) {
+ idx[ i ] = i;
+ }
+
+ /* Insertion-sort the first K vector elements by value, decreasing order */
+ for( i = 1; i < K; i++ ) {
+ value = a[ i ];
+ for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+ a[ j + 1 ] = a[ j ]; /* Shift value */
+ idx[ j + 1 ] = idx[ j ]; /* Shift index */
+ }
+ a[ j + 1 ] = value; /* Write value */
+ idx[ j + 1 ] = i; /* Write index */
+ }
+
+ /* If fewer than L values are asked for (K < L), check the remaining values, */
+ /* but only insert those that are larger than the current last of the K first values */
+ for( i = K; i < L; i++ ) {
+ value = a[ i ];
+ if( value > a[ K - 1 ] ) { /* old a[ K - 1 ] is overwritten to make room */
+ for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+ a[ j + 1 ] = a[ j ]; /* Shift value */
+ idx[ j + 1 ] = idx[ j ]; /* Shift index */
+ }
+ a[ j + 1 ] = value; /* Write value */
+ idx[ j + 1 ] = i; /* Write index */
+ }
+ }
+}
+#endif
+
+void silk_insertion_sort_increasing_all_values_int16(
+ opus_int16 *a, /* I/O Unsorted / Sorted vector (all L entries are sorted in place) */
+ const opus_int L /* I Vector length (must be > 0) */
+)
+{
+ opus_int value;
+ opus_int i, j;
+
+ /* Safety checks */
+ silk_assert( L > 0 );
+
+ /* Insertion-sort all vector elements by value, increasing order */
+ for( i = 1; i < L; i++ ) {
+ value = a[ i ];
+ for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
+ a[ j + 1 ] = a[ j ]; /* Shift value */
+ }
+ a[ j + 1 ] = value; /* Write value */
+ }
+}
diff --git a/drivers/opus/silk/stereo_LR_to_MS.c b/drivers/opus/silk/stereo_LR_to_MS.c
new file mode 100644
index 0000000000..678f46984b
--- /dev/null
+++ b/drivers/opus/silk/stereo_LR_to_MS.c
@@ -0,0 +1,229 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
+void silk_stereo_LR_to_MS(
+ stereo_enc_state *state, /* I/O State */
+ opus_int16 x1[], /* I/O Left input signal, becomes mid signal */
+ opus_int16 x2[], /* I/O Right input signal, becomes side signal */
+ opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */
+ opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */
+ opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */
+ opus_int32 total_rate_bps, /* I Total bitrate */
+ opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
+ opus_int toMono, /* I Last frame before a stereo->mono transition */
+ opus_int fs_kHz, /* I Sample rate (kHz) */
+ opus_int frame_length /* I Number of samples */
+)
+{
+ opus_int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
+ opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13;
+ opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24;
+ VARDECL( opus_int16, side );
+ VARDECL( opus_int16, LP_mid );
+ VARDECL( opus_int16, HP_mid );
+ VARDECL( opus_int16, LP_side );
+ VARDECL( opus_int16, HP_side );
+ opus_int16 *mid = &x1[ -2 ]; /* mid aliases x1, starting two samples before x1[ 0 ]; it is written in place */
+ SAVE_STACK;
+
+ ALLOC( side, frame_length + 2, opus_int16 );
+ /* Convert to basic mid/side signals */
+ for( n = 0; n < frame_length + 2; n++ ) {
+ sum = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ];
+ diff = x1[ n - 2 ] - (opus_int32)x2[ n - 2 ];
+ mid[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
+ side[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( diff, 1 ) );
+ }
+
+ /* Buffering: first two samples come from the state, last two samples of this frame are saved in the state */
+ silk_memcpy( mid, state->sMid, 2 * sizeof( opus_int16 ) );
+ silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) );
+ silk_memcpy( state->sMid, &mid[ frame_length ], 2 * sizeof( opus_int16 ) );
+ silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );
+
+ /* LP and HP filter mid signal */
+ ALLOC( LP_mid, frame_length, opus_int16 );
+ ALLOC( HP_mid, frame_length, opus_int16 );
+ for( n = 0; n < frame_length; n++ ) {
+ sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
+ LP_mid[ n ] = sum;
+ HP_mid[ n ] = mid[ n + 1 ] - sum;
+ }
+
+ /* LP and HP filter side signal */
+ ALLOC( LP_side, frame_length, opus_int16 );
+ ALLOC( HP_side, frame_length, opus_int16 );
+ for( n = 0; n < frame_length; n++ ) {
+ sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
+ LP_side[ n ] = sum;
+ HP_side[ n ] = side[ n + 1 ] - sum;
+ }
+
+ /* Find energies and predictors */
+ is10msFrame = frame_length == 10 * fs_kHz;
+ smooth_coef_Q16 = is10msFrame ?
+ SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :
+ SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF, 16 );
+ smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 );
+
+ pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 );
+ pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 );
+ /* Ratio of the norms of residual and mid signals */
+ frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 );
+ frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) );
+
+ /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
+ total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */
+ if( total_rate_bps < 1 ) {
+ total_rate_bps = 1;
+ }
+ min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 );
+ silk_assert( min_mid_rate_bps < 32767 );
+ /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_rate */
+ frac_3_Q16 = silk_MUL( 3, frac_Q16 );
+ mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 );
+ /* If Mid bitrate below minimum, reduce stereo width */
+ if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) {
+ mid_side_rates_bps[ 0 ] = min_mid_rate_bps;
+ mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
+ /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */
+ width_Q14 = silk_DIV32_varQ( silk_LSHIFT( mid_side_rates_bps[ 1 ], 1 ) - min_mid_rate_bps,
+ silk_SMULWB( SILK_FIX_CONST( 1, 16 ) + frac_3_Q16, min_mid_rate_bps ), 14+2 );
+ width_Q14 = silk_LIMIT( width_Q14, 0, SILK_FIX_CONST( 1, 14 ) );
+ } else {
+ mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
+ width_Q14 = SILK_FIX_CONST( 1, 14 );
+ }
+
+ /* Smoother */
+ state->smth_width_Q14 = (opus_int16)silk_SMLAWB( state->smth_width_Q14, width_Q14 - state->smth_width_Q14, smooth_coef_Q16 );
+
+ /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
+ *mid_only_flag = 0;
+ if( toMono ) {
+ /* Last frame before stereo->mono transition; collapse stereo width */
+ width_Q14 = 0;
+ pred_Q13[ 0 ] = 0;
+ pred_Q13[ 1 ] = 0;
+ silk_stereo_quant_pred( pred_Q13, ix );
+ } else if( state->width_prev_Q14 == 0 &&
+ ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) )
+ {
+ /* Code as panned-mono; previous frame already had zero width */
+ /* Scale down and quantize predictors */
+ pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
+ pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
+ silk_stereo_quant_pred( pred_Q13, ix );
+ /* Collapse stereo width */
+ width_Q14 = 0;
+ pred_Q13[ 0 ] = 0;
+ pred_Q13[ 1 ] = 0;
+ mid_side_rates_bps[ 0 ] = total_rate_bps;
+ mid_side_rates_bps[ 1 ] = 0;
+ *mid_only_flag = 1;
+ } else if( state->width_prev_Q14 != 0 &&
+ ( 8 * total_rate_bps < 11 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.02, 14 ) ) )
+ {
+ /* Transition to zero-width stereo */
+ /* Scale down and quantize predictors */
+ pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
+ pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
+ silk_stereo_quant_pred( pred_Q13, ix );
+ /* Collapse stereo width */
+ width_Q14 = 0;
+ pred_Q13[ 0 ] = 0;
+ pred_Q13[ 1 ] = 0;
+ } else if( state->smth_width_Q14 > SILK_FIX_CONST( 0.95, 14 ) ) {
+ /* Full-width stereo coding */
+ silk_stereo_quant_pred( pred_Q13, ix );
+ width_Q14 = SILK_FIX_CONST( 1, 14 );
+ } else {
+ /* Reduced-width stereo coding; scale down and quantize predictors */
+ pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
+ pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
+ silk_stereo_quant_pred( pred_Q13, ix );
+ width_Q14 = state->smth_width_Q14;
+ }
+
+ /* Make sure to keep on encoding until the tapered output has been transmitted */
+ if( *mid_only_flag == 1 ) {
+ state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz;
+ if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) {
+ *mid_only_flag = 0;
+ } else {
+ /* Limit to avoid wrapping around */
+ state->silent_side_len = 10000;
+ }
+ } else {
+ state->silent_side_len = 0;
+ }
+
+ if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) { /* side is still coded: give it a rate of at least 1 */
+ mid_side_rates_bps[ 1 ] = 1;
+ mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]);
+ }
+
+ /* Interpolate predictors and subtract prediction from side channel */
+ pred0_Q13 = -state->pred_prev_Q13[ 0 ];
+ pred1_Q13 = -state->pred_prev_Q13[ 1 ];
+ w_Q24 = silk_LSHIFT( state->width_prev_Q14, 10 );
+ denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
+ delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
+ delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
+ deltaw_Q24 = silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 );
+ for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
+ pred0_Q13 += delta0_Q13;
+ pred1_Q13 += delta1_Q13;
+ w_Q24 += deltaw_Q24;
+ sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
+ sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
+ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
+ x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); /* side[ n + 1 ] and x2[ n - 1 ] are the same sample position */
+ }
+
+ pred0_Q13 = -pred_Q13[ 0 ];
+ pred1_Q13 = -pred_Q13[ 1 ];
+ w_Q24 = silk_LSHIFT( width_Q14, 10 );
+ for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { /* rest of the frame: fixed predictors and width */
+ sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
+ sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
+ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
+ x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+ }
+ state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];
+ state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];
+ state->width_prev_Q14 = (opus_int16)width_Q14;
+ RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/stereo_MS_to_LR.c b/drivers/opus/silk/stereo_MS_to_LR.c
new file mode 100644
index 0000000000..34f43cf795
--- /dev/null
+++ b/drivers/opus/silk/stereo_MS_to_LR.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Convert adaptive Mid/Side representation to Left/Right stereo signal */
+void silk_stereo_MS_to_LR(
+ stereo_dec_state *state, /* I/O State */
+ opus_int16 x1[], /* I/O Mid input signal, becomes left signal */
+ opus_int16 x2[], /* I/O Side input signal, becomes right signal */
+ const opus_int32 pred_Q13[], /* I Predictors */
+ opus_int fs_kHz, /* I Sample rate (kHz) */
+ opus_int frame_length /* I Number of samples */
+)
+{
+ opus_int n, denom_Q16, delta0_Q13, delta1_Q13;
+ opus_int32 sum, diff, pred0_Q13, pred1_Q13;
+
+ /* Buffering: first two samples come from the state, the two samples at frame_length are saved in the state */
+ silk_memcpy( x1, state->sMid, 2 * sizeof( opus_int16 ) );
+ silk_memcpy( x2, state->sSide, 2 * sizeof( opus_int16 ) );
+ silk_memcpy( state->sMid, &x1[ frame_length ], 2 * sizeof( opus_int16 ) );
+ silk_memcpy( state->sSide, &x2[ frame_length ], 2 * sizeof( opus_int16 ) );
+
+ /* Interpolate predictors and add prediction to side channel */
+ pred0_Q13 = state->pred_prev_Q13[ 0 ];
+ pred1_Q13 = state->pred_prev_Q13[ 1 ];
+ denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
+ delta0_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
+ delta1_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
+ for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
+ pred0_Q13 += delta0_Q13;
+ pred1_Q13 += delta1_Q13;
+ sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */
+ sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */
+ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
+ x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+ }
+ pred0_Q13 = pred_Q13[ 0 ];
+ pred1_Q13 = pred_Q13[ 1 ];
+ for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { /* rest of the frame: fixed predictors */
+ sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */
+ sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */
+ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
+ x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+ }
+ state->pred_prev_Q13[ 0 ] = pred_Q13[ 0 ];
+ state->pred_prev_Q13[ 1 ] = pred_Q13[ 1 ];
+
+ /* Convert to left/right signals: left = mid + side, right = mid - side, both saturated to 16 bits */
+ for( n = 0; n < frame_length; n++ ) {
+ sum = x1[ n + 1 ] + (opus_int32)x2[ n + 1 ];
+ diff = x1[ n + 1 ] - (opus_int32)x2[ n + 1 ];
+ x1[ n + 1 ] = (opus_int16)silk_SAT16( sum );
+ x2[ n + 1 ] = (opus_int16)silk_SAT16( diff );
+ }
+}
diff --git a/drivers/opus/silk/stereo_decode_pred.c b/drivers/opus/silk/stereo_decode_pred.c
new file mode 100644
index 0000000000..56d94e56fe
--- /dev/null
+++ b/drivers/opus/silk/stereo_decode_pred.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Decode mid/side predictors */
+void silk_stereo_decode_pred(
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int32 pred_Q13[] /* O Predictors (two values written) */
+)
+{
+ opus_int n, ix[ 2 ][ 3 ];
+ opus_int32 low_Q13, step_Q13;
+
+ /* Entropy decoding: one joint symbol is split into ix[ 0 ][ 2 ] (quotient by 5) and ix[ 1 ][ 2 ] (remainder) */
+ n = ec_dec_icdf( psRangeDec, silk_stereo_pred_joint_iCDF, 8 );
+ ix[ 0 ][ 2 ] = silk_DIV32_16( n, 5 );
+ ix[ 1 ][ 2 ] = n - 5 * ix[ 0 ][ 2 ];
+ for( n = 0; n < 2; n++ ) {
+ ix[ n ][ 0 ] = ec_dec_icdf( psRangeDec, silk_uniform3_iCDF, 8 );
+ ix[ n ][ 1 ] = ec_dec_icdf( psRangeDec, silk_uniform5_iCDF, 8 );
+ }
+
+ /* Dequantize: pick a table interval, then step to the middle of sub-step ix[ n ][ 1 ] within it */
+ for( n = 0; n < 2; n++ ) {
+ ix[ n ][ 0 ] += 3 * ix[ n ][ 2 ];
+ low_Q13 = silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] ];
+ step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] + 1 ] - low_Q13,
+ SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) );
+ pred_Q13[ n ] = silk_SMLABB( low_Q13, step_Q13, 2 * ix[ n ][ 1 ] + 1 );
+ }
+
+ /* Subtract second from first predictor (helps when actually applying these) */
+ pred_Q13[ 0 ] -= pred_Q13[ 1 ];
+}
+
+/* Decode mid-only flag */
+void silk_stereo_decode_mid_only(
+ ec_dec *psRangeDec, /* I/O Compressor data structure */
+ opus_int *decode_only_mid /* O Flag that only mid channel has been coded */
+)
+{
+ /* Decode flag that only mid channel is coded; the decoded symbol is stored directly as the flag */
+ *decode_only_mid = ec_dec_icdf( psRangeDec, silk_stereo_only_code_mid_iCDF, 8 );
+}
diff --git a/drivers/opus/silk/stereo_encode_pred.c b/drivers/opus/silk/stereo_encode_pred.c
new file mode 100644
index 0000000000..bfe75b08e4
--- /dev/null
+++ b/drivers/opus/silk/stereo_encode_pred.c
@@ -0,0 +1,62 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Entropy code the mid/side quantization indices */
+void silk_stereo_encode_pred(
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ opus_int8 ix[ 2 ][ 3 ] /* I Quantization indices */
+)
+{
+ opus_int n;
+
+ /* Entropy coding: ix[ 0 ][ 2 ] and ix[ 1 ][ 2 ] are coded jointly as one symbol, asserted to be below 25 */
+ n = 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ];
+ silk_assert( n < 25 );
+ ec_enc_icdf( psRangeEnc, n, silk_stereo_pred_joint_iCDF, 8 );
+ for( n = 0; n < 2; n++ ) { /* then ix[ n ][ 0 ] and ix[ n ][ 1 ] for each of the two predictors */
+ silk_assert( ix[ n ][ 0 ] < 3 );
+ silk_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS );
+ ec_enc_icdf( psRangeEnc, ix[ n ][ 0 ], silk_uniform3_iCDF, 8 );
+ ec_enc_icdf( psRangeEnc, ix[ n ][ 1 ], silk_uniform5_iCDF, 8 );
+ }
+}
+
+/* Entropy code the mid-only flag */
+void silk_stereo_encode_mid_only(
+ ec_enc *psRangeEnc, /* I/O Compressor data structure */
+ opus_int8 mid_only_flag /* I Flag that only mid channel is coded */
+)
+{
+ /* Encode flag that only mid channel is coded */
+ ec_enc_icdf( psRangeEnc, mid_only_flag, silk_stereo_only_code_mid_iCDF, 8 );
+}
diff --git a/drivers/opus/silk/stereo_find_predictor.c b/drivers/opus/silk/stereo_find_predictor.c
new file mode 100644
index 0000000000..266293dff3
--- /dev/null
+++ b/drivers/opus/silk/stereo_find_predictor.c
@@ -0,0 +1,79 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Find least-squares prediction gain for one signal based on another; the gain is clamped here, not quantized */
+opus_int32 silk_stereo_find_predictor( /* O Returns predictor in Q13 */
+ opus_int32 *ratio_Q14, /* O Ratio of residual and mid energies */
+ const opus_int16 x[], /* I Basis signal */
+ const opus_int16 y[], /* I Target signal */
+ opus_int32 mid_res_amp_Q0[], /* I/O Smoothed mid, residual norms */
+ opus_int length, /* I Number of samples */
+ opus_int smooth_coef_Q16 /* I Smoothing coefficient */
+)
+{
+ opus_int scale, scale1, scale2;
+ opus_int32 nrgx, nrgy, corr, pred_Q13, pred2_Q10;
+
+ /* Find predictor */
+ silk_sum_sqr_shift( &nrgx, &scale1, x, length );
+ silk_sum_sqr_shift( &nrgy, &scale2, y, length );
+ scale = silk_max_int( scale1, scale2 );
+ scale = scale + ( scale & 1 ); /* make even, so that it can be halved exactly below */
+ nrgy = silk_RSHIFT32( nrgy, scale - scale2 );
+ nrgx = silk_RSHIFT32( nrgx, scale - scale1 );
+ nrgx = silk_max_int( nrgx, 1 ); /* keep the divisor below at least 1 */
+ corr = silk_inner_prod_aligned_scale( x, y, scale, length );
+ pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 );
+ pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 ); /* clamp to +/-( 1 << 14 ) */
+ pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 );
+
+ /* Faster update for signals with large prediction parameters */
+ smooth_coef_Q16 = (opus_int)silk_max_int( smooth_coef_Q16, silk_abs( pred2_Q10 ) );
+
+ /* Smoothed mid and residual norms */
+ silk_assert( smooth_coef_Q16 < 32768 );
+ scale = silk_RSHIFT( scale, 1 ); /* half the shift is applied after taking the square root */
+ mid_res_amp_Q0[ 0 ] = silk_SMLAWB( mid_res_amp_Q0[ 0 ], silk_LSHIFT( silk_SQRT_APPROX( nrgx ), scale ) - mid_res_amp_Q0[ 0 ],
+ smooth_coef_Q16 );
+ /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */
+ nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 + 1 );
+ nrgy = silk_ADD_LSHIFT32( nrgy, silk_SMULWB( nrgx, pred2_Q10 ), 6 );
+ mid_res_amp_Q0[ 1 ] = silk_SMLAWB( mid_res_amp_Q0[ 1 ], silk_LSHIFT( silk_SQRT_APPROX( nrgy ), scale ) - mid_res_amp_Q0[ 1 ],
+ smooth_coef_Q16 );
+
+ /* Ratio of smoothed residual and mid norms */
+ *ratio_Q14 = silk_DIV32_varQ( mid_res_amp_Q0[ 1 ], silk_max( mid_res_amp_Q0[ 0 ], 1 ), 14 );
+ *ratio_Q14 = silk_LIMIT( *ratio_Q14, 0, 32767 );
+
+ return pred_Q13;
+}
diff --git a/drivers/opus/silk/stereo_quant_pred.c b/drivers/opus/silk/stereo_quant_pred.c
new file mode 100644
index 0000000000..834020d715
--- /dev/null
+++ b/drivers/opus/silk/stereo_quant_pred.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Quantize mid/side predictors: each of the two entries of pred_Q13 is replaced by the level picked by the search below */
+void silk_stereo_quant_pred(
+ opus_int32 pred_Q13[], /* I/O Predictors (out: quantized; entry 0 additionally has the quantized entry 1 subtracted) */
+ opus_int8 ix[ 2 ][ 3 ] /* O Quantization indices per predictor n: [ n ][ 1 ] = sub-step j, [ n ][ 2 ] = interval i divided by 3, [ n ][ 0 ] = i minus 3 times [ n ][ 2 ] */
+)
+{
+ opus_int i, j, n;
+ opus_int32 low_Q13, step_Q13, lvl_Q13, err_min_Q13, err_Q13, quant_pred_Q13 = 0;
+
+ /* Quantize the two predictors independently */
+ for( n = 0; n < 2; n++ ) {
+ /* Brute-force search over quantization levels: i walks the intervals of silk_stereo_pred_quant_Q13, j the sub-steps inside one interval */
+ err_min_Q13 = silk_int32_MAX;
+ for( i = 0; i < STEREO_QUANT_TAB_SIZE - 1; i++ ) {
+ low_Q13 = silk_stereo_pred_quant_Q13[ i ];
+ step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ i + 1 ] - low_Q13,
+ SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) ); /* presumably half of one sub-step width, given the 0.5 / STEREO_QUANT_SUB_STEPS factor */
+ for( j = 0; j < STEREO_QUANT_SUB_STEPS; j++ ) {
+ lvl_Q13 = silk_SMLABB( low_Q13, step_Q13, 2 * j + 1 ); /* odd multiples of step_Q13 above low_Q13 - presumably the sub-step midpoints */
+ err_Q13 = silk_abs( pred_Q13[ n ] - lvl_Q13 );
+ if( err_Q13 < err_min_Q13 ) {
+ err_min_Q13 = err_Q13;
+ quant_pred_Q13 = lvl_Q13;
+ ix[ n ][ 0 ] = i;
+ ix[ n ][ 1 ] = j;
+ } else {
+ /* Error no longer decreasing, so we're past the optimum; leave both search loops at once */
+ goto done;
+ }
+ }
+ }
+ done:
+ ix[ n ][ 2 ] = silk_DIV32_16( ix[ n ][ 0 ], 3 ); /* split the interval index into a multiple-of-3 part ... */
+ ix[ n ][ 0 ] -= ix[ n ][ 2 ] * 3; /* ... and what is left over */
+ pred_Q13[ n ] = quant_pred_Q13;
+ }
+
+ /* Subtract second from first predictor (helps when actually applying these) */
+ pred_Q13[ 0 ] -= pred_Q13[ 1 ];
+}
diff --git a/drivers/opus/silk/structs.h b/drivers/opus/silk/structs.h
new file mode 100644
index 0000000000..1826b36a80
--- /dev/null
+++ b/drivers/opus/silk/structs.h
@@ -0,0 +1,327 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_STRUCTS_H
+#define SILK_STRUCTS_H
+
+#include "typedef.h"
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/************************************/
+/* Noise shaping quantization state */
+/************************************/
+typedef struct { /* Embedded in silk_encoder_state as sNSQ */
+ opus_int16 xq[ 2 * MAX_FRAME_LENGTH ]; /* Buffer for quantized output signal */
+ opus_int32 sLTP_shp_Q14[ 2 * MAX_FRAME_LENGTH ]; /* Same element count as xq */
+ opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
+ opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
+ opus_int32 sLF_AR_shp_Q14;
+ opus_int lagPrev; /* NOTE(review): presumably the previous pitch lag, like lagPrev in silk_decoder_state - confirm */
+ opus_int sLTP_buf_idx;
+ opus_int sLTP_shp_buf_idx;
+ opus_int32 rand_seed;
+ opus_int32 prev_gain_Q16;
+ opus_int rewhite_flag;
+} silk_nsq_state;
+
+/********************************/
+/* VAD (voice activity detector) state */
+/********************************/
+typedef struct { /* Embedded in silk_encoder_state as sVAD */
+ opus_int32 AnaState[ 2 ]; /* Analysis filterbank state: 0-8 kHz */
+ opus_int32 AnaState1[ 2 ]; /* Analysis filterbank state: 0-4 kHz */
+ opus_int32 AnaState2[ 2 ]; /* Analysis filterbank state: 0-2 kHz */
+ opus_int32 XnrgSubfr[ VAD_N_BANDS ]; /* Subframe energies */
+ opus_int32 NrgRatioSmth_Q8[ VAD_N_BANDS ]; /* Smoothed energy level in each band */
+ opus_int16 HPstate; /* State of differentiator in the lowest band */
+ opus_int32 NL[ VAD_N_BANDS ]; /* Noise energy level in each band */
+ opus_int32 inv_NL[ VAD_N_BANDS ]; /* Inverse noise energy level in each band */
+ opus_int32 NoiseLevelBias[ VAD_N_BANDS ]; /* Noise level estimator bias/offset */
+ opus_int32 counter; /* Frame counter used in the initial phase */
+} silk_VAD_state;
+
+/* Variable cut-off low-pass filter state (embedded in silk_encoder_state as sLP) */
+typedef struct {
+ opus_int32 In_LP_State[ 2 ]; /* Low pass filter state */
+ opus_int32 transition_frame_no; /* Counter which is mapped to a cut-off frequency */
+ opus_int mode; /* Operating mode, <0: switch down, >0: switch up, 0: do nothing */
+} silk_LP_state;
+
+/* Structure containing NLSF codebook; all members are const, the tables themselves are referenced through pointers */
+typedef struct {
+ const opus_int16 nVectors;
+ const opus_int16 order;
+ const opus_int16 quantStepSize_Q16;
+ const opus_int16 invQuantStepSize_Q6;
+ const opus_uint8 *CB1_NLSF_Q8;
+ const opus_uint8 *CB1_iCDF;
+ const opus_uint8 *pred_Q8;
+ const opus_uint8 *ec_sel;
+ const opus_uint8 *ec_iCDF;
+ const opus_uint8 *ec_Rates_Q5;
+ const opus_int16 *deltaMin_Q15;
+} silk_NLSF_CB_struct; /* Instances declared in tables.h: silk_NLSF_CB_WB and silk_NLSF_CB_NB_MB */
+
+typedef struct { /* Stereo state on the encoder side; its first three members match stereo_dec_state */
+ opus_int16 pred_prev_Q13[ 2 ];
+ opus_int16 sMid[ 2 ];
+ opus_int16 sSide[ 2 ];
+ opus_int32 mid_side_amp_Q0[ 4 ];
+ opus_int16 smth_width_Q14;
+ opus_int16 width_prev_Q14;
+ opus_int16 silent_side_len;
+ opus_int8 predIx[ MAX_FRAMES_PER_PACKET ][ 2 ][ 3 ]; /* One [ 2 ][ 3 ] index set (the shape silk_stereo_quant_pred writes) per frame in the packet */
+ opus_int8 mid_only_flags[ MAX_FRAMES_PER_PACKET ]; /* One flag per frame in the packet */
+} stereo_enc_state;
+
+typedef struct { /* Stereo state on the decoder side; same three members that open stereo_enc_state */
+ opus_int16 pred_prev_Q13[ 2 ];
+ opus_int16 sMid[ 2 ];
+ opus_int16 sSide[ 2 ];
+} stereo_dec_state;
+
+typedef struct { /* Per-frame index set; stored as indices / indices_LBRR in the encoder state and as indices in the decoder state */
+ opus_int8 GainsIndices[ MAX_NB_SUBFR ]; /* One entry per subframe */
+ opus_int8 LTPIndex[ MAX_NB_SUBFR ]; /* One entry per subframe */
+ opus_int8 NLSFIndices[ MAX_LPC_ORDER + 1 ];
+ opus_int16 lagIndex;
+ opus_int8 contourIndex;
+ opus_int8 signalType;
+ opus_int8 quantOffsetType;
+ opus_int8 NLSFInterpCoef_Q2;
+ opus_int8 PERIndex;
+ opus_int8 LTP_scaleIndex;
+ opus_int8 Seed;
+} SideInfoIndices;
+
+/********************************/
+/* Encoder state */
+/********************************/
+typedef struct {
+ opus_int32 In_HP_State[ 2 ]; /* High pass filter state */
+ opus_int32 variable_HP_smth1_Q15; /* State of first smoother */
+ opus_int32 variable_HP_smth2_Q15; /* State of second smoother */
+ silk_LP_state sLP; /* Low pass filter state */
+ silk_VAD_state sVAD; /* Voice activity detector state */
+ silk_nsq_state sNSQ; /* Noise Shape Quantizer State */
+ opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ]; /* Previously quantized NLSF vector */
+ opus_int speech_activity_Q8; /* Speech activity */
+ opus_int allow_bandwidth_switch; /* Flag indicating that switching of internal bandwidth is allowed */
+ opus_int8 LBRRprevLastGainIndex;
+ opus_int8 prevSignalType;
+ opus_int prevLag;
+ opus_int pitch_LPC_win_length;
+ opus_int max_pitch_lag; /* Highest possible pitch lag (samples) */
+ opus_int32 API_fs_Hz; /* API sampling frequency (Hz) */
+ opus_int32 prev_API_fs_Hz; /* Previous API sampling frequency (Hz) */
+ opus_int maxInternal_fs_Hz; /* Maximum internal sampling frequency (Hz) */
+ opus_int minInternal_fs_Hz; /* Minimum internal sampling frequency (Hz) */
+ opus_int desiredInternal_fs_Hz; /* Soft request for internal sampling frequency (Hz) */
+ opus_int fs_kHz; /* Internal sampling frequency (kHz) */
+ opus_int nb_subfr; /* Number of 5 ms subframes in a frame */
+ opus_int frame_length; /* Frame length (samples) */
+ opus_int subfr_length; /* Subframe length (samples) */
+ opus_int ltp_mem_length; /* Length of LTP memory */
+ opus_int la_pitch; /* Look-ahead for pitch analysis (samples) */
+ opus_int la_shape; /* Look-ahead for noise shape analysis (samples) */
+ opus_int shapeWinLength; /* Window length for noise shape analysis (samples) */
+ opus_int32 TargetRate_bps; /* Target bitrate (bps) */
+ opus_int PacketSize_ms; /* Number of milliseconds to put in each packet */
+ opus_int PacketLoss_perc; /* Packet loss rate measured by farend */
+ opus_int32 frameCounter;
+ opus_int Complexity; /* Complexity setting */
+ opus_int nStatesDelayedDecision; /* Number of states in delayed decision quantization */
+ opus_int useInterpolatedNLSFs; /* Flag for using NLSF interpolation */
+ opus_int shapingLPCOrder; /* Filter order for noise shaping filters */
+ opus_int predictLPCOrder; /* Filter order for prediction filters */
+ opus_int pitchEstimationComplexity; /* Complexity level for pitch estimator */
+ opus_int pitchEstimationLPCOrder; /* Whitening filter order for pitch estimator */
+ opus_int32 pitchEstimationThreshold_Q16; /* Threshold for pitch estimator */
+ opus_int LTPQuantLowComplexity; /* Flag for low complexity LTP quantization */
+ opus_int mu_LTP_Q9; /* Rate-distortion tradeoff in LTP quantization */
+ opus_int32 sum_log_gain_Q7; /* Cumulative max prediction gain */
+ opus_int NLSF_MSVQ_Survivors; /* Number of survivors in NLSF MSVQ */
+ opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation, pitch prediction */
+ opus_int controlled_since_last_payload; /* Flag for ensuring codec_control only runs once per packet */
+ opus_int warping_Q16; /* Warping parameter for warped noise shaping */
+ opus_int useCBR; /* Flag to enable constant bitrate */
+ opus_int prefillFlag; /* Flag to indicate that only buffers are prefilled, no coding */
+ const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */
+ const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */
+ const silk_NLSF_CB_struct *psNLSF_CB; /* Pointer to NLSF codebook */
+ opus_int input_quality_bands_Q15[ VAD_N_BANDS ];
+ opus_int input_tilt_Q15;
+ opus_int SNR_dB_Q7; /* Quality setting */
+
+ opus_int8 VAD_flags[ MAX_FRAMES_PER_PACKET ];
+ opus_int8 LBRR_flag;
+ opus_int LBRR_flags[ MAX_FRAMES_PER_PACKET ];
+
+ SideInfoIndices indices;
+ opus_int8 pulses[ MAX_FRAME_LENGTH ];
+
+ int arch;
+
+ /* Input/output buffering */
+ opus_int16 inputBuf[ MAX_FRAME_LENGTH + 2 ]; /* Buffer containing input signal */
+ opus_int inputBufIx;
+ opus_int nFramesPerPacket;
+ opus_int nFramesEncoded; /* Number of frames analyzed in current packet */
+
+ opus_int nChannelsAPI;
+ opus_int nChannelsInternal;
+ opus_int channelNb;
+
+ /* Parameters For LTP scaling Control */
+ opus_int frames_since_onset;
+
+ /* Specifically for entropy coding */
+ opus_int ec_prevSignalType;
+ opus_int16 ec_prevLagIndex;
+
+ silk_resampler_state_struct resampler_state;
+
+ /* DTX */
+ opus_int useDTX; /* Flag to enable DTX */
+ opus_int inDTX; /* Flag to signal DTX period */
+ opus_int noSpeechCounter; /* Counts consecutive nonactive frames, used by DTX */
+
+ /* Inband Low Bitrate Redundancy (LBRR) data */
+ opus_int useInBandFEC; /* Saves the API setting for query */
+ opus_int LBRR_enabled; /* Depends on useInBandFEC, bitrate and packet loss rate */
+ opus_int LBRR_GainIncreases; /* Gains increment for coding LBRR frames */
+ SideInfoIndices indices_LBRR[ MAX_FRAMES_PER_PACKET ];
+ opus_int8 pulses_LBRR[ MAX_FRAMES_PER_PACKET ][ MAX_FRAME_LENGTH ];
+} silk_encoder_state;
+
+
+/* Struct for Packet Loss Concealment (embedded in silk_decoder_state as sPLC) */
+typedef struct {
+ opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */
+ opus_int16 LTPCoef_Q14[ LTP_ORDER ]; /* LTP coefficients to use for voiced concealment */
+ opus_int16 prevLPC_Q12[ MAX_LPC_ORDER ];
+ opus_int last_frame_lost; /* Was previous frame lost */
+ opus_int32 rand_seed; /* Seed for unvoiced signal generation */
+ opus_int16 randScale_Q14; /* Scaling of unvoiced random signal */
+ opus_int32 conc_energy;
+ opus_int conc_energy_shift;
+ opus_int16 prevLTP_scale_Q14;
+ opus_int32 prevGain_Q16[ 2 ];
+ opus_int fs_kHz;
+ opus_int nb_subfr;
+ opus_int subfr_length;
+} silk_PLC_struct;
+
+/* Struct for CNG (embedded in silk_decoder_state as sCNG, under its "CNG state" comment) */
+typedef struct {
+ opus_int32 CNG_exc_buf_Q14[ MAX_FRAME_LENGTH ];
+ opus_int16 CNG_smth_NLSF_Q15[ MAX_LPC_ORDER ];
+ opus_int32 CNG_synth_state[ MAX_LPC_ORDER ];
+ opus_int32 CNG_smth_Gain_Q16;
+ opus_int32 rand_seed;
+ opus_int fs_kHz;
+} silk_CNG_struct;
+
+/********************************/
+/* Decoder state */
+/********************************/
+typedef struct {
+ opus_int32 prev_gain_Q16;
+ opus_int32 exc_Q14[ MAX_FRAME_LENGTH ];
+ opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ];
+ opus_int16 outBuf[ MAX_FRAME_LENGTH + 2 * MAX_SUB_FRAME_LENGTH ]; /* Buffer for output signal */
+ opus_int lagPrev; /* Previous Lag */
+ opus_int8 LastGainIndex; /* Previous gain index */
+ opus_int fs_kHz; /* Sampling frequency in kHz */
+ opus_int32 fs_API_hz; /* API sample frequency (Hz) */
+ opus_int nb_subfr; /* Number of 5 ms subframes in a frame */
+ opus_int frame_length; /* Frame length (samples) */
+ opus_int subfr_length; /* Subframe length (samples) */
+ opus_int ltp_mem_length; /* Length of LTP memory */
+ opus_int LPC_order; /* LPC order */
+ opus_int16 prevNLSF_Q15[ MAX_LPC_ORDER ]; /* Used to interpolate LSFs */
+ opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation */
+ const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */
+ const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */
+
+ /* For buffering payload in case of more frames per packet */
+ opus_int nFramesDecoded;
+ opus_int nFramesPerPacket;
+
+ /* Specifically for entropy coding */
+ opus_int ec_prevSignalType;
+ opus_int16 ec_prevLagIndex;
+
+ opus_int VAD_flags[ MAX_FRAMES_PER_PACKET ]; /* Note: opus_int here, whereas silk_encoder_state stores VAD_flags as opus_int8 */
+ opus_int LBRR_flag; /* Note: opus_int here, whereas silk_encoder_state stores LBRR_flag as opus_int8 */
+ opus_int LBRR_flags[ MAX_FRAMES_PER_PACKET ];
+
+ silk_resampler_state_struct resampler_state;
+
+ const silk_NLSF_CB_struct *psNLSF_CB; /* Pointer to NLSF codebook */
+
+ /* Quantization indices */
+ SideInfoIndices indices;
+
+ /* CNG state */
+ silk_CNG_struct sCNG;
+
+ /* Stuff used for PLC (packet loss concealment) */
+ opus_int lossCnt;
+ opus_int prevSignalType;
+
+ silk_PLC_struct sPLC;
+
+} silk_decoder_state;
+
+/************************/
+/* Decoder control */
+/************************/
+typedef struct {
+ /* Prediction and coding parameters */
+ opus_int pitchL[ MAX_NB_SUBFR ]; /* One entry per subframe */
+ opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; /* One entry per subframe */
+ /* Holds interpolated and final coefficients, 4-byte aligned */
+ silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+ opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; /* Presumably LTP_ORDER coefficients for each subframe, given the size - confirm */
+ opus_int LTP_scale_Q14;
+} silk_decoder_control;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/sum_sqr_shift.c b/drivers/opus/silk/sum_sqr_shift.c
new file mode 100644
index 0000000000..8ec27f8a03
--- /dev/null
+++ b/drivers/opus/silk/sum_sqr_shift.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Compute number of bits to right shift the sum of squares of a vector */
+/* of int16s to make it fit in an int32 (the shift grows in steps of 2 each time the running sum turns negative) */
+void silk_sum_sqr_shift(
+ opus_int32 *energy, /* O Energy of x, after shifting to the right */
+ opus_int *shift, /* O Number of bits right shift applied to energy */
+ const opus_int16 *x, /* I Input vector */
+ opus_int len /* I Length of input vector */
+)
+{
+ opus_int i, shft;
+ opus_int32 nrg_tmp, nrg;
+
+ nrg = 0;
+ shft = 0;
+ len--; /* len is now the index of the last sample; the loops below consume samples in pairs */
+ for( i = 0; i < len; i += 2 ) { /* Phase 1: accumulate unshifted until the sum first turns negative */
+ nrg = silk_SMLABB_ovflw( nrg, x[ i ], x[ i ] );
+ nrg = silk_SMLABB_ovflw( nrg, x[ i + 1 ], x[ i + 1 ] );
+ if( nrg < 0 ) {
+ /* Scale down */
+ nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
+ shft = 2;
+ i += 2; /* break skips the loop increment; step past this pair so the next loop does not add it a second time */
+ break;
+ }
+ }
+ for( ; i < len; i += 2 ) { /* Phase 2: continue from where phase 1 stopped, shifting each new pair by shft before adding */
+ nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
+ nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] );
+ nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, (opus_uint32)nrg_tmp, shft );
+ if( nrg < 0 ) {
+ /* Scale down */
+ nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
+ shft += 2;
+ }
+ }
+ if( i == len ) {
+ /* One sample left to process (the original length was odd) */
+ nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
+ nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
+ }
+
+ /* Make sure to have at least one extra leading zero (two leading zeros in total) */
+ if( nrg & 0xC0000000 ) {
+ nrg = silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
+ shft += 2;
+ }
+
+ /* Output arguments */
+ *shift = shft;
+ *energy = nrg;
+}
diff --git a/drivers/opus/silk/table_LSF_cos.c b/drivers/opus/silk/table_LSF_cos.c
new file mode 100644
index 0000000000..674b6a03e6
--- /dev/null
+++ b/drivers/opus/silk/table_LSF_cos.c
@@ -0,0 +1,70 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+/* Cosine approximation table for LSF conversion: 129 entries falling from 8192 to -8192, antisymmetric about the centre entry 0 */
+/* Q12 values (even) - every entry below is an even number */
+const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ] = {
+ 8192, 8190, 8182, 8170,
+ 8152, 8130, 8104, 8072,
+ 8034, 7994, 7946, 7896,
+ 7840, 7778, 7714, 7644,
+ 7568, 7490, 7406, 7318,
+ 7226, 7128, 7026, 6922,
+ 6812, 6698, 6580, 6458,
+ 6332, 6204, 6070, 5934,
+ 5792, 5648, 5502, 5352,
+ 5198, 5040, 4880, 4718,
+ 4552, 4382, 4212, 4038,
+ 3862, 3684, 3502, 3320,
+ 3136, 2948, 2760, 2570,
+ 2378, 2186, 1990, 1794,
+ 1598, 1400, 1202, 1002,
+ 802, 602, 402, 202,
+ 0, -202, -402, -602,
+ -802, -1002, -1202, -1400,
+ -1598, -1794, -1990, -2186,
+ -2378, -2570, -2760, -2948,
+ -3136, -3320, -3502, -3684,
+ -3862, -4038, -4212, -4382,
+ -4552, -4718, -4880, -5040,
+ -5198, -5352, -5502, -5648,
+ -5792, -5934, -6070, -6204,
+ -6332, -6458, -6580, -6698,
+ -6812, -6922, -7026, -7128,
+ -7226, -7318, -7406, -7490,
+ -7568, -7644, -7714, -7778,
+ -7840, -7896, -7946, -7994,
+ -8034, -8072, -8104, -8130,
+ -8152, -8170, -8182, -8190,
+ -8192
+};
diff --git a/drivers/opus/silk/tables.h b/drivers/opus/silk/tables.h
new file mode 100644
index 0000000000..a91431e854
--- /dev/null
+++ b/drivers/opus/silk/tables.h
@@ -0,0 +1,122 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_TABLES_H
+#define SILK_TABLES_H
+
+#include "define.h"
+#include "structs.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Entropy coding tables (with size in bytes indicated in the trailing comment) */
+extern const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ]; /* 24 */
+extern const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ]; /* 41 */
+
+extern const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ];/* 32 */
+extern const opus_uint8 silk_pitch_delta_iCDF[ 21 ]; /* 21 */
+extern const opus_uint8 silk_pitch_contour_iCDF[ 34 ]; /* 34 */
+extern const opus_uint8 silk_pitch_contour_NB_iCDF[ 11 ]; /* 11 */
+extern const opus_uint8 silk_pitch_contour_10_ms_iCDF[ 12 ]; /* 12 */
+extern const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[ 3 ]; /* 3 */
+
+extern const opus_uint8 silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ MAX_PULSES + 2 ]; /* 180 */
+extern const opus_uint8 silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ MAX_PULSES + 2 ]; /* 162 */
+
+extern const opus_uint8 silk_rate_levels_iCDF[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */
+extern const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */
+
+extern const opus_uint8 silk_max_pulses_table[ 4 ]; /* 4 */
+
+extern const opus_uint8 silk_shell_code_table0[ 152 ]; /* 152 */
+extern const opus_uint8 silk_shell_code_table1[ 152 ]; /* 152 */
+extern const opus_uint8 silk_shell_code_table2[ 152 ]; /* 152 */
+extern const opus_uint8 silk_shell_code_table3[ 152 ]; /* 152 */
+extern const opus_uint8 silk_shell_code_table_offsets[ MAX_PULSES + 1 ]; /* 17 */
+
+extern const opus_uint8 silk_lsb_iCDF[ 2 ]; /* 2 */
+
+extern const opus_uint8 silk_sign_iCDF[ 42 ]; /* 42 */
+
+extern const opus_uint8 silk_uniform3_iCDF[ 3 ]; /* 3 */
+extern const opus_uint8 silk_uniform4_iCDF[ 4 ]; /* 4 */
+extern const opus_uint8 silk_uniform5_iCDF[ 5 ]; /* 5 */
+extern const opus_uint8 silk_uniform6_iCDF[ 6 ]; /* 6 */
+extern const opus_uint8 silk_uniform8_iCDF[ 8 ]; /* 8 */
+
+extern const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ]; /* 7 */
+
+extern const opus_uint8 silk_LTP_per_index_iCDF[ 3 ]; /* 3 */
+extern const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ]; /* 3 */
+extern const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ]; /* 3 */
+extern const opus_int16 silk_LTP_gain_middle_avg_RD_Q14;
+extern const opus_int8 * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ]; /* 168 */
+extern const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS];
+
+extern const opus_int8 silk_LTP_vq_sizes[ NB_LTP_CBKS ]; /* 3 */
+
+extern const opus_uint8 silk_LTPscale_iCDF[ 3 ]; /* 3 */
+extern const opus_int16 silk_LTPScales_table_Q14[ 3 ]; /* 6 */
+
+extern const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ]; /* 4 */
+extern const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ]; /* 2 */
+
+extern const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ]; /* 32 */
+extern const opus_uint8 silk_stereo_pred_joint_iCDF[ 25 ]; /* 25 */
+extern const opus_uint8 silk_stereo_only_code_mid_iCDF[ 2 ]; /* 2 */
+
+extern const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ]; /* 10 */
+
+extern const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ]; /* 5 */
+
+extern const silk_NLSF_CB_struct silk_NLSF_CB_WB; /* 1040 */
+extern const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB; /* 728 */
+
+/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */
+extern const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ]; /* 32 */
+extern const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ]; /* 32 */
+extern const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ]; /* 32 */
+extern const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ]; /* 16 - NOTE(review): was 32; opus_int16 entries should take half the bytes of the opus_int32 tables above, confirm against TARGET_RATE_TAB_SZ */
+
+/* Quantization offsets */
+extern const opus_int16 silk_Quantization_Offsets_Q10[ 2 ][ 2 ]; /* 8 */
+
+/* Interpolation points for filter coefficients used in the bandwidth transition smoother */
+extern const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ]; /* 60 */
+extern const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ]; /* 60 */
+
+/* ROM table with cosine values (defined in table_LSF_cos.c) */
+extern const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ]; /* 258 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/tables_LTP.c b/drivers/opus/silk/tables_LTP.c
new file mode 100644
index 0000000000..56b672db8b
--- /dev/null
+++ b/drivers/opus/silk/tables_LTP.c
@@ -0,0 +1,296 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+const opus_uint8 silk_LTP_per_index_iCDF[3] = {
+ 179, 99, 0
+};
+
+static const opus_uint8 silk_LTP_gain_iCDF_0[8] = {
+ 71, 56, 43, 30, 21, 12, 6, 0
+};
+
+static const opus_uint8 silk_LTP_gain_iCDF_1[16] = {
+ 199, 165, 144, 124, 109, 96, 84, 71,
+ 61, 51, 42, 32, 23, 15, 8, 0
+};
+
+static const opus_uint8 silk_LTP_gain_iCDF_2[32] = {
+ 241, 225, 211, 199, 187, 175, 164, 153,
+ 142, 132, 123, 114, 105, 96, 88, 80,
+ 72, 64, 57, 50, 44, 38, 33, 29,
+ 24, 20, 16, 12, 9, 5, 2, 0
+};
+
+const opus_int16 silk_LTP_gain_middle_avg_RD_Q14 = 12304;
+
+static const opus_uint8 silk_LTP_gain_BITS_Q5_0[8] = {
+ 15, 131, 138, 138, 155, 155, 173, 173
+};
+
+static const opus_uint8 silk_LTP_gain_BITS_Q5_1[16] = {
+ 69, 93, 115, 118, 131, 138, 141, 138,
+ 150, 150, 155, 150, 155, 160, 166, 160
+};
+
+static const opus_uint8 silk_LTP_gain_BITS_Q5_2[32] = {
+ 131, 128, 134, 141, 141, 141, 145, 145,
+ 145, 150, 155, 155, 155, 155, 160, 160,
+ 160, 160, 166, 166, 173, 173, 182, 192,
+ 182, 192, 192, 192, 205, 192, 205, 224
+};
+
+const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[NB_LTP_CBKS] = {
+ silk_LTP_gain_iCDF_0,
+ silk_LTP_gain_iCDF_1,
+ silk_LTP_gain_iCDF_2
+};
+
+const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[NB_LTP_CBKS] = {
+ silk_LTP_gain_BITS_Q5_0,
+ silk_LTP_gain_BITS_Q5_1,
+ silk_LTP_gain_BITS_Q5_2
+};
+
+static const opus_int8 silk_LTP_gain_vq_0[8][5] =
+{
+{
+ 4, 6, 24, 7, 5
+},
+{
+ 0, 0, 2, 0, 0
+},
+{
+ 12, 28, 41, 13, -4
+},
+{
+ -9, 15, 42, 25, 14
+},
+{
+ 1, -2, 62, 41, -9
+},
+{
+ -10, 37, 65, -4, 3
+},
+{
+ -6, 4, 66, 7, -8
+},
+{
+ 16, 14, 38, -3, 33
+}
+};
+
+static const opus_int8 silk_LTP_gain_vq_1[16][5] =
+{
+{
+ 13, 22, 39, 23, 12
+},
+{
+ -1, 36, 64, 27, -6
+},
+{
+ -7, 10, 55, 43, 17
+},
+{
+ 1, 1, 8, 1, 1
+},
+{
+ 6, -11, 74, 53, -9
+},
+{
+ -12, 55, 76, -12, 8
+},
+{
+ -3, 3, 93, 27, -4
+},
+{
+ 26, 39, 59, 3, -8
+},
+{
+ 2, 0, 77, 11, 9
+},
+{
+ -8, 22, 44, -6, 7
+},
+{
+ 40, 9, 26, 3, 9
+},
+{
+ -7, 20, 101, -7, 4
+},
+{
+ 3, -8, 42, 26, 0
+},
+{
+ -15, 33, 68, 2, 23
+},
+{
+ -2, 55, 46, -2, 15
+},
+{
+ 3, -1, 21, 16, 41
+}
+};
+
+static const opus_int8 silk_LTP_gain_vq_2[32][5] =
+{
+{
+ -6, 27, 61, 39, 5
+},
+{
+ -11, 42, 88, 4, 1
+},
+{
+ -2, 60, 65, 6, -4
+},
+{
+ -1, -5, 73, 56, 1
+},
+{
+ -9, 19, 94, 29, -9
+},
+{
+ 0, 12, 99, 6, 4
+},
+{
+ 8, -19, 102, 46, -13
+},
+{
+ 3, 2, 13, 3, 2
+},
+{
+ 9, -21, 84, 72, -18
+},
+{
+ -11, 46, 104, -22, 8
+},
+{
+ 18, 38, 48, 23, 0
+},
+{
+ -16, 70, 83, -21, 11
+},
+{
+ 5, -11, 117, 22, -8
+},
+{
+ -6, 23, 117, -12, 3
+},
+{
+ 3, -8, 95, 28, 4
+},
+{
+ -10, 15, 77, 60, -15
+},
+{
+ -1, 4, 124, 2, -4
+},
+{
+ 3, 38, 84, 24, -25
+},
+{
+ 2, 13, 42, 13, 31
+},
+{
+ 21, -4, 56, 46, -1
+},
+{
+ -1, 35, 79, -13, 19
+},
+{
+ -7, 65, 88, -9, -14
+},
+{
+ 20, 4, 81, 49, -29
+},
+{
+ 20, 0, 75, 3, -17
+},
+{
+ 5, -9, 44, 92, -8
+},
+{
+ 1, -3, 22, 69, 31
+},
+{
+ -6, 95, 41, -12, 5
+},
+{
+ 39, 67, 16, -4, 1
+},
+{
+ 0, -6, 120, 55, -36
+},
+{
+ -13, 44, 122, 4, -24
+},
+{
+ 81, 5, 11, 3, 7
+},
+{
+ 2, 0, 9, 10, 88
+}
+};
+
+const opus_int8 * const silk_LTP_vq_ptrs_Q7[NB_LTP_CBKS] = { /* first element of each [rows][5] codebook above */
+ &silk_LTP_gain_vq_0[0][0], /* 8 rows; already a const opus_int8 *, so no cast is needed */
+ &silk_LTP_gain_vq_1[0][0], /* 16 rows */
+ &silk_LTP_gain_vq_2[0][0] /* 32 rows */
+};
+
+/* Maximum frequency-dependent response of the pitch taps above,
+ computed as max(abs(freqz(taps))) */
+static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = {
+ 46, 2, 90, 87, 93, 91, 82, 98
+};
+
+static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = {
+ 109, 120, 118, 12, 113, 115, 117, 119,
+ 99, 59, 87, 111, 63, 111, 112, 80
+};
+
+static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = {
+ 126, 124, 125, 124, 129, 121, 126, 23,
+ 132, 127, 127, 127, 126, 127, 122, 133,
+ 130, 134, 101, 118, 119, 145, 126, 86,
+ 124, 120, 123, 119, 170, 173, 107, 109
+};
+
+const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = {
+ &silk_LTP_gain_vq_0_gain[0],
+ &silk_LTP_gain_vq_1_gain[0],
+ &silk_LTP_gain_vq_2_gain[0]
+};
+
+const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = { /* row counts of silk_LTP_gain_vq_0, _1 and _2 */
+ 8, 16, 32
+};
diff --git a/drivers/opus/silk/tables_NLSF_CB_NB_MB.c b/drivers/opus/silk/tables_NLSF_CB_NB_MB.c
new file mode 100644
index 0000000000..ded35eee74
--- /dev/null
+++ b/drivers/opus/silk/tables_NLSF_CB_NB_MB.c
@@ -0,0 +1,159 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+static const opus_uint8 silk_NLSF_CB1_NB_MB_Q8[ 320 ] = {
+ 12, 35, 60, 83, 108, 132, 157, 180,
+ 206, 228, 15, 32, 55, 77, 101, 125,
+ 151, 175, 201, 225, 19, 42, 66, 89,
+ 114, 137, 162, 184, 209, 230, 12, 25,
+ 50, 72, 97, 120, 147, 172, 200, 223,
+ 26, 44, 69, 90, 114, 135, 159, 180,
+ 205, 225, 13, 22, 53, 80, 106, 130,
+ 156, 180, 205, 228, 15, 25, 44, 64,
+ 90, 115, 142, 168, 196, 222, 19, 24,
+ 62, 82, 100, 120, 145, 168, 190, 214,
+ 22, 31, 50, 79, 103, 120, 151, 170,
+ 203, 227, 21, 29, 45, 65, 106, 124,
+ 150, 171, 196, 224, 30, 49, 75, 97,
+ 121, 142, 165, 186, 209, 229, 19, 25,
+ 52, 70, 93, 116, 143, 166, 192, 219,
+ 26, 34, 62, 75, 97, 118, 145, 167,
+ 194, 217, 25, 33, 56, 70, 91, 113,
+ 143, 165, 196, 223, 21, 34, 51, 72,
+ 97, 117, 145, 171, 196, 222, 20, 29,
+ 50, 67, 90, 117, 144, 168, 197, 221,
+ 22, 31, 48, 66, 95, 117, 146, 168,
+ 196, 222, 24, 33, 51, 77, 116, 134,
+ 158, 180, 200, 224, 21, 28, 70, 87,
+ 106, 124, 149, 170, 194, 217, 26, 33,
+ 53, 64, 83, 117, 152, 173, 204, 225,
+ 27, 34, 65, 95, 108, 129, 155, 174,
+ 210, 225, 20, 26, 72, 99, 113, 131,
+ 154, 176, 200, 219, 34, 43, 61, 78,
+ 93, 114, 155, 177, 205, 229, 23, 29,
+ 54, 97, 124, 138, 163, 179, 209, 229,
+ 30, 38, 56, 89, 118, 129, 158, 178,
+ 200, 231, 21, 29, 49, 63, 85, 111,
+ 142, 163, 193, 222, 27, 48, 77, 103,
+ 133, 158, 179, 196, 215, 232, 29, 47,
+ 74, 99, 124, 151, 176, 198, 220, 237,
+ 33, 42, 61, 76, 93, 121, 155, 174,
+ 207, 225, 29, 53, 87, 112, 136, 154,
+ 170, 188, 208, 227, 24, 30, 52, 84,
+ 131, 150, 166, 186, 203, 229, 37, 48,
+ 64, 84, 104, 118, 156, 177, 201, 230
+};
+
+static const opus_uint8 silk_NLSF_CB1_iCDF_NB_MB[ 64 ] = {
+ 212, 178, 148, 129, 108, 96, 85, 82,
+ 79, 77, 61, 59, 57, 56, 51, 49,
+ 48, 45, 42, 41, 40, 38, 36, 34,
+ 31, 30, 21, 12, 10, 3, 1, 0,
+ 255, 245, 244, 236, 233, 225, 217, 203,
+ 190, 176, 175, 161, 149, 136, 125, 114,
+ 102, 91, 81, 71, 60, 52, 43, 35,
+ 28, 20, 19, 18, 12, 11, 5, 0
+};
+
+static const opus_uint8 silk_NLSF_CB2_SELECT_NB_MB[ 160 ] = {
+ 16, 0, 0, 0, 0, 99, 66, 36,
+ 36, 34, 36, 34, 34, 34, 34, 83,
+ 69, 36, 52, 34, 116, 102, 70, 68,
+ 68, 176, 102, 68, 68, 34, 65, 85,
+ 68, 84, 36, 116, 141, 152, 139, 170,
+ 132, 187, 184, 216, 137, 132, 249, 168,
+ 185, 139, 104, 102, 100, 68, 68, 178,
+ 218, 185, 185, 170, 244, 216, 187, 187,
+ 170, 244, 187, 187, 219, 138, 103, 155,
+ 184, 185, 137, 116, 183, 155, 152, 136,
+ 132, 217, 184, 184, 170, 164, 217, 171,
+ 155, 139, 244, 169, 184, 185, 170, 164,
+ 216, 223, 218, 138, 214, 143, 188, 218,
+ 168, 244, 141, 136, 155, 170, 168, 138,
+ 220, 219, 139, 164, 219, 202, 216, 137,
+ 168, 186, 246, 185, 139, 116, 185, 219,
+ 185, 138, 100, 100, 134, 100, 102, 34,
+ 68, 68, 100, 68, 168, 203, 221, 218,
+ 168, 167, 154, 136, 104, 70, 164, 246,
+ 171, 137, 139, 137, 155, 218, 219, 139
+};
+
+static const opus_uint8 silk_NLSF_CB2_iCDF_NB_MB[ 72 ] = {
+ 255, 254, 253, 238, 14, 3, 2, 1,
+ 0, 255, 254, 252, 218, 35, 3, 2,
+ 1, 0, 255, 254, 250, 208, 59, 4,
+ 2, 1, 0, 255, 254, 246, 194, 71,
+ 10, 2, 1, 0, 255, 252, 236, 183,
+ 82, 8, 2, 1, 0, 255, 252, 235,
+ 180, 90, 17, 2, 1, 0, 255, 248,
+ 224, 171, 97, 30, 4, 1, 0, 255,
+ 254, 236, 173, 95, 37, 7, 1, 0
+};
+
+static const opus_uint8 silk_NLSF_CB2_BITS_NB_MB_Q5[ 72 ] = {
+ 255, 255, 255, 131, 6, 145, 255, 255,
+ 255, 255, 255, 236, 93, 15, 96, 255,
+ 255, 255, 255, 255, 194, 83, 25, 71,
+ 221, 255, 255, 255, 255, 162, 73, 34,
+ 66, 162, 255, 255, 255, 210, 126, 73,
+ 43, 57, 173, 255, 255, 255, 201, 125,
+ 71, 48, 58, 130, 255, 255, 255, 166,
+ 110, 73, 57, 62, 104, 210, 255, 255,
+ 251, 123, 65, 55, 68, 100, 171, 255
+};
+
+static const opus_uint8 silk_NLSF_PRED_NB_MB_Q8[ 18 ] = {
+ 179, 138, 140, 148, 151, 149, 153, 151,
+ 163, 116, 67, 82, 59, 92, 72, 100,
+ 89, 92
+};
+
+static const opus_int16 silk_NLSF_DELTA_MIN_NB_MB_Q15[ 11 ] = {
+ 250, 3, 6, 3, 3, 3, 4, 3,
+ 3, 3, 461
+};
+
+const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB = /* member names are declared with silk_NLSF_CB_struct, outside this file */
+{
+ 32, /* silk_NLSF_CB1_NB_MB_Q8 holds 320 = 32 * 10 entries */
+ 10, /* presumably the NLSF/LPC order for NB and MB -- confirm against silk_NLSF_CB_struct */
+ SILK_FIX_CONST( 0.18, 16 ),
+ SILK_FIX_CONST( 1.0 / 0.18, 6 ), /* reciprocal of the constant on the previous line */
+ silk_NLSF_CB1_NB_MB_Q8,
+ silk_NLSF_CB1_iCDF_NB_MB,
+ silk_NLSF_PRED_NB_MB_Q8, /* 18 = 2 * ( 10 - 1 ) entries */
+ silk_NLSF_CB2_SELECT_NB_MB, /* 160 = 32 * 10 / 2 entries */
+ silk_NLSF_CB2_iCDF_NB_MB,
+ silk_NLSF_CB2_BITS_NB_MB_Q5,
+ silk_NLSF_DELTA_MIN_NB_MB_Q15, /* 11 = 10 + 1 entries */
+};
diff --git a/drivers/opus/silk/tables_NLSF_CB_WB.c b/drivers/opus/silk/tables_NLSF_CB_WB.c
new file mode 100644
index 0000000000..d83567ea6f
--- /dev/null
+++ b/drivers/opus/silk/tables_NLSF_CB_WB.c
@@ -0,0 +1,198 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+static const opus_uint8 silk_NLSF_CB1_WB_Q8[ 512 ] = {
+ 7, 23, 38, 54, 69, 85, 100, 116,
+ 131, 147, 162, 178, 193, 208, 223, 239,
+ 13, 25, 41, 55, 69, 83, 98, 112,
+ 127, 142, 157, 171, 187, 203, 220, 236,
+ 15, 21, 34, 51, 61, 78, 92, 106,
+ 126, 136, 152, 167, 185, 205, 225, 240,
+ 10, 21, 36, 50, 63, 79, 95, 110,
+ 126, 141, 157, 173, 189, 205, 221, 237,
+ 17, 20, 37, 51, 59, 78, 89, 107,
+ 123, 134, 150, 164, 184, 205, 224, 240,
+ 10, 15, 32, 51, 67, 81, 96, 112,
+ 129, 142, 158, 173, 189, 204, 220, 236,
+ 8, 21, 37, 51, 65, 79, 98, 113,
+ 126, 138, 155, 168, 179, 192, 209, 218,
+ 12, 15, 34, 55, 63, 78, 87, 108,
+ 118, 131, 148, 167, 185, 203, 219, 236,
+ 16, 19, 32, 36, 56, 79, 91, 108,
+ 118, 136, 154, 171, 186, 204, 220, 237,
+ 11, 28, 43, 58, 74, 89, 105, 120,
+ 135, 150, 165, 180, 196, 211, 226, 241,
+ 6, 16, 33, 46, 60, 75, 92, 107,
+ 123, 137, 156, 169, 185, 199, 214, 225,
+ 11, 19, 30, 44, 57, 74, 89, 105,
+ 121, 135, 152, 169, 186, 202, 218, 234,
+ 12, 19, 29, 46, 57, 71, 88, 100,
+ 120, 132, 148, 165, 182, 199, 216, 233,
+ 17, 23, 35, 46, 56, 77, 92, 106,
+ 123, 134, 152, 167, 185, 204, 222, 237,
+ 14, 17, 45, 53, 63, 75, 89, 107,
+ 115, 132, 151, 171, 188, 206, 221, 240,
+ 9, 16, 29, 40, 56, 71, 88, 103,
+ 119, 137, 154, 171, 189, 205, 222, 237,
+ 16, 19, 36, 48, 57, 76, 87, 105,
+ 118, 132, 150, 167, 185, 202, 218, 236,
+ 12, 17, 29, 54, 71, 81, 94, 104,
+ 126, 136, 149, 164, 182, 201, 221, 237,
+ 15, 28, 47, 62, 79, 97, 115, 129,
+ 142, 155, 168, 180, 194, 208, 223, 238,
+ 8, 14, 30, 45, 62, 78, 94, 111,
+ 127, 143, 159, 175, 192, 207, 223, 239,
+ 17, 30, 49, 62, 79, 92, 107, 119,
+ 132, 145, 160, 174, 190, 204, 220, 235,
+ 14, 19, 36, 45, 61, 76, 91, 108,
+ 121, 138, 154, 172, 189, 205, 222, 238,
+ 12, 18, 31, 45, 60, 76, 91, 107,
+ 123, 138, 154, 171, 187, 204, 221, 236,
+ 13, 17, 31, 43, 53, 70, 83, 103,
+ 114, 131, 149, 167, 185, 203, 220, 237,
+ 17, 22, 35, 42, 58, 78, 93, 110,
+ 125, 139, 155, 170, 188, 206, 224, 240,
+ 8, 15, 34, 50, 67, 83, 99, 115,
+ 131, 146, 162, 178, 193, 209, 224, 239,
+ 13, 16, 41, 66, 73, 86, 95, 111,
+ 128, 137, 150, 163, 183, 206, 225, 241,
+ 17, 25, 37, 52, 63, 75, 92, 102,
+ 119, 132, 144, 160, 175, 191, 212, 231,
+ 19, 31, 49, 65, 83, 100, 117, 133,
+ 147, 161, 174, 187, 200, 213, 227, 242,
+ 18, 31, 52, 68, 88, 103, 117, 126,
+ 138, 149, 163, 177, 192, 207, 223, 239,
+ 16, 29, 47, 61, 76, 90, 106, 119,
+ 133, 147, 161, 176, 193, 209, 224, 240,
+ 15, 21, 35, 50, 61, 73, 86, 97,
+ 110, 119, 129, 141, 175, 198, 218, 237
+};
+
+static const opus_uint8 silk_NLSF_CB1_iCDF_WB[ 64 ] = {
+ 225, 204, 201, 184, 183, 175, 158, 154,
+ 153, 135, 119, 115, 113, 110, 109, 99,
+ 98, 95, 79, 68, 52, 50, 48, 45,
+ 43, 32, 31, 27, 18, 10, 3, 0,
+ 255, 251, 235, 230, 212, 201, 196, 182,
+ 167, 166, 163, 151, 138, 124, 110, 104,
+ 90, 78, 76, 70, 69, 57, 45, 34,
+ 24, 21, 11, 6, 5, 4, 3, 0
+};
+
+static const opus_uint8 silk_NLSF_CB2_SELECT_WB[ 256 ] = {
+ 0, 0, 0, 0, 0, 0, 0, 1,
+ 100, 102, 102, 68, 68, 36, 34, 96,
+ 164, 107, 158, 185, 180, 185, 139, 102,
+ 64, 66, 36, 34, 34, 0, 1, 32,
+ 208, 139, 141, 191, 152, 185, 155, 104,
+ 96, 171, 104, 166, 102, 102, 102, 132,
+ 1, 0, 0, 0, 0, 16, 16, 0,
+ 80, 109, 78, 107, 185, 139, 103, 101,
+ 208, 212, 141, 139, 173, 153, 123, 103,
+ 36, 0, 0, 0, 0, 0, 0, 1,
+ 48, 0, 0, 0, 0, 0, 0, 32,
+ 68, 135, 123, 119, 119, 103, 69, 98,
+ 68, 103, 120, 118, 118, 102, 71, 98,
+ 134, 136, 157, 184, 182, 153, 139, 134,
+ 208, 168, 248, 75, 189, 143, 121, 107,
+ 32, 49, 34, 34, 34, 0, 17, 2,
+ 210, 235, 139, 123, 185, 137, 105, 134,
+ 98, 135, 104, 182, 100, 183, 171, 134,
+ 100, 70, 68, 70, 66, 66, 34, 131,
+ 64, 166, 102, 68, 36, 2, 1, 0,
+ 134, 166, 102, 68, 34, 34, 66, 132,
+ 212, 246, 158, 139, 107, 107, 87, 102,
+ 100, 219, 125, 122, 137, 118, 103, 132,
+ 114, 135, 137, 105, 171, 106, 50, 34,
+ 164, 214, 141, 143, 185, 151, 121, 103,
+ 192, 34, 0, 0, 0, 0, 0, 1,
+ 208, 109, 74, 187, 134, 249, 159, 137,
+ 102, 110, 154, 118, 87, 101, 119, 101,
+ 0, 2, 0, 36, 36, 66, 68, 35,
+ 96, 164, 102, 100, 36, 0, 2, 33,
+ 167, 138, 174, 102, 100, 84, 2, 2,
+ 100, 107, 120, 119, 36, 197, 24, 0
+};
+
+static const opus_uint8 silk_NLSF_CB2_iCDF_WB[ 72 ] = {
+ 255, 254, 253, 244, 12, 3, 2, 1,
+ 0, 255, 254, 252, 224, 38, 3, 2,
+ 1, 0, 255, 254, 251, 209, 57, 4,
+ 2, 1, 0, 255, 254, 244, 195, 69,
+ 4, 2, 1, 0, 255, 251, 232, 184,
+ 84, 7, 2, 1, 0, 255, 254, 240,
+ 186, 86, 14, 2, 1, 0, 255, 254,
+ 239, 178, 91, 30, 5, 1, 0, 255,
+ 248, 227, 177, 100, 19, 2, 1, 0
+};
+
+static const opus_uint8 silk_NLSF_CB2_BITS_WB_Q5[ 72 ] = {
+ 255, 255, 255, 156, 4, 154, 255, 255,
+ 255, 255, 255, 227, 102, 15, 92, 255,
+ 255, 255, 255, 255, 213, 83, 24, 72,
+ 236, 255, 255, 255, 255, 150, 76, 33,
+ 63, 214, 255, 255, 255, 190, 121, 77,
+ 43, 55, 185, 255, 255, 255, 245, 137,
+ 71, 43, 59, 139, 255, 255, 255, 255,
+ 131, 66, 50, 66, 107, 194, 255, 255,
+ 166, 116, 76, 55, 53, 125, 255, 255
+};
+
+static const opus_uint8 silk_NLSF_PRED_WB_Q8[ 30 ] = {
+ 175, 148, 160, 176, 178, 173, 174, 164,
+ 177, 174, 196, 182, 198, 192, 182, 68,
+ 62, 66, 60, 72, 117, 85, 90, 118,
+ 136, 151, 142, 160, 142, 155
+};
+
+static const opus_int16 silk_NLSF_DELTA_MIN_WB_Q15[ 17 ] = {
+ 100, 3, 40, 3, 3, 3, 5, 14,
+ 14, 10, 11, 3, 8, 9, 7, 3,
+ 347
+};
+
+const silk_NLSF_CB_struct silk_NLSF_CB_WB = /* member names are declared with silk_NLSF_CB_struct, outside this file */
+{
+ 32, /* silk_NLSF_CB1_WB_Q8 holds 512 = 32 * 16 entries */
+ 16, /* presumably the NLSF/LPC order for WB -- confirm against silk_NLSF_CB_struct */
+ SILK_FIX_CONST( 0.15, 16 ),
+ SILK_FIX_CONST( 1.0 / 0.15, 6 ), /* reciprocal of the constant on the previous line */
+ silk_NLSF_CB1_WB_Q8,
+ silk_NLSF_CB1_iCDF_WB,
+ silk_NLSF_PRED_WB_Q8, /* 30 = 2 * ( 16 - 1 ) entries */
+ silk_NLSF_CB2_SELECT_WB, /* 256 = 32 * 16 / 2 entries */
+ silk_NLSF_CB2_iCDF_WB,
+ silk_NLSF_CB2_BITS_WB_Q5,
+ silk_NLSF_DELTA_MIN_WB_Q15, /* 17 = 16 + 1 entries */
+};
+
diff --git a/drivers/opus/silk/tables_gain.c b/drivers/opus/silk/tables_gain.c
new file mode 100644
index 0000000000..6df980616b
--- /dev/null
+++ b/drivers/opus/silk/tables_gain.c
@@ -0,0 +1,63 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ] =
+{
+{
+ 224, 112, 44, 15, 3, 2, 1, 0
+},
+{
+ 254, 237, 192, 132, 70, 23, 4, 0
+},
+{
+ 255, 252, 226, 155, 61, 11, 2, 0
+}
+};
+
+const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ] = {
+ 250, 245, 234, 203, 71, 50, 42, 38,
+ 35, 33, 31, 29, 28, 27, 26, 25,
+ 24, 23, 22, 21, 20, 19, 18, 17,
+ 16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1,
+ 0
+};
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/drivers/opus/silk/tables_other.c b/drivers/opus/silk/tables_other.c
new file mode 100644
index 0000000000..246e960fa4
--- /dev/null
+++ b/drivers/opus/silk/tables_other.c
@@ -0,0 +1,138 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "structs.h"
+#include "define.h"
+#include "tables.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Piece-wise linear mapping from bitrate in bps to coding quality in dB SNR */
+const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ] = {
+ 0, 8000, 9400, 11500, 13500, 17500, 25000, MAX_TARGET_RATE_BPS
+};
+const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ] = {
+ 0, 9000, 12000, 14500, 18500, 24500, 35500, MAX_TARGET_RATE_BPS
+};
+const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ] = {
+ 0, 10500, 14000, 17000, 21500, 28500, 42000, MAX_TARGET_RATE_BPS
+};
+const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ] = {
+ 18, 29, 38, 40, 46, 52, 62, 84
+};
+
+/* Tables for stereo predictor coding */
+const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ] = {
+ -13732, -10050, -8266, -7526, -6500, -5000, -2950, -820,
+ 820, 2950, 5000, 6500, 7526, 8266, 10050, 13732
+};
+const opus_uint8 silk_stereo_pred_joint_iCDF[ 25 ] = {
+ 249, 247, 246, 245, 244,
+ 234, 210, 202, 201, 200,
+ 197, 174, 82, 59, 56,
+ 55, 54, 46, 22, 12,
+ 11, 10, 9, 7, 0
+};
+const opus_uint8 silk_stereo_only_code_mid_iCDF[ 2 ] = { 64, 0 };
+
+/* Tables for LBRR flags */
+static const opus_uint8 silk_LBRR_flags_2_iCDF[ 3 ] = { 203, 150, 0 };
+static const opus_uint8 silk_LBRR_flags_3_iCDF[ 7 ] = { 215, 195, 166, 125, 110, 82, 0 };
+const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ] = {
+ silk_LBRR_flags_2_iCDF,
+ silk_LBRR_flags_3_iCDF
+};
+
+/* Table for LSB coding */
+const opus_uint8 silk_lsb_iCDF[ 2 ] = { 120, 0 };
+
+/* Tables for LTPScale */
+const opus_uint8 silk_LTPscale_iCDF[ 3 ] = { 128, 64, 0 };
+
+/* Tables for signal type and offset coding */
+const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ] = {
+ 232, 158, 10, 0
+};
+const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ] = {
+ 230, 0
+};
+
+/* Tables for NLSF interpolation factor */
+const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ] = { 243, 221, 192, 181, 0 };
+
+/* Quantization offsets */
+const opus_int16 silk_Quantization_Offsets_Q10[ 2 ][ 2 ] = {
+ { OFFSET_UVL_Q10, OFFSET_UVH_Q10 }, { OFFSET_VL_Q10, OFFSET_VH_Q10 }
+};
+
+/* Table for LTPScale */
+const opus_int16 silk_LTPScales_table_Q14[ 3 ] = { 15565, 12288, 8192 };
+
+/* Uniform entropy tables */
+const opus_uint8 silk_uniform3_iCDF[ 3 ] = { 171, 85, 0 };
+const opus_uint8 silk_uniform4_iCDF[ 4 ] = { 192, 128, 64, 0 };
+const opus_uint8 silk_uniform5_iCDF[ 5 ] = { 205, 154, 102, 51, 0 };
+const opus_uint8 silk_uniform6_iCDF[ 6 ] = { 213, 171, 128, 85, 43, 0 };
+const opus_uint8 silk_uniform8_iCDF[ 8 ] = { 224, 192, 160, 128, 96, 64, 32, 0 };
+
+const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ] = { 100, 40, 16, 7, 3, 1, 0 };
+
+/* Elliptic/Cauer filters designed with 0.1 dB passband ripple,
+ 80 dB minimum stopband attenuation, and
+ [0.95 : -0.15 : 0.35] normalized cut off frequencies. */
+
+/* Interpolation points for filter coefficients used in the bandwidth transition smoother */
+const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ] =
+{
+{ 250767114, 501534038, 250767114 },
+{ 209867381, 419732057, 209867381 },
+{ 170987846, 341967853, 170987846 },
+{ 131531482, 263046905, 131531482 },
+{ 89306658, 178584282, 89306658 }
+};
+
+/* Interpolation points for filter coefficients used in the bandwidth transition smoother */
+const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ] =
+{
+{ 506393414, 239854379 },
+{ 411067935, 169683996 },
+{ 306733530, 116694253 },
+{ 185807084, 77959395 },
+{ 35497197, 57401098 }
+};
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/drivers/opus/silk/tables_pitch_lag.c b/drivers/opus/silk/tables_pitch_lag.c
new file mode 100644
index 0000000000..0af5c5ace7
--- /dev/null
+++ b/drivers/opus/silk/tables_pitch_lag.c
@@ -0,0 +1,69 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ] = {
+ 253, 250, 244, 233, 212, 182, 150, 131,
+ 120, 110, 98, 85, 72, 60, 49, 40,
+ 32, 25, 19, 15, 13, 11, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0
+};
+
+const opus_uint8 silk_pitch_delta_iCDF[21] = {
+ 210, 208, 206, 203, 199, 193, 183, 168,
+ 142, 104, 74, 52, 37, 27, 20, 14,
+ 10, 6, 4, 2, 0
+};
+
+const opus_uint8 silk_pitch_contour_iCDF[34] = {
+ 223, 201, 183, 167, 152, 138, 124, 111,
+ 98, 88, 79, 70, 62, 56, 50, 44,
+ 39, 35, 31, 27, 24, 21, 18, 16,
+ 14, 12, 10, 8, 6, 4, 3, 2,
+ 1, 0
+};
+
+const opus_uint8 silk_pitch_contour_NB_iCDF[11] = {
+ 188, 176, 155, 138, 119, 97, 67, 43,
+ 26, 10, 0
+};
+
+const opus_uint8 silk_pitch_contour_10_ms_iCDF[12] = {
+ 165, 119, 80, 61, 47, 35, 27, 20,
+ 14, 9, 4, 0
+};
+
+const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[3] = {
+ 113, 63, 0
+};
+
+
diff --git a/drivers/opus/silk/tables_pulses_per_block.c b/drivers/opus/silk/tables_pulses_per_block.c
new file mode 100644
index 0000000000..05ba2318f8
--- /dev/null
+++ b/drivers/opus/silk/tables_pulses_per_block.c
@@ -0,0 +1,264 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+const opus_uint8 silk_max_pulses_table[ 4 ] = {
+ 8, 10, 12, 16
+};
+
+const opus_uint8 silk_pulses_per_block_iCDF[ 10 ][ 18 ] = {
+{
+ 125, 51, 26, 18, 15, 12, 11, 10,
+ 9, 8, 7, 6, 5, 4, 3, 2,
+ 1, 0
+},
+{
+ 198, 105, 45, 22, 15, 12, 11, 10,
+ 9, 8, 7, 6, 5, 4, 3, 2,
+ 1, 0
+},
+{
+ 213, 162, 116, 83, 59, 43, 32, 24,
+ 18, 15, 12, 9, 7, 6, 5, 3,
+ 2, 0
+},
+{
+ 239, 187, 116, 59, 28, 16, 11, 10,
+ 9, 8, 7, 6, 5, 4, 3, 2,
+ 1, 0
+},
+{
+ 250, 229, 188, 135, 86, 51, 30, 19,
+ 13, 10, 8, 6, 5, 4, 3, 2,
+ 1, 0
+},
+{
+ 249, 235, 213, 185, 156, 128, 103, 83,
+ 66, 53, 42, 33, 26, 21, 17, 13,
+ 10, 0
+},
+{
+ 254, 249, 235, 206, 164, 118, 77, 46,
+ 27, 16, 10, 7, 5, 4, 3, 2,
+ 1, 0
+},
+{
+ 255, 253, 249, 239, 220, 191, 156, 119,
+ 85, 57, 37, 23, 15, 10, 6, 4,
+ 2, 0
+},
+{
+ 255, 253, 251, 246, 237, 223, 203, 179,
+ 152, 124, 98, 75, 55, 40, 29, 21,
+ 15, 0
+},
+{
+ 255, 254, 253, 247, 220, 162, 106, 67,
+ 42, 28, 18, 12, 9, 6, 4, 3,
+ 2, 0
+}
+};
+
+const opus_uint8 silk_pulses_per_block_BITS_Q5[ 9 ][ 18 ] = {
+{
+ 31, 57, 107, 160, 205, 205, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255
+},
+{
+ 69, 47, 67, 111, 166, 205, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255
+},
+{
+ 82, 74, 79, 95, 109, 128, 145, 160,
+ 173, 205, 205, 205, 224, 255, 255, 224,
+ 255, 224
+},
+{
+ 125, 74, 59, 69, 97, 141, 182, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255
+},
+{
+ 173, 115, 85, 73, 76, 92, 115, 145,
+ 173, 205, 224, 224, 255, 255, 255, 255,
+ 255, 255
+},
+{
+ 166, 134, 113, 102, 101, 102, 107, 118,
+ 125, 138, 145, 155, 166, 182, 192, 192,
+ 205, 150
+},
+{
+ 224, 182, 134, 101, 83, 79, 85, 97,
+ 120, 145, 173, 205, 224, 255, 255, 255,
+ 255, 255
+},
+{
+ 255, 224, 192, 150, 120, 101, 92, 89,
+ 93, 102, 118, 134, 160, 182, 192, 224,
+ 224, 224
+},
+{
+ 255, 224, 224, 182, 155, 134, 118, 109,
+ 104, 102, 106, 111, 118, 131, 145, 160,
+ 173, 131
+}
+};
+
+const opus_uint8 silk_rate_levels_iCDF[ 2 ][ 9 ] =
+{
+{
+ 241, 190, 178, 132, 87, 74, 41, 14,
+ 0
+},
+{
+ 223, 193, 157, 140, 106, 57, 39, 18,
+ 0
+}
+};
+
+const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ 9 ] =
+{
+{
+ 131, 74, 141, 79, 80, 138, 95, 104,
+ 134
+},
+{
+ 95, 99, 91, 125, 93, 76, 123, 115,
+ 123
+}
+};
+
+const opus_uint8 silk_shell_code_table0[ 152 ] = {
+ 128, 0, 214, 42, 0, 235, 128, 21,
+ 0, 244, 184, 72, 11, 0, 248, 214,
+ 128, 42, 7, 0, 248, 225, 170, 80,
+ 25, 5, 0, 251, 236, 198, 126, 54,
+ 18, 3, 0, 250, 238, 211, 159, 82,
+ 35, 15, 5, 0, 250, 231, 203, 168,
+ 128, 88, 53, 25, 6, 0, 252, 238,
+ 216, 185, 148, 108, 71, 40, 18, 4,
+ 0, 253, 243, 225, 199, 166, 128, 90,
+ 57, 31, 13, 3, 0, 254, 246, 233,
+ 212, 183, 147, 109, 73, 44, 23, 10,
+ 2, 0, 255, 250, 240, 223, 198, 166,
+ 128, 90, 58, 33, 16, 6, 1, 0,
+ 255, 251, 244, 231, 210, 181, 146, 110,
+ 75, 46, 25, 12, 5, 1, 0, 255,
+ 253, 248, 238, 221, 196, 164, 128, 92,
+ 60, 35, 18, 8, 3, 1, 0, 255,
+ 253, 249, 242, 229, 208, 180, 146, 110,
+ 76, 48, 27, 14, 7, 3, 1, 0
+};
+
+const opus_uint8 silk_shell_code_table1[ 152 ] = {
+ 129, 0, 207, 50, 0, 236, 129, 20,
+ 0, 245, 185, 72, 10, 0, 249, 213,
+ 129, 42, 6, 0, 250, 226, 169, 87,
+ 27, 4, 0, 251, 233, 194, 130, 62,
+ 20, 4, 0, 250, 236, 207, 160, 99,
+ 47, 17, 3, 0, 255, 240, 217, 182,
+ 131, 81, 41, 11, 1, 0, 255, 254,
+ 233, 201, 159, 107, 61, 20, 2, 1,
+ 0, 255, 249, 233, 206, 170, 128, 86,
+ 50, 23, 7, 1, 0, 255, 250, 238,
+ 217, 186, 148, 108, 70, 39, 18, 6,
+ 1, 0, 255, 252, 243, 226, 200, 166,
+ 128, 90, 56, 30, 13, 4, 1, 0,
+ 255, 252, 245, 231, 209, 180, 146, 110,
+ 76, 47, 25, 11, 4, 1, 0, 255,
+ 253, 248, 237, 219, 194, 163, 128, 93,
+ 62, 37, 19, 8, 3, 1, 0, 255,
+ 254, 250, 241, 226, 205, 177, 145, 111,
+ 79, 51, 30, 15, 6, 2, 1, 0
+};
+
+const opus_uint8 silk_shell_code_table2[ 152 ] = {
+ 129, 0, 203, 54, 0, 234, 129, 23,
+ 0, 245, 184, 73, 10, 0, 250, 215,
+ 129, 41, 5, 0, 252, 232, 173, 86,
+ 24, 3, 0, 253, 240, 200, 129, 56,
+ 15, 2, 0, 253, 244, 217, 164, 94,
+ 38, 10, 1, 0, 253, 245, 226, 189,
+ 132, 71, 27, 7, 1, 0, 253, 246,
+ 231, 203, 159, 105, 56, 23, 6, 1,
+ 0, 255, 248, 235, 213, 179, 133, 85,
+ 47, 19, 5, 1, 0, 255, 254, 243,
+ 221, 194, 159, 117, 70, 37, 12, 2,
+ 1, 0, 255, 254, 248, 234, 208, 171,
+ 128, 85, 48, 22, 8, 2, 1, 0,
+ 255, 254, 250, 240, 220, 189, 149, 107,
+ 67, 36, 16, 6, 2, 1, 0, 255,
+ 254, 251, 243, 227, 201, 166, 128, 90,
+ 55, 29, 13, 5, 2, 1, 0, 255,
+ 254, 252, 246, 234, 213, 183, 147, 109,
+ 73, 43, 22, 10, 4, 2, 1, 0
+};
+
+const opus_uint8 silk_shell_code_table3[ 152 ] = {
+ 130, 0, 200, 58, 0, 231, 130, 26,
+ 0, 244, 184, 76, 12, 0, 249, 214,
+ 130, 43, 6, 0, 252, 232, 173, 87,
+ 24, 3, 0, 253, 241, 203, 131, 56,
+ 14, 2, 0, 254, 246, 221, 167, 94,
+ 35, 8, 1, 0, 254, 249, 232, 193,
+ 130, 65, 23, 5, 1, 0, 255, 251,
+ 239, 211, 162, 99, 45, 15, 4, 1,
+ 0, 255, 251, 243, 223, 186, 131, 74,
+ 33, 11, 3, 1, 0, 255, 252, 245,
+ 230, 202, 158, 105, 57, 24, 8, 2,
+ 1, 0, 255, 253, 247, 235, 214, 179,
+ 132, 84, 44, 19, 7, 2, 1, 0,
+ 255, 254, 250, 240, 223, 196, 159, 112,
+ 69, 36, 15, 6, 2, 1, 0, 255,
+ 254, 253, 245, 231, 209, 176, 136, 93,
+ 55, 27, 11, 3, 2, 1, 0, 255,
+ 254, 253, 252, 239, 221, 194, 158, 117,
+ 76, 42, 18, 4, 3, 2, 1, 0
+};
+
+const opus_uint8 silk_shell_code_table_offsets[ 17 ] = {
+ 0, 0, 2, 5, 9, 14, 20, 27,
+ 35, 44, 54, 65, 77, 90, 104, 119,
+ 135
+};
+
+const opus_uint8 silk_sign_iCDF[ 42 ] = {
+ 254, 49, 67, 77, 82, 93, 99,
+ 198, 11, 18, 24, 31, 36, 45,
+ 255, 46, 66, 78, 87, 94, 104,
+ 208, 14, 21, 32, 42, 51, 66,
+ 255, 94, 104, 109, 112, 115, 118,
+ 248, 53, 69, 80, 88, 95, 102
+};
diff --git a/drivers/opus/silk/tuning_parameters.h b/drivers/opus/silk/tuning_parameters.h
new file mode 100644
index 0000000000..e1057bbaae
--- /dev/null
+++ b/drivers/opus/silk/tuning_parameters.h
@@ -0,0 +1,171 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_TUNING_PARAMETERS_H
+#define SILK_TUNING_PARAMETERS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Decay time for bitreservoir */
+#define BITRESERVOIR_DECAY_TIME_MS 500
+
+/*******************/
+/* Pitch estimator */
+/*******************/
+
+/* Level of noise floor for whitening filter LPC analysis in pitch analysis */
+#define FIND_PITCH_WHITE_NOISE_FRACTION 1e-3f
+
+/* Bandwidth expansion for whitening filter in pitch analysis */
+#define FIND_PITCH_BANDWIDTH_EXPANSION 0.99f
+
+/*********************/
+/* Linear prediction */
+/*********************/
+
+/* LPC analysis regularization */
+#define FIND_LPC_COND_FAC 1e-5f
+
+/* LTP analysis defines */
+#define FIND_LTP_COND_FAC 1e-5f
+#define LTP_DAMPING 0.05f
+#define LTP_SMOOTHING 0.1f
+
+/* LTP quantization settings */
+#define MU_LTP_QUANT_NB 0.03f
+#define MU_LTP_QUANT_MB 0.025f
+#define MU_LTP_QUANT_WB 0.02f
+
+/* Max cumulative LTP gain */
+#define MAX_SUM_LOG_GAIN_DB 250.0f
+
+/***********************/
+/* High pass filtering */
+/***********************/
+
+/* Smoothing parameters for low end of pitch frequency range estimation */
+#define VARIABLE_HP_SMTH_COEF1 0.1f
+#define VARIABLE_HP_SMTH_COEF2 0.015f
+#define VARIABLE_HP_MAX_DELTA_FREQ 0.4f
+
+/* Min and max cut-off frequency values (-3 dB points) */
+#define VARIABLE_HP_MIN_CUTOFF_HZ 60
+#define VARIABLE_HP_MAX_CUTOFF_HZ 100
+
+/***********/
+/* Various */
+/***********/
+
+/* VAD threshold */
+#define SPEECH_ACTIVITY_DTX_THRES 0.05f
+
+/* Speech Activity LBRR enable threshold */
+#define LBRR_SPEECH_ACTIVITY_THRES 0.3f
+
+/*************************/
+/* Perceptual parameters */
+/*************************/
+
+/* reduction in coding SNR during low speech activity */
+#define BG_SNR_DECR_dB 2.0f
+
+/* factor for reducing quantization noise during voiced speech */
+#define HARM_SNR_INCR_dB 2.0f
+
+/* factor for reducing quantization noise for unvoiced sparse signals */
+#define SPARSE_SNR_INCR_dB 2.0f
+
+/* threshold for sparseness measure above which to use lower quantization offset during unvoiced */
+#define SPARSENESS_THRESHOLD_QNT_OFFSET 0.75f
+
+/* warping control */
+#define WARPING_MULTIPLIER 0.015f
+
+/* fraction added to first autocorrelation value */
+#define SHAPE_WHITE_NOISE_FRACTION 5e-5f
+
+/* noise shaping filter chirp factor */
+#define BANDWIDTH_EXPANSION 0.95f
+
+/* difference between chirp factors for analysis and synthesis noise shaping filters at low bitrates */
+#define LOW_RATE_BANDWIDTH_EXPANSION_DELTA 0.01f
+
+/* extra harmonic boosting (signal shaping) at low bitrates */
+#define LOW_RATE_HARMONIC_BOOST 0.1f
+
+/* extra harmonic boosting (signal shaping) for noisy input signals */
+#define LOW_INPUT_QUALITY_HARMONIC_BOOST 0.1f
+
+/* harmonic noise shaping */
+#define HARMONIC_SHAPING 0.3f
+
+/* extra harmonic noise shaping for high bitrates or noisy input */
+#define HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING 0.2f
+
+/* parameter for shaping noise towards higher frequencies */
+#define HP_NOISE_COEF 0.25f
+
+/* parameter for shaping noise even more towards higher frequencies during voiced speech */
+#define HARM_HP_NOISE_COEF 0.35f
+
+/* parameter for applying a high-pass tilt to the input signal */
+#define INPUT_TILT 0.05f
+
+/* parameter for extra high-pass tilt to the input signal at high rates */
+#define HIGH_RATE_INPUT_TILT 0.1f
+
+/* parameter for reducing noise at the very low frequencies */
+#define LOW_FREQ_SHAPING 4.0f
+
+/* less reduction of noise at the very low frequencies for signals with low SNR at low frequencies */
+#define LOW_QUALITY_LOW_FREQ_SHAPING_DECR 0.5f
+
+/* subframe smoothing coefficient for HarmBoost, HarmShapeGain, Tilt (lower -> more smoothing) */
+#define SUBFR_SMTH_COEF 0.4f
+
+/* parameters defining the R/D tradeoff in the residual quantizer */
+#define LAMBDA_OFFSET 1.2f
+#define LAMBDA_SPEECH_ACT -0.2f
+#define LAMBDA_DELAYED_DECISIONS -0.05f
+#define LAMBDA_INPUT_QUALITY -0.1f
+#define LAMBDA_CODING_QUALITY -0.2f
+#define LAMBDA_QUANT_OFFSET 0.8f
+
+/* Compensation in bitrate calculations for 10 ms modes */
+#define REDUCE_BITRATE_10_MS_BPS 2200
+
+/* Maximum time before allowing a bandwidth transition */
+#define MAX_BANDWIDTH_SWITCH_DELAY_MS 5000
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_TUNING_PARAMETERS_H */
diff --git a/drivers/opus/silk/typedef.h b/drivers/opus/silk/typedef.h
new file mode 100644
index 0000000000..ca2361bc82
--- /dev/null
+++ b/drivers/opus/silk/typedef.h
@@ -0,0 +1,78 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_TYPEDEF_H
+#define SILK_TYPEDEF_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#ifndef OPUS_FIXED_POINT
+# include <float.h>
+# define silk_float float
+# define silk_float_MAX FLT_MAX
+#endif
+
+#define silk_int64_MAX ((opus_int64)0x7FFFFFFFFFFFFFFFLL) /* 2^63 - 1 */
+#define silk_int64_MIN ((opus_int64)0x8000000000000000LL) /* -2^63 */
+#define silk_int32_MAX 0x7FFFFFFF /* 2^31 - 1 = 2147483647 */
+#define silk_int32_MIN ((opus_int32)0x80000000) /* -2^31 = -2147483648 */
+#define silk_int16_MAX 0x7FFF /* 2^15 - 1 = 32767 */
+#define silk_int16_MIN ((opus_int16)0x8000) /* -2^15 = -32768 */
+#define silk_int8_MAX 0x7F /* 2^7 - 1 = 127 */
+#define silk_int8_MIN ((opus_int8)0x80) /* -2^7 = -128 */
+#define silk_uint8_MAX 0xFF /* 2^8 - 1 = 255 */
+
+#define silk_TRUE 1
+#define silk_FALSE 0
+
+/* assertions */
+#if (defined _WIN32 && !defined _WINCE && !defined(__GNUC__) && !defined(NO_ASSERTS))
+# ifndef silk_assert
+# include <crtdbg.h> /* ASSERTE() */
+# define silk_assert(COND) _ASSERTE(COND)
+# endif
+#else
+# ifdef ENABLE_ASSERTIONS
+# include <stdio.h>
+# include <stdlib.h>
+#define silk_fatal(str) _silk_fatal(str, __FILE__, __LINE__);
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line) /* Report a fatal internal error and terminate; reached through the silk_fatal() macro, which supplies __FILE__ and __LINE__ */
+{
+ fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
+ abort(); /* Does not return, which is what the noreturn attribute above declares for GCC-compatible compilers */
+}
+# define silk_assert(COND) {if (!(COND)) {silk_fatal("assertion failed: " #COND);}}
+# else
+# define silk_assert(COND)
+# endif
+#endif
+
+#endif /* SILK_TYPEDEF_H */
diff --git a/drivers/opus/stream.c b/drivers/opus/stream.c
new file mode 100644
index 0000000000..17293f2bca
--- /dev/null
+++ b/drivers/opus/stream.c
@@ -0,0 +1,366 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: stdio-based convenience library for opening/seeking/decoding
+ last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $
+
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#if defined(_WIN32)
+# include <io.h>
+#endif
+
+typedef struct OpusMemStream OpusMemStream;
+
+#define OP_MEM_SIZE_MAX (~(size_t)0>>1)
+#define OP_MEM_DIFF_MAX ((ptrdiff_t)OP_MEM_SIZE_MAX)
+
+/*The context information needed to read from a block of memory as if it were a
+ file.*/
+struct OpusMemStream{
+ /*The block of memory to read from. The stream only borrows this pointer: op_mem_stream_create() stores it without copying, and op_mem_close() frees only the OpusMemStream itself.*/
+ const unsigned char *data;
+ /*The total size of the block.
+ This must be at most OP_MEM_SIZE_MAX to prevent signed overflow while
+ seeking.*/
+ ptrdiff_t size;
+ /*The current file position.
+ This is allowed to be set arbitrarily greater than size (i.e., past the end
+ of the block, though we will not read data past the end of the block), but
+ is not allowed to be negative (i.e., before the beginning of the block).*/
+ ptrdiff_t pos;
+};
+
+static int op_fread(void *_stream,unsigned char *_ptr,int _buf_size){ /*Read callback for stdio streams: _stream is a FILE *. Returns the number of bytes stored in _ptr (0 at end-of-file or for a non-positive _buf_size), or OP_EREAD on a read error.*/
+ FILE *stream;
+ size_t ret;
+ /*Check for empty read.*/
+ if(_buf_size<=0)return 0;
+ stream=(FILE *)_stream;
+ ret=fread(_ptr,1,_buf_size,stream);
+ OP_ASSERT(ret<=(size_t)_buf_size);
+ /*If ret==0 and !feof(stream), there was a read error.*/
+ return ret>0||feof(stream)?(int)ret:OP_EREAD;
+}
+
+static int op_fseek(void *_stream,opus_int64 _offset,int _whence){ /*Seek callback for stdio streams: _stream is a FILE *. Returns 0 on success and a non-zero value on failure.*/
+#if defined(_WIN32)
+ /*_fseeki64() is not exposed until MSVCRT80.
+ This is the default starting with MSVC 2005 (_MSC_VER>=1400), but we want
+ to allow linking against older MSVCRT versions for compatibility back to
+ XP without installing extra runtime libraries.
+ i686-pc-mingw32 does not have fseeko() and requires
+ __MSVCRT_VERSION__>=0x800 for _fseeki64(), which screws up linking with
+ other libraries (that don't use MSVCRT80 from MSVC 2005 by default).
+ i686-w64-mingw32 does have fseeko() and respects _FILE_OFFSET_BITS, but I
+ don't know how to detect that at compile time.
+ We could just use fseeko64() (which is available in both), but it's
+ implemented using fgetpos()/fsetpos() just like this code, except without
+ the overflow checking, so we prefer our version.*/
+ opus_int64 pos;
+ /*We don't use fpos_t directly because it might be a struct if __STDC__ is
+ non-zero or _INTEGRAL_MAX_BITS < 64.
+ I'm not certain when the latter is true, but someone could in theory set
+ the former.
+ Either way, it should be binary compatible with a normal 64-bit int (this
+ assumption is not portable, but I believe it is true for MSVCRT).*/
+ OP_ASSERT(sizeof(pos)==sizeof(fpos_t));
+ /*Translate the seek to an absolute one: pos becomes the base position that _offset is added to.*/
+ if(_whence==SEEK_CUR){
+ int ret;
+ ret=fgetpos((FILE *)_stream,(fpos_t *)&pos);
+ if(ret)return ret;
+ }
+ else if(_whence==SEEK_END)pos=_filelengthi64(_fileno((FILE *)_stream));
+ else if(_whence==SEEK_SET)pos=0;
+ else return -1;
+ /*Check for errors (a negative base position) or overflow of pos+_offset in either direction.*/
+ if(pos<0||_offset<-pos||_offset>OP_INT64_MAX-pos)return -1;
+ pos+=_offset;
+ return fsetpos((FILE *)_stream,(fpos_t *)&pos);
+#else
+ /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer
+ it except on Windows.*/
+ return fseeko((FILE *)_stream,(off_t)_offset,_whence);
+#endif
+}
+
+static opus_int64 op_ftell(void *_stream){ /*Tell callback for stdio streams: _stream is a FILE *. Returns the current position, or -1 on failure.*/
+#if defined(_WIN32)
+ /*_ftelli64() is not exposed until MSVCRT80, and ftello()/ftello64() have
+ the same problems as fseeko()/fseeko64() in MingW.
+ See above for a more detailed explanation.*/
+ opus_int64 pos;
+ OP_ASSERT(sizeof(pos)==sizeof(fpos_t));
+ return fgetpos((FILE *)_stream,(fpos_t *)&pos)?-1:pos;
+#else
+ /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer
+ it except on Windows.*/
+ return ftello((FILE *)_stream);
+#endif
+}
+
+static const OpusFileCallbacks OP_FILE_CALLBACKS={ /*Callback set for FILE *-backed streams; copied into the caller's OpusFileCallbacks by op_fopen(), op_fdopen() and op_freopen().*/
+ op_fread,
+ op_fseek,
+ op_ftell,
+ (op_close_func)fclose /*fclose() takes a FILE * rather than a void *, hence the cast to the callback type.*/
+};
+
+#if defined(_WIN32)
+# include <stddef.h>
+# include <errno.h>
+
+/*Windows doesn't accept UTF-8 by default, and we don't have a wchar_t API,
+ so if we just pass the path to fopen(), then there'd be no way for a user
+ of our API to open a Unicode filename.
+ Instead, we translate from UTF-8 to UTF-16 and use Windows' wchar_t API.
+ This makes this API more consistent with platforms where the character set
+ used by fopen is the same as used on disk, which is generally UTF-8, and
+ with our metadata API, which always uses UTF-8.*/
+static wchar_t *op_utf8_to_utf16(const char *_src){ /*Converts the NUL-terminated UTF-8 string _src to a newly allocated, NUL-terminated UTF-16 string. Returns NULL if _ogg_malloc() fails or _src contains an illegal UTF-8 sequence; otherwise the caller must release the result with _ogg_free().*/
+ wchar_t *dst;
+ size_t len;
+ len=strlen(_src);
+ /*Worst-case output is 1 wide character per 1 input character (a 4-byte sequence yields only 2 wide characters).*/
+ dst=(wchar_t *)_ogg_malloc(sizeof(*dst)*(len+1));
+ if(dst!=NULL){
+ size_t si;
+ size_t di;
+ for(di=si=0;si<len;si++){
+ int c0;
+ c0=(unsigned char)_src[si];
+ if(!(c0&0x80)){
+ /*Start byte says this is a 1-byte sequence.*/
+ dst[di++]=(wchar_t)c0;
+ continue;
+ }
+ else{
+ int c1;
+ /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/
+ c1=(unsigned char)_src[si+1];
+ if((c1&0xC0)==0x80){
+ /*Found at least one continuation byte.*/
+ if((c0&0xE0)==0xC0){
+ wchar_t w;
+ /*Start byte says this is a 2-byte sequence.*/
+ w=(c0&0x1F)<<6|(c1&0x3F);
+ if(w>=0x80U){
+ /*This is a 2-byte sequence that is not overlong.*/
+ dst[di++]=w;
+ si++;
+ continue;
+ }
+ }
+ else{
+ int c2;
+ /*This is safe, because c1 was not 0 and _src is NUL-terminated.*/
+ c2=(unsigned char)_src[si+2];
+ if((c2&0xC0)==0x80){
+ /*Found at least two continuation bytes.*/
+ if((c0&0xF0)==0xE0){
+ wchar_t w;
+ /*Start byte says this is a 3-byte sequence.*/
+ w=(c0&0xF)<<12|(c1&0x3F)<<6|(c2&0x3F);
+ if(w>=0x800U&&(w<0xD800||w>=0xE000)&&w<0xFFFE){
+ /*This is a 3-byte sequence that is not overlong, not a
+ UTF-16 surrogate pair value, and not a 'not a character'
+ value.*/
+ dst[di++]=w;
+ si+=2;
+ continue;
+ }
+ }
+ else{
+ int c3;
+ /*This is safe, because c2 was not 0 and _src is
+ NUL-terminated.*/
+ c3=(unsigned char)_src[si+3];
+ if((c3&0xC0)==0x80){
+ /*Found at least three continuation bytes.*/
+ if((c0&0xF8)==0xF0){
+ opus_uint32 w;
+ /*Start byte says this is a 4-byte sequence; all four payload fields must be OR-ed together (3+6+6+6 bits).*/
+ w=(c0&7)<<18|(c1&0x3F)<<12|(c2&0x3F)<<6|(c3&0x3F);
+ if(w>=0x10000U&&w<0x110000U){
+ /*This is a 4-byte sequence that is not overlong and not
+ greater than the largest valid Unicode code point.
+ Convert it to a surrogate pair.*/
+ w-=0x10000;
+ dst[di++]=(wchar_t)(0xD800+(w>>10));
+ dst[di++]=(wchar_t)(0xDC00+(w&0x3FF));
+ si+=3;
+ continue;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ /*If we got here, we encountered an illegal UTF-8 sequence.*/
+ _ogg_free(dst);
+ return NULL;
+ }
+ OP_ASSERT(di<=len);
+ dst[di]='\0';
+ }
+ return dst;
+}
+
+#endif
+
+void *op_fopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode){ /*Opens _path with fopen()-style _mode. On success, fills *_cb with the stdio callbacks and returns the FILE * (as a void *); on failure, returns NULL and leaves *_cb untouched.*/
+ FILE *fp;
+#if !defined(_WIN32)
+ fp=fopen(_path,_mode);
+#else
+ fp=NULL;
+ if(_path==NULL||_mode==NULL)errno=EINVAL;
+ else{
+ wchar_t *wpath;
+ wchar_t *wmode;
+ wpath=op_utf8_to_utf16(_path); /*Both arguments are treated as UTF-8 and converted for the wide-character API.*/
+ wmode=op_utf8_to_utf16(_mode);
+ if(wmode==NULL)errno=EINVAL; /*A NULL result means the conversion failed (illegal UTF-8 or failed allocation).*/
+ else if(wpath==NULL)errno=ENOENT;
+ else fp=_wfopen(wpath,wmode);
+ _ogg_free(wmode);
+ _ogg_free(wpath);
+ }
+#endif
+ if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
+ return fp;
+}
+
+void *op_fdopen(OpusFileCallbacks *_cb,int _fd,const char *_mode){ /*Wraps the file descriptor _fd in a FILE * via fdopen(). On success, fills *_cb with the stdio callbacks and returns the FILE * (as a void *); on failure, returns NULL and leaves *_cb untouched.*/
+ FILE *fp;
+ fp=fdopen(_fd,_mode);
+ if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
+ return fp;
+}
+
+void *op_freopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode,
+ void *_stream){ /*Re-opens the FILE * in _stream on _path with freopen()-style _mode. On success, fills *_cb with the stdio callbacks and returns the FILE * (as a void *); on failure, returns NULL and leaves *_cb untouched.*/
+ FILE *fp;
+#if !defined(_WIN32)
+ fp=freopen(_path,_mode,(FILE *)_stream);
+#else
+ fp=NULL;
+ if(_path==NULL||_mode==NULL)errno=EINVAL;
+ else{
+ wchar_t *wpath;
+ wchar_t *wmode;
+ wpath=op_utf8_to_utf16(_path); /*Both arguments are treated as UTF-8 and converted for the wide-character API.*/
+ wmode=op_utf8_to_utf16(_mode);
+ if(wmode==NULL)errno=EINVAL; /*A NULL result means the conversion failed (illegal UTF-8 or failed allocation).*/
+ else if(wpath==NULL)errno=ENOENT;
+ else fp=_wfreopen(wpath,wmode,(FILE *)_stream);
+ _ogg_free(wmode);
+ _ogg_free(wpath);
+ }
+#endif
+ if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
+ return fp;
+}
+
+static int op_mem_read(void *_stream,unsigned char *_ptr,int _buf_size){ /*Read callback for memory streams: _stream is an OpusMemStream *. Copies up to _buf_size bytes from the current position into _ptr, advances the position, and returns the number of bytes copied (0 at or past the end of the block, or for a non-positive _buf_size).*/
+ OpusMemStream *stream;
+ ptrdiff_t size;
+ ptrdiff_t pos;
+ stream=(OpusMemStream *)_stream;
+ /*Check for empty read.*/
+ if(_buf_size<=0)return 0;
+ size=stream->size;
+ pos=stream->pos;
+ /*Check for EOF (the position is allowed to be past the end of the block).*/
+ if(pos>=size)return 0;
+ /*Check for a short read: never copy more than the bytes remaining in the block.*/
+ _buf_size=(int)OP_MIN(size-pos,_buf_size);
+ memcpy(_ptr,stream->data+pos,_buf_size);
+ pos+=_buf_size;
+ stream->pos=pos;
+ return _buf_size;
+}
+
+static int op_mem_seek(void *_stream,opus_int64 _offset,int _whence){ /*Seek callback for memory streams: _stream is an OpusMemStream *. _whence is SEEK_SET, SEEK_CUR or SEEK_END, with _offset interpreted as for fseek(). Returns 0 on success, or -1 (leaving the position unchanged) if _whence is invalid or the target would be negative or exceed OP_MEM_DIFF_MAX.*/
+ OpusMemStream *stream;
+ ptrdiff_t pos;
+ stream=(OpusMemStream *)_stream;
+ pos=stream->pos;
+ OP_ASSERT(pos>=0);
+ switch(_whence){
+ case SEEK_SET:{
+ /*Check for overflow:*/
+ if(_offset<0||_offset>OP_MEM_DIFF_MAX)return -1;
+ pos=(ptrdiff_t)_offset;
+ }break;
+ case SEEK_CUR:{
+ /*Check for overflow:*/
+ if(_offset<-pos||_offset>OP_MEM_DIFF_MAX-pos)return -1;
+ pos=(ptrdiff_t)(pos+_offset);
+ }break;
+ case SEEK_END:{
+ ptrdiff_t size;
+ size=stream->size;
+ OP_ASSERT(size>=0);
+ /*The offset is added to the end position, as with fseek(), so a negative _offset moves back into the block. Check for overflow:*/
+ if(_offset<-size||_offset>OP_MEM_DIFF_MAX-size)return -1;
+ pos=(ptrdiff_t)(size+_offset);
+ }break;
+ default:return -1;
+ }
+ stream->pos=pos;
+ return 0;
+}
+
+static opus_int64 op_mem_tell(void *_stream){ /*Tell callback for memory streams: _stream is an OpusMemStream *. Returns the current position, which may be greater than the size of the block.*/
+ OpusMemStream *stream;
+ stream=(OpusMemStream *)_stream;
+ return (ogg_int64_t)stream->pos;
+}
+
+static int op_mem_close(void *_stream){ /*Close callback for memory streams: frees the OpusMemStream itself, but not the data block it reads from. Always returns 0.*/
+ _ogg_free(_stream);
+ return 0;
+}
+
+static const OpusFileCallbacks OP_MEM_CALLBACKS={ /*Callback set for memory-backed streams; copied into the caller's OpusFileCallbacks by op_mem_stream_create().*/
+ op_mem_read,
+ op_mem_seek,
+ op_mem_tell,
+ op_mem_close
+};
+
+void *op_mem_stream_create(OpusFileCallbacks *_cb,
+ const unsigned char *_data,size_t _size){ /*Creates a read-only stream over the _size bytes at _data. On success, fills *_cb with the memory-stream callbacks and returns the new stream, positioned at 0; returns NULL (leaving *_cb untouched) if _size exceeds OP_MEM_SIZE_MAX or the allocation fails.*/
+ OpusMemStream *stream;
+ if(_size>OP_MEM_SIZE_MAX)return NULL; /*Larger sizes would not fit the signed size/position fields.*/
+ stream=(OpusMemStream *)_ogg_malloc(sizeof(*stream));
+ if(stream!=NULL){
+ *_cb=*&OP_MEM_CALLBACKS;
+ stream->data=_data; /*The pointer is stored, not copied: _data must stay valid for as long as the stream is in use.*/
+ stream->size=_size;
+ stream->pos=0;
+ }
+ return stream;
+}
diff --git a/drivers/opus/tansig_table.h b/drivers/opus/tansig_table.h
new file mode 100644
index 0000000000..c76f844a72
--- /dev/null
+++ b/drivers/opus/tansig_table.h
@@ -0,0 +1,45 @@
+/* This file is auto-generated by gen_tables */
+
+static const float tansig_table[201] = {
+0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
+0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
+0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
+0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
+0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
+0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
+0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
+0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
+0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
+0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
+0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
+0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
+0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
+0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
+0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
+0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
+0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
+0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
+0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
+0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
+0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
+0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
+0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
+0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
+0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
+0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
+0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
+0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
+0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
+0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
+0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
+0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
+0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
+0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
+0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
+0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
+0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
+0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
+1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
+1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
+1.000000f,
+};
diff --git a/drivers/opus/wincerts.c b/drivers/opus/wincerts.c
new file mode 100644
index 0000000000..568a085e43
--- /dev/null
+++ b/drivers/opus/wincerts.c
@@ -0,0 +1,171 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2013 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************/
+
+/*This should really be part of OpenSSL, but there's been a patch [1] sitting
+ in their bugtracker for over two years that implements this, without any
+ action, so I'm giving up and re-implementing it locally.
+
+ [1] <http://rt.openssl.org/Ticket/Display.html?id=2158>*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#if defined(OP_ENABLE_HTTP)&&defined(_WIN32)
+/*You must include windows.h before wincrypt.h and x509.h.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+/*You must include wincrypt.h before x509.h, too, or X509_NAME doesn't get
+ defined properly.*/
+# include <wincrypt.h>
+# include <openssl/ssl.h>
+# include <openssl/err.h>
+# include <openssl/x509.h>
+
+/*Lookup constructor: opens the current user's "ROOT" system certificate
+   store (existing stores only, read-only, shared contexts) and stashes the
+   handle in _lu->method_data.
+  Return: 1 if the store was opened, or 0 if CertOpenStore() failed.*/
+static int op_capi_new(X509_LOOKUP *_lu){
+  HCERTSTORE root_store;
+  root_store=CertOpenStore(CERT_STORE_PROV_SYSTEM_A,0,0,
+   CERT_STORE_OPEN_EXISTING_FLAG|CERT_STORE_READONLY_FLAG|
+   CERT_SYSTEM_STORE_CURRENT_USER|CERT_STORE_SHARE_CONTEXT_FLAG,"ROOT");
+  if(root_store==NULL)return 0;
+  /*method_data is a char *, so the handle is smuggled through a cast.*/
+  _lu->method_data=(char *)root_store;
+  return 1;
+}
+
+/*Lookup destructor: closes the certificate store handle held in
+   _lu->method_data.
+  When OP_ENABLE_ASSERTIONS is defined, the store is closed with
+   CERT_CLOSE_STORE_CHECK_FLAG and the result is handed to OP_ALWAYS_TRUE();
+   otherwise it is closed with no flags and the result is ignored.*/
+static void op_capi_free(X509_LOOKUP *_lu){
+# if defined(OP_ENABLE_ASSERTIONS)
+  OP_ALWAYS_TRUE(CertCloseStore((HCERTSTORE)_lu->method_data,
+   CERT_CLOSE_STORE_CHECK_FLAG));
+# else
+  CertCloseStore((HCERTSTORE)_lu->method_data,0);
+# endif
+}
+
+/*Searches the objects already held by the lookup's X509_STORE for one of the
+   given type and subject name.
+  The search itself runs under CRYPTO_LOCK_X509_STORE.
+  On a hit, the object's type and data are copied into _ret with a plain
+   memcpy().
+  Return: 1 if a match was found and copied, or 0 otherwise.*/
+static int op_capi_retrieve_by_subject(X509_LOOKUP *_lu,int _type,
+ X509_NAME *_name,X509_OBJECT *_ret){
+  X509_OBJECT *cached;
+  CRYPTO_w_lock(CRYPTO_LOCK_X509_STORE);
+  cached=X509_OBJECT_retrieve_by_subject(_lu->store_ctx->objs,_type,_name);
+  CRYPTO_w_unlock(CRYPTO_LOCK_X509_STORE);
+  if(cached==NULL)return 0;
+  _ret->type=cached->type;
+  memcpy(&_ret->data,&cached->data,sizeof(_ret->data));
+  return 1;
+}
+
+/*Lookup callback: fetches a certificate or CRL by name from the Crypto API
+   store held in _lu->method_data and adds it to the lookup's X509_STORE.
+  _lu:   The lookup whose method_data holds the HCERTSTORE.
+  _type: X509_LU_X509 to search certificates by subject name, or X509_LU_CRL
+          to search CRLs by issuer name.
+         Any other type fails.
+  _name: The name to search for.
+         Its DER encoding is regenerated first if it is missing or marked
+          modified.
+  _ret:  Receives the object on success.
+  Return: The result of op_capi_retrieve_by_subject() once the object has been
+           added to the store (or, for CRLs, if it was already cached), or 0
+           on any failure.*/
+static int op_capi_get_by_subject(X509_LOOKUP *_lu,int _type,X509_NAME *_name,
+ X509_OBJECT *_ret){
+  HCERTSTORE h_store;
+  if(_name==NULL)return 0;
+  if(_name->bytes==NULL||_name->bytes->length<=0||_name->modified){
+    /*Called only for its side effect of refreshing _name->bytes.*/
+    if(i2d_X509_NAME(_name,NULL)<0)return 0;
+    OP_ASSERT(_name->bytes->length>0);
+  }
+  h_store=(HCERTSTORE)_lu->method_data;
+  switch(_type){
+    case X509_LU_X509:{
+      CERT_NAME_BLOB find_para;
+      PCCERT_CONTEXT cert;
+      const unsigned char *in;
+      X509 *x;
+      int ret;
+      /*Although X509_NAME contains a canon_enc field, that "canonical" [1]
+         encoding was just made up by OpenSSL.
+        It doesn't correspond to any actual standard, and since it drops the
+         initial sequence header, won't be recognized by the Crypto API.
+        The assumption here is that CertFindCertificateInStore() will allow any
+         appropriate variations in the encoding when it does its comparison.
+        This is, however, emphatically not true under Wine, which just compares
+         the encodings with memcmp().
+        Most of the time things work anyway, though, and there isn't really
+         anything we can do to make the situation better.
+
+        [1] A "canonical form" is defined as the one where, if you locked 10
+         mathematicians in a room and asked them to come up with a
+         representation for something, it's the answer that 9 of them would
+         give you back.
+        I don't think OpenSSL's encoding qualifies.*/
+      find_para.cbData=_name->bytes->length;
+      find_para.pbData=(unsigned char *)_name->bytes->data;
+      cert=CertFindCertificateInStore(h_store,X509_ASN_ENCODING,0,
+       CERT_FIND_SUBJECT_NAME,&find_para,NULL);
+      if(cert==NULL)return 0;
+      /*d2i_X509() advances the pointer it is given, so hand it a copy rather
+         than the pbCertEncoded field of the const (and, with
+         CERT_STORE_SHARE_CONTEXT_FLAG, possibly shared) context.*/
+      in=cert->pbCertEncoded;
+      x=d2i_X509(NULL,&in,cert->cbCertEncoded);
+      CertFreeCertificateContext(cert);
+      if(x==NULL)return 0;
+      ret=X509_STORE_add_cert(_lu->store_ctx,x);
+      /*Drop our reference whether or not the add succeeded.*/
+      X509_free(x);
+      if(ret)return op_capi_retrieve_by_subject(_lu,_type,_name,_ret);
+    }break;
+    case X509_LU_CRL:{
+      CERT_INFO cert_info;
+      CERT_CONTEXT find_para;
+      PCCRL_CONTEXT crl;
+      const unsigned char *in;
+      X509_CRL *x;
+      int ret;
+      /*Use a CRL that is already in the store if there is one.*/
+      ret=op_capi_retrieve_by_subject(_lu,_type,_name,_ret);
+      if(ret>0)return ret;
+      /*CRL_FIND_ISSUED_BY takes a certificate context, so build a dummy one
+         with only the issuer name filled in.*/
+      memset(&cert_info,0,sizeof(cert_info));
+      cert_info.Issuer.cbData=_name->bytes->length;
+      cert_info.Issuer.pbData=(unsigned char *)_name->bytes->data;
+      memset(&find_para,0,sizeof(find_para));
+      find_para.pCertInfo=&cert_info;
+      crl=CertFindCRLInStore(h_store,0,0,CRL_FIND_ISSUED_BY,&find_para,NULL);
+      if(crl==NULL)return 0;
+      /*As above: decode through a local pointer so the context's
+         pbCrlEncoded field is left untouched.*/
+      in=crl->pbCrlEncoded;
+      x=d2i_X509_CRL(NULL,&in,crl->cbCrlEncoded);
+      CertFreeCRLContext(crl);
+      if(x==NULL)return 0;
+      ret=X509_STORE_add_crl(_lu->store_ctx,x);
+      X509_CRL_free(x);
+      if(ret)return op_capi_retrieve_by_subject(_lu,_type,_name,_ret);
+    }break;
+  }
+  return 0;
+}
+
+/*This is not const because OpenSSL doesn't allow it, even though it won't
+ write to it.
+ The initializer is positional, so the entries must stay in the order in which
+ X509_LOOKUP_METHOD declares its fields.
+ Only three hooks are supplied; every other entry is NULL.*/
+static X509_LOOKUP_METHOD X509_LOOKUP_CAPI={
+ "Load Crypto API store into cache",
+ op_capi_new, /*Opens the system "ROOT" store.*/
+ op_capi_free, /*Closes the store handle.*/
+ NULL,
+ NULL,
+ NULL,
+ op_capi_get_by_subject, /*Fetches a certificate or CRL by name.*/
+ NULL,
+ NULL,
+ NULL
+};
+
+/*Registers the Crypto API lookup method (X509_LOOKUP_CAPI) with the
+   certificate store of _ssl_ctx, then clears the OpenSSL error queue.
+  Return: 1 on success, or 0 if the lookup could not be added.*/
+int SSL_CTX_set_default_verify_paths_win32(SSL_CTX *_ssl_ctx){
+  X509_STORE *cert_store;
+  /*We intentionally do not add the normal default paths, as they are usually
+     wrong, and are just asking to be used as an exploit vector.*/
+  cert_store=SSL_CTX_get_cert_store(_ssl_ctx);
+  OP_ASSERT(cert_store!=NULL);
+  if(X509_STORE_add_lookup(cert_store,&X509_LOOKUP_CAPI)==NULL)return 0;
+  ERR_clear_error();
+  return 1;
+}
+
+#endif
diff --git a/drivers/opus/winerrno.h b/drivers/opus/winerrno.h
new file mode 100644
index 0000000000..32a90b4ee1
--- /dev/null
+++ b/drivers/opus/winerrno.h
@@ -0,0 +1,90 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************/
+/*Redefines the socket-related errno names in terms of the Winsock WSAE*
+ error codes from <winerror.h>.*/
+#if !defined(_opusfile_winerrno_h)
+# define _opusfile_winerrno_h (1)
+
+# include <errno.h>
+# include <winerror.h>
+
+/*These conflict with the MSVC errno.h definitions, but we don't need to use
+ the original ones in any file that deals with sockets.
+ We could map the WSA errors to the errno.h ones (most of which are only
+ available on sufficiently new versions of MSVC), but they aren't ordered the
+ same, and given how rarely we actually look at the values, I don't think
+ it's worth a lookup table.*/
+# undef EWOULDBLOCK
+# undef EINPROGRESS
+# undef EALREADY
+# undef ENOTSOCK
+# undef EDESTADDRREQ
+# undef EMSGSIZE
+# undef EPROTOTYPE
+# undef ENOPROTOOPT
+# undef EPROTONOSUPPORT
+# undef EOPNOTSUPP
+# undef EAFNOSUPPORT
+# undef EADDRINUSE
+# undef EADDRNOTAVAIL
+# undef ENETDOWN
+# undef ENETUNREACH
+# undef ENETRESET
+# undef ECONNABORTED
+# undef ECONNRESET
+# undef ENOBUFS
+# undef EISCONN
+# undef ENOTCONN
+# undef ETIMEDOUT
+# undef ECONNREFUSED
+# undef ELOOP
+# undef ENAMETOOLONG
+# undef EHOSTUNREACH
+# undef ENOTEMPTY
+
+/*Each value is the corresponding Winsock code's offset from WSABASEERR, so
+ adding WSABASEERR back recovers the original WSAE* code.
+ NOTE(review): code comparing against these presumably subtracts WSABASEERR
+ from the Winsock error first; confirm at the call sites.
+ ESOCKTNOSUPPORT, EPFNOSUPPORT, ESHUTDOWN, ETOOMANYREFS, EHOSTDOWN, EPROCLIM,
+ EUSERS, EDQUOT, ESTALE, and EREMOTE have no matching #undef above,
+ presumably because errno.h does not define them; TODO confirm.*/
+# define EWOULDBLOCK (WSAEWOULDBLOCK-WSABASEERR)
+# define EINPROGRESS (WSAEINPROGRESS-WSABASEERR)
+# define EALREADY (WSAEALREADY-WSABASEERR)
+# define ENOTSOCK (WSAENOTSOCK-WSABASEERR)
+# define EDESTADDRREQ (WSAEDESTADDRREQ-WSABASEERR)
+# define EMSGSIZE (WSAEMSGSIZE-WSABASEERR)
+# define EPROTOTYPE (WSAEPROTOTYPE-WSABASEERR)
+# define ENOPROTOOPT (WSAENOPROTOOPT-WSABASEERR)
+# define EPROTONOSUPPORT (WSAEPROTONOSUPPORT-WSABASEERR)
+# define ESOCKTNOSUPPORT (WSAESOCKTNOSUPPORT-WSABASEERR)
+# define EOPNOTSUPP (WSAEOPNOTSUPP-WSABASEERR)
+# define EPFNOSUPPORT (WSAEPFNOSUPPORT-WSABASEERR)
+# define EAFNOSUPPORT (WSAEAFNOSUPPORT-WSABASEERR)
+# define EADDRINUSE (WSAEADDRINUSE-WSABASEERR)
+# define EADDRNOTAVAIL (WSAEADDRNOTAVAIL-WSABASEERR)
+# define ENETDOWN (WSAENETDOWN-WSABASEERR)
+# define ENETUNREACH (WSAENETUNREACH-WSABASEERR)
+# define ENETRESET (WSAENETRESET-WSABASEERR)
+# define ECONNABORTED (WSAECONNABORTED-WSABASEERR)
+# define ECONNRESET (WSAECONNRESET-WSABASEERR)
+# define ENOBUFS (WSAENOBUFS-WSABASEERR)
+# define EISCONN (WSAEISCONN-WSABASEERR)
+# define ENOTCONN (WSAENOTCONN-WSABASEERR)
+# define ESHUTDOWN (WSAESHUTDOWN-WSABASEERR)
+# define ETOOMANYREFS (WSAETOOMANYREFS-WSABASEERR)
+# define ETIMEDOUT (WSAETIMEDOUT-WSABASEERR)
+# define ECONNREFUSED (WSAECONNREFUSED-WSABASEERR)
+# define ELOOP (WSAELOOP-WSABASEERR)
+# define ENAMETOOLONG (WSAENAMETOOLONG-WSABASEERR)
+# define EHOSTDOWN (WSAEHOSTDOWN-WSABASEERR)
+# define EHOSTUNREACH (WSAEHOSTUNREACH-WSABASEERR)
+# define ENOTEMPTY (WSAENOTEMPTY-WSABASEERR)
+# define EPROCLIM (WSAEPROCLIM-WSABASEERR)
+# define EUSERS (WSAEUSERS-WSABASEERR)
+# define EDQUOT (WSAEDQUOT-WSABASEERR)
+# define ESTALE (WSAESTALE-WSABASEERR)
+# define EREMOTE (WSAEREMOTE-WSABASEERR)
+
+#endif
diff --git a/drivers/pulseaudio/audio_driver_pulseaudio.cpp b/drivers/pulseaudio/audio_driver_pulseaudio.cpp
index dfe9ddc55f..4cda141f92 100644
--- a/drivers/pulseaudio/audio_driver_pulseaudio.cpp
+++ b/drivers/pulseaudio/audio_driver_pulseaudio.cpp
@@ -82,6 +82,17 @@ Error AudioDriverPulseAudio::init() {
return OK;
}
+/**
+ * Returns the stream latency reported by PulseAudio, in seconds.
+ *
+ * The value is queried once and cached in `latency`, since it is only
+ * approximate anyway. If the stream has not been opened or the query fails,
+ * 0 is returned and nothing is cached, so a later call can try again.
+ */
+float AudioDriverPulseAudio::get_latency() {
+
+	if (latency==0) { //only do this once since it's approximate anyway
+		if (pulse==NULL)
+			return 0; //no stream yet; pa_simple_get_latency() needs a valid handle
+		int error_code=0;
+		pa_usec_t palat = pa_simple_get_latency( pulse,&error_code);
+		if (palat==(pa_usec_t)-1)
+			return 0; //query failed; don't cache the error sentinel as a latency
+		latency=double(palat)/1000000.0; //microseconds -> seconds
+	}
+
+	return latency;
+}
+
void AudioDriverPulseAudio::thread_func(void* p_udata) {
AudioDriverPulseAudio* ad = (AudioDriverPulseAudio*)p_udata;
@@ -121,6 +132,7 @@ void AudioDriverPulseAudio::thread_func(void* p_udata) {
ad->exit_thread = true;
break;
}
+
}
ad->thread_exited = true;
@@ -185,6 +197,7 @@ AudioDriverPulseAudio::AudioDriverPulseAudio() {
mutex = NULL;
thread = NULL;
pulse = NULL;
+ latency=0;
}
AudioDriverPulseAudio::~AudioDriverPulseAudio() {
diff --git a/drivers/pulseaudio/audio_driver_pulseaudio.h b/drivers/pulseaudio/audio_driver_pulseaudio.h
index e82e0c24be..e7c8bcce36 100644
--- a/drivers/pulseaudio/audio_driver_pulseaudio.h
+++ b/drivers/pulseaudio/audio_driver_pulseaudio.h
@@ -58,6 +58,8 @@ class AudioDriverPulseAudio : public AudioDriverSW {
mutable bool exit_thread;
bool pcm_open;
+ float latency;
+
public:
const char* get_name() const {
@@ -72,6 +74,9 @@ public:
virtual void unlock();
virtual void finish();
+ virtual float get_latency();
+
+
AudioDriverPulseAudio();
~AudioDriverPulseAudio();
};
diff --git a/drivers/register_driver_types.cpp b/drivers/register_driver_types.cpp
index 01f6a8b5b0..2647d23011 100644
--- a/drivers/register_driver_types.cpp
+++ b/drivers/register_driver_types.cpp
@@ -37,13 +37,19 @@
#include "vorbis/audio_stream_ogg_vorbis.h"
#endif
+#ifdef OPUS_ENABLED
+#include "opus/audio_stream_opus.h"
+#endif
#ifdef SPEEX_ENABLED
#include "speex/audio_stream_speex.h"
#endif
#ifdef THEORA_ENABLED
-//#include "theora/video_stream_theora.h"
+#include "theora/video_stream_theora.h"
+#endif
+
+#ifdef THEORAPLAYER_ENABLED
#include "theoraplayer/video_stream_theoraplayer.h"
#endif
@@ -85,12 +91,19 @@ static ResourceFormatLoaderAudioStreamOGG *vorbis_stream_loader=NULL;
static ResourceFormatLoaderAudioStreamOGGVorbis *vorbis_stream_loader=NULL;
#endif
+#ifdef OPUS_ENABLED
+static ResourceFormatLoaderAudioStreamOpus *opus_stream_loader=NULL;
+#endif
+
#ifdef SPEEX_ENABLED
static ResourceFormatLoaderAudioStreamSpeex *speex_stream_loader=NULL;
#endif
#ifdef THEORA_ENABLED
-//static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL;
+static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL;
+#endif
+
+#ifdef THEORAPLAYER_ENABLED
static ResourceFormatLoaderVideoStreamTheoraplayer* theoraplayer_stream_loader = NULL;
#endif
@@ -169,6 +182,11 @@ void register_driver_types() {
ObjectTypeDB::register_type<AudioStreamOGGVorbis>();
#endif
+#ifdef OPUS_ENABLED
+ opus_stream_loader=memnew( ResourceFormatLoaderAudioStreamOpus );
+ ResourceLoader::add_resource_format_loader( opus_stream_loader );
+ ObjectTypeDB::register_type<AudioStreamOpus>();
+#endif
#ifdef DDS_ENABLED
resource_loader_dds = memnew( ResourceFormatDDS );
@@ -205,9 +223,12 @@ void register_driver_types() {
#endif
#ifdef THEORA_ENABLED
- //theora_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheora );
- //ResourceLoader::add_resource_format_loader(theora_stream_loader);
- //ObjectTypeDB::register_type<VideoStreamTheora>();
+ theora_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheora );
+ ResourceLoader::add_resource_format_loader(theora_stream_loader);
+ ObjectTypeDB::register_type<VideoStreamTheora>();
+#endif
+
+#ifdef THEORAPLAYER_ENABLED
theoraplayer_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheoraplayer );
ResourceLoader::add_resource_format_loader(theoraplayer_stream_loader);
ObjectTypeDB::register_type<VideoStreamTheoraplayer>();
@@ -239,12 +260,19 @@ void unregister_driver_types() {
memdelete( vorbis_stream_loader );
#endif
+#ifdef OPUS_ENABLED
+ memdelete( opus_stream_loader );
+#endif
+
#ifdef SPEEX_ENABLED
memdelete( speex_stream_loader );
#endif
#ifdef THEORA_ENABLED
- //memdelete (theora_stream_loader);
+ memdelete (theora_stream_loader);
+#endif
+
+#ifdef THEORAPLAYER_ENABLED
memdelete (theoraplayer_stream_loader);
#endif
diff --git a/drivers/rtaudio/RtAudio.cpp b/drivers/rtaudio/RtAudio.cpp
index 8876f72e21..72ca836907 100644
--- a/drivers/rtaudio/RtAudio.cpp
+++ b/drivers/rtaudio/RtAudio.cpp
@@ -1,10234 +1,10234 @@
-#ifdef RTAUDIO_ENABLED
-/************************************************************************/
-/*! \class RtAudio
- \brief Realtime audio i/o C++ classes.
-
- RtAudio provides a common API (Application Programming Interface)
- for realtime audio input/output across Linux (native ALSA, Jack,
- and OSS), Macintosh OS X (CoreAudio and Jack), and Windows
- (DirectSound, ASIO and WASAPI) operating systems.
-
- RtAudio WWW site: http://www.music.mcgill.ca/~gary/rtaudio/
-
- RtAudio: realtime audio i/o C++ classes
- Copyright (c) 2001-2014 Gary P. Scavone
-
- Permission is hereby granted, free of charge, to any person
- obtaining a copy of this software and associated documentation files
- (the "Software"), to deal in the Software without restriction,
- including without limitation the rights to use, copy, modify, merge,
- publish, distribute, sublicense, and/or sell copies of the Software,
- and to permit persons to whom the Software is furnished to do so,
- subject to the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- Any person wishing to distribute modifications to the Software is
- asked to send the modifications to the original developer so that
- they can be incorporated into the canonical version. This is,
- however, not a binding provision of this license.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
- ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
- CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-/************************************************************************/
-
-// RtAudio: Version 4.1.1
-
-#include "RtAudio.h"
-#include <iostream>
-#include <cstdlib>
-#include <cstring>
-#include <climits>
-#include <algorithm>
-
-// Static variable definitions.
-const unsigned int RtApi::MAX_SAMPLE_RATES = 14;
-const unsigned int RtApi::SAMPLE_RATES[] = {
- 4000, 5512, 8000, 9600, 11025, 16000, 22050,
- 32000, 44100, 48000, 88200, 96000, 176400, 192000
-};
-
-#if defined(__WINDOWS_DS__) || defined(__WINDOWS_ASIO__) || defined(__WINDOWS_WASAPI__)
-#ifdef WINRT_ENABLED
- #define MUTEX_INITIALIZE(A) InitializeCriticalSectionEx(A, 0, 0)
-#else
- #define MUTEX_INITIALIZE(A) InitializeCriticalSection(A)
-#endif
- #define MUTEX_DESTROY(A) DeleteCriticalSection(A)
- #define MUTEX_LOCK(A) EnterCriticalSection(A)
- #define MUTEX_UNLOCK(A) LeaveCriticalSection(A)
-
- #include "tchar.h"
-
- static std::string convertCharPointerToStdString(const char *text)
- {
- return std::string(text);
- }
-
- static std::string convertCharPointerToStdString(const wchar_t *text)
- {
- int length = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL);
- std::string s( length-1, '\0' );
- WideCharToMultiByte(CP_UTF8, 0, text, -1, &s[0], length, NULL, NULL);
- return s;
- }
-
-#elif defined(__LINUX_ALSA__) || defined(__LINUX_PULSE__) || defined(__UNIX_JACK__) || defined(__LINUX_OSS__) || defined(__MACOSX_CORE__)
- // pthread API
- #define MUTEX_INITIALIZE(A) pthread_mutex_init(A, NULL)
- #define MUTEX_DESTROY(A) pthread_mutex_destroy(A)
- #define MUTEX_LOCK(A) pthread_mutex_lock(A)
- #define MUTEX_UNLOCK(A) pthread_mutex_unlock(A)
-#else
- #define MUTEX_INITIALIZE(A) abs(*A) // dummy definitions
- #define MUTEX_DESTROY(A) abs(*A) // dummy definitions
-#endif
-
-// *************************************************** //
-//
-// RtAudio definitions.
-//
-// *************************************************** //
-
-std::string RtAudio :: getVersion( void ) throw()
-{
- return RTAUDIO_VERSION;
-}
-
-void RtAudio :: getCompiledApi( std::vector<RtAudio::Api> &apis ) throw()
-{
- apis.clear();
-
- // The order here will control the order of RtAudio's API search in
- // the constructor.
-#if defined(__UNIX_JACK__)
- apis.push_back( UNIX_JACK );
-#endif
-#if defined(__LINUX_ALSA__)
- apis.push_back( LINUX_ALSA );
-#endif
-#if defined(__LINUX_PULSE__)
- apis.push_back( LINUX_PULSE );
-#endif
-#if defined(__LINUX_OSS__)
- apis.push_back( LINUX_OSS );
-#endif
-#if defined(__WINDOWS_ASIO__)
- apis.push_back( WINDOWS_ASIO );
-#endif
-#if defined(__WINDOWS_WASAPI__)
- apis.push_back( WINDOWS_WASAPI );
-#endif
-#if defined(__WINDOWS_DS__)
- apis.push_back( WINDOWS_DS );
-#endif
-#if defined(__MACOSX_CORE__)
- apis.push_back( MACOSX_CORE );
-#endif
-#if defined(__RTAUDIO_DUMMY__)
- apis.push_back( RTAUDIO_DUMMY );
-#endif
-}
-
-void RtAudio :: openRtApi( RtAudio::Api api )
-{
- if ( rtapi_ )
- delete rtapi_;
- rtapi_ = 0;
-
-#if defined(__UNIX_JACK__)
- if ( api == UNIX_JACK )
- rtapi_ = new RtApiJack();
-#endif
-#if defined(__LINUX_ALSA__)
- if ( api == LINUX_ALSA )
- rtapi_ = new RtApiAlsa();
-#endif
-#if defined(__LINUX_PULSE__)
- if ( api == LINUX_PULSE )
- rtapi_ = new RtApiPulse();
-#endif
-#if defined(__LINUX_OSS__)
- if ( api == LINUX_OSS )
- rtapi_ = new RtApiOss();
-#endif
-#if defined(__WINDOWS_ASIO__)
- if ( api == WINDOWS_ASIO )
- rtapi_ = new RtApiAsio();
-#endif
-#if defined(__WINDOWS_WASAPI__)
- if ( api == WINDOWS_WASAPI )
- rtapi_ = new RtApiWasapi();
-#endif
-#if defined(__WINDOWS_DS__)
- if ( api == WINDOWS_DS )
- rtapi_ = new RtApiDs();
-#endif
-#if defined(__MACOSX_CORE__)
- if ( api == MACOSX_CORE )
- rtapi_ = new RtApiCore();
-#endif
-#if defined(__RTAUDIO_DUMMY__)
- if ( api == RTAUDIO_DUMMY )
- rtapi_ = new RtApiDummy();
-#endif
-}
-
-RtAudio :: RtAudio( RtAudio::Api api )
-{
- rtapi_ = 0;
-
- if ( api != UNSPECIFIED ) {
- // Attempt to open the specified API.
- openRtApi( api );
- if ( rtapi_ ) return;
-
- // No compiled support for specified API value. Issue a debug
- // warning and continue as if no API was specified.
- std::cerr << "\nRtAudio: no compiled support for specified API argument!\n" << std::endl;
- }
-
- // Iterate through the compiled APIs and return as soon as we find
- // one with at least one device or we reach the end of the list.
- std::vector< RtAudio::Api > apis;
- getCompiledApi( apis );
- for ( unsigned int i=0; i<apis.size(); i++ ) {
- openRtApi( apis[i] );
- if ( rtapi_ && rtapi_->getDeviceCount() ) break;
- }
-
- if ( rtapi_ ) return;
-
- // It should not be possible to get here because the preprocessor
- // definition __RTAUDIO_DUMMY__ is automatically defined if no
- // API-specific definitions are passed to the compiler. But just in
- // case something weird happens, we'll thow an error.
- std::string errorText = "\nRtAudio: no compiled API support found ... critical error!!\n\n";
- throw( RtAudioError( errorText, RtAudioError::UNSPECIFIED ) );
-}
-
-RtAudio :: ~RtAudio() throw()
-{
- if ( rtapi_ )
- delete rtapi_;
-}
-
-void RtAudio :: openStream( RtAudio::StreamParameters *outputParameters,
- RtAudio::StreamParameters *inputParameters,
- RtAudioFormat format, unsigned int sampleRate,
- unsigned int *bufferFrames,
- RtAudioCallback callback, void *userData,
- RtAudio::StreamOptions *options,
- RtAudioErrorCallback errorCallback )
-{
- return rtapi_->openStream( outputParameters, inputParameters, format,
- sampleRate, bufferFrames, callback,
- userData, options, errorCallback );
-}
-
-// *************************************************** //
-//
-// Public RtApi definitions (see end of file for
-// private or protected utility functions).
-//
-// *************************************************** //
-
-RtApi :: RtApi()
-{
- stream_.state = STREAM_CLOSED;
- stream_.mode = UNINITIALIZED;
- stream_.apiHandle = 0;
- stream_.userBuffer[0] = 0;
- stream_.userBuffer[1] = 0;
- MUTEX_INITIALIZE( &stream_.mutex );
- showWarnings_ = true;
- firstErrorOccurred_ = false;
-}
-
-RtApi :: ~RtApi()
-{
- MUTEX_DESTROY( &stream_.mutex );
-}
-
-void RtApi :: openStream( RtAudio::StreamParameters *oParams,
- RtAudio::StreamParameters *iParams,
- RtAudioFormat format, unsigned int sampleRate,
- unsigned int *bufferFrames,
- RtAudioCallback callback, void *userData,
- RtAudio::StreamOptions *options,
- RtAudioErrorCallback errorCallback )
-{
- if ( stream_.state != STREAM_CLOSED ) {
- errorText_ = "RtApi::openStream: a stream is already open!";
- error( RtAudioError::INVALID_USE );
- return;
- }
-
- // Clear stream information potentially left from a previously open stream.
- clearStreamInfo();
-
- if ( oParams && oParams->nChannels < 1 ) {
- errorText_ = "RtApi::openStream: a non-NULL output StreamParameters structure cannot have an nChannels value less than one.";
- error( RtAudioError::INVALID_USE );
- return;
- }
-
- if ( iParams && iParams->nChannels < 1 ) {
- errorText_ = "RtApi::openStream: a non-NULL input StreamParameters structure cannot have an nChannels value less than one.";
- error( RtAudioError::INVALID_USE );
- return;
- }
-
- if ( oParams == NULL && iParams == NULL ) {
- errorText_ = "RtApi::openStream: input and output StreamParameters structures are both NULL!";
- error( RtAudioError::INVALID_USE );
- return;
- }
-
- if ( formatBytes(format) == 0 ) {
- errorText_ = "RtApi::openStream: 'format' parameter value is undefined.";
- error( RtAudioError::INVALID_USE );
- return;
- }
-
- unsigned int nDevices = getDeviceCount();
- unsigned int oChannels = 0;
- if ( oParams ) {
- oChannels = oParams->nChannels;
- if ( oParams->deviceId >= nDevices ) {
- errorText_ = "RtApi::openStream: output device parameter value is invalid.";
- error( RtAudioError::INVALID_USE );
- return;
- }
- }
-
- unsigned int iChannels = 0;
- if ( iParams ) {
- iChannels = iParams->nChannels;
- if ( iParams->deviceId >= nDevices ) {
- errorText_ = "RtApi::openStream: input device parameter value is invalid.";
- error( RtAudioError::INVALID_USE );
- return;
- }
- }
-
- bool result;
-
- if ( oChannels > 0 ) {
-
- result = probeDeviceOpen( oParams->deviceId, OUTPUT, oChannels, oParams->firstChannel,
- sampleRate, format, bufferFrames, options );
- if ( result == false ) {
- error( RtAudioError::SYSTEM_ERROR );
- return;
- }
- }
-
- if ( iChannels > 0 ) {
-
- result = probeDeviceOpen( iParams->deviceId, INPUT, iChannels, iParams->firstChannel,
- sampleRate, format, bufferFrames, options );
- if ( result == false ) {
- if ( oChannels > 0 ) closeStream();
- error( RtAudioError::SYSTEM_ERROR );
- return;
- }
- }
-
- stream_.callbackInfo.callback = (void *) callback;
- stream_.callbackInfo.userData = userData;
- stream_.callbackInfo.errorCallback = (void *) errorCallback;
-
- if ( options ) options->numberOfBuffers = stream_.nBuffers;
- stream_.state = STREAM_STOPPED;
-}
-
-unsigned int RtApi :: getDefaultInputDevice( void )
-{
- // Should be implemented in subclasses if possible.
- return 0;
-}
-
-unsigned int RtApi :: getDefaultOutputDevice( void )
-{
- // Should be implemented in subclasses if possible.
- return 0;
-}
-
-void RtApi :: closeStream( void )
-{
- // MUST be implemented in subclasses!
- return;
-}
-
-bool RtApi :: probeDeviceOpen( unsigned int /*device*/, StreamMode /*mode*/, unsigned int /*channels*/,
- unsigned int /*firstChannel*/, unsigned int /*sampleRate*/,
- RtAudioFormat /*format*/, unsigned int * /*bufferSize*/,
- RtAudio::StreamOptions * /*options*/ )
-{
- // MUST be implemented in subclasses!
- return FAILURE;
-}
-
-void RtApi :: tickStreamTime( void )
-{
- // Subclasses that do not provide their own implementation of
- // getStreamTime should call this function once per buffer I/O to
- // provide basic stream time support.
-
- stream_.streamTime += ( stream_.bufferSize * 1.0 / stream_.sampleRate );
-
-#if defined( HAVE_GETTIMEOFDAY )
- gettimeofday( &stream_.lastTickTimestamp, NULL );
-#endif
-}
-
-long RtApi :: getStreamLatency( void )
-{
- verifyStream();
-
- long totalLatency = 0;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
- totalLatency = stream_.latency[0];
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX )
- totalLatency += stream_.latency[1];
-
- return totalLatency;
-}
-
-double RtApi :: getStreamTime( void )
-{
- verifyStream();
-
-#if defined( HAVE_GETTIMEOFDAY )
- // Return a very accurate estimate of the stream time by
- // adding in the elapsed time since the last tick.
- struct timeval then;
- struct timeval now;
-
- if ( stream_.state != STREAM_RUNNING || stream_.streamTime == 0.0 )
- return stream_.streamTime;
-
- gettimeofday( &now, NULL );
- then = stream_.lastTickTimestamp;
- return stream_.streamTime +
- ((now.tv_sec + 0.000001 * now.tv_usec) -
- (then.tv_sec + 0.000001 * then.tv_usec));
-#else
- return stream_.streamTime;
-#endif
-}
-
-void RtApi :: setStreamTime( double time )
-{
- verifyStream();
-
- if ( time >= 0.0 )
- stream_.streamTime = time;
-}
-
-unsigned int RtApi :: getStreamSampleRate( void )
-{
- verifyStream();
-
- return stream_.sampleRate;
-}
-
-
-// *************************************************** //
-//
-// OS/API-specific methods.
-//
-// *************************************************** //
-
-#if defined(__MACOSX_CORE__)
-
-// The OS X CoreAudio API is designed to use a separate callback
-// procedure for each of its audio devices. A single RtAudio duplex
-// stream using two different devices is supported here, though it
-// cannot be guaranteed to always behave correctly because we cannot
-// synchronize these two callbacks.
-//
-// A property listener is installed for over/underrun information.
-// However, no functionality is currently provided to allow property
-// listeners to trigger user handlers because it is unclear what could
-// be done if a critical stream parameter (buffer size, sample rate,
-// device disconnect) notification arrived. The listeners entail
-// quite a bit of extra code and most likely, a user program wouldn't
-// be prepared for the result anyway. However, we do provide a flag
-// to the client callback function to inform of an over/underrun.
-
-// A structure to hold various information related to the CoreAudio API
-// implementation.
-struct CoreHandle {
- AudioDeviceID id[2]; // device ids
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
- AudioDeviceIOProcID procId[2];
-#endif
- UInt32 iStream[2]; // device stream index (or first if using multiple)
- UInt32 nStreams[2]; // number of streams to use
- bool xrun[2];
- char *deviceBuffer;
- pthread_cond_t condition;
- int drainCounter; // Tracks callback counts when draining
- bool internalDrain; // Indicates if stop is initiated from callback or not.
-
- CoreHandle()
- :deviceBuffer(0), drainCounter(0), internalDrain(false) { nStreams[0] = 1; nStreams[1] = 1; id[0] = 0; id[1] = 0; xrun[0] = false; xrun[1] = false; }
-};
-
-RtApiCore:: RtApiCore()
-{
-#if defined( AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER )
- // This is a largely undocumented but absolutely necessary
- // requirement starting with OS-X 10.6. If not called, queries and
- // updates to various audio device properties are not handled
- // correctly.
- CFRunLoopRef theRunLoop = NULL;
- AudioObjectPropertyAddress property = { kAudioHardwarePropertyRunLoop,
- kAudioObjectPropertyScopeGlobal,
- kAudioObjectPropertyElementMaster };
- OSStatus result = AudioObjectSetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, sizeof(CFRunLoopRef), &theRunLoop);
- if ( result != noErr ) {
- errorText_ = "RtApiCore::RtApiCore: error setting run loop property!";
- error( RtAudioError::WARNING );
- }
-#endif
-}
-
-RtApiCore :: ~RtApiCore()
-{
- // The subclass destructor gets called before the base class
- // destructor, so close an existing stream before deallocating
- // apiDeviceId memory.
- if ( stream_.state != STREAM_CLOSED ) closeStream();
-}
-
-unsigned int RtApiCore :: getDeviceCount( void )
-{
- // Find out how many audio devices there are, if any.
- UInt32 dataSize;
- AudioObjectPropertyAddress propertyAddress = { kAudioHardwarePropertyDevices, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
- OSStatus result = AudioObjectGetPropertyDataSize( kAudioObjectSystemObject, &propertyAddress, 0, NULL, &dataSize );
- if ( result != noErr ) {
- errorText_ = "RtApiCore::getDeviceCount: OS-X error getting device info!";
- error( RtAudioError::WARNING );
- return 0;
- }
-
- return dataSize / sizeof( AudioDeviceID );
-}
-
-unsigned int RtApiCore :: getDefaultInputDevice( void )
-{
- unsigned int nDevices = getDeviceCount();
- if ( nDevices <= 1 ) return 0;
-
- AudioDeviceID id;
- UInt32 dataSize = sizeof( AudioDeviceID );
- AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
- OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
- if ( result != noErr ) {
- errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device.";
- error( RtAudioError::WARNING );
- return 0;
- }
-
- dataSize *= nDevices;
- AudioDeviceID deviceList[ nDevices ];
- property.mSelector = kAudioHardwarePropertyDevices;
- result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList );
- if ( result != noErr ) {
- errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device IDs.";
- error( RtAudioError::WARNING );
- return 0;
- }
-
- for ( unsigned int i=0; i<nDevices; i++ )
- if ( id == deviceList[i] ) return i;
-
- errorText_ = "RtApiCore::getDefaultInputDevice: No default device found!";
- error( RtAudioError::WARNING );
- return 0;
-}
-
-// Returns the index, within the system device list, of the default
-// CoreAudio output device.
-//
-// Returns 0 without any diagnostic when getDeviceCount() reports fewer
-// than two devices. Also returns 0, after raising an
-// RtAudioError::WARNING through error(), when a CoreAudio query fails or
-// when the default device ID is not present in the device list.
-unsigned int RtApiCore :: getDefaultOutputDevice( void )
-{
-  unsigned int nDevices = getDeviceCount();
-  if ( nDevices <= 1 ) return 0;
-
-  AudioDeviceID id;
-  UInt32 dataSize = sizeof( AudioDeviceID );
-  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultOutputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
-  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  dataSize = sizeof( AudioDeviceID ) * nDevices;
-  // std::vector rather than a variable-length array, which is only a
-  // compiler extension in C++.
-  std::vector<AudioDeviceID> deviceList( nDevices );
-  property.mSelector = kAudioHardwarePropertyDevices;
-  result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList[0] );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device IDs.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  // Only look at the entries CoreAudio reports having written; the
-  // device list may have shrunk since getDeviceCount() was called.
-  unsigned int nReturned = dataSize / sizeof( AudioDeviceID );
-  if ( nReturned > nDevices ) nReturned = nDevices;
-  for ( unsigned int i=0; i<nReturned; i++ )
-    if ( id == deviceList[i] ) return i;
-
-  errorText_ = "RtApiCore::getDefaultOutputDevice: No default device found!";
-  error( RtAudioError::WARNING );
-  return 0;
-}
-
-// Appends the C-string form of a CoreFoundation string to dest.
-//
-// The encoding is UTF-8 when UNICODE/_UNICODE is defined and the system
-// encoding otherwise. Nothing is appended when source is NULL or when
-// CFStringGetCString() reports failure, so dest never receives the
-// contents of an unfilled buffer.
-static void appendCFString( std::string &dest, CFStringRef source )
-{
-  if ( source == NULL ) return;
-
-  // Same sizing rule as before: three bytes per character plus the
-  // terminating NUL.
-  CFIndex capacity = CFStringGetLength( source ) * 3 + 1;
-  std::vector<char> buffer( capacity, '\0' );
-#if defined( UNICODE ) || defined( _UNICODE )
-  CFStringEncoding encoding = kCFStringEncodingUTF8;
-#else
-  CFStringEncoding encoding = CFStringGetSystemEncoding();
-#endif
-  if ( CFStringGetCString( source, &buffer[0], capacity, encoding ) )
-    dest.append( &buffer[0] );
-}
-
-// Probes a CoreAudio device and returns its description.
-//
-// device is an index into the system device list (see getDeviceCount()).
-// The returned structure has probed == true only when every query
-// succeeded; on any failure the partially filled structure is returned
-// with probed == false after the problem has been reported through
-// error(). The name is "<manufacturer>: <device name>", and the native
-// format is always reported as RTAUDIO_FLOAT32.
-RtAudio::DeviceInfo RtApiCore :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  // Get device ID
-  unsigned int nDevices = getDeviceCount();
-  if ( nDevices == 0 ) {
-    errorText_ = "RtApiCore::getDeviceInfo: no devices found!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  if ( device >= nDevices ) {
-    errorText_ = "RtApiCore::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  // std::vector rather than a variable-length array, which is only a
-  // compiler extension in C++.
-  std::vector<AudioDeviceID> deviceList( nDevices );
-  UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
-  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
-                                          kAudioObjectPropertyScopeGlobal,
-                                          kAudioObjectPropertyElementMaster };
-  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
-                                                0, NULL, &dataSize, (void *) &deviceList[0] );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::getDeviceInfo: OS-X system error getting device IDs.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  AudioDeviceID id = deviceList[ device ];
-
-  // Get the device name, starting with the manufacturer.
-  info.name.erase();
-  CFStringRef cfname = NULL;
-  dataSize = sizeof( CFStringRef );
-  property.mSelector = kAudioObjectPropertyManufacturer;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device manufacturer.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  appendCFString( info.name, cfname );
-  info.name.append( ": " );
-  if ( cfname ) CFRelease( cfname );
-
-  cfname = NULL;
-  property.mSelector = kAudioObjectPropertyName;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device name.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  appendCFString( info.name, cfname );
-  if ( cfname ) CFRelease( cfname );
-
-  // Get the output stream "configuration".
-  AudioBufferList *bufferList = nil;
-  property.mSelector = kAudioDevicePropertyStreamConfiguration;
-  property.mScope = kAudioDevicePropertyScopeOutput;
-  // property.mElement = kAudioObjectPropertyElementWildcard;
-  dataSize = 0;
-  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
-  if ( result != noErr || dataSize == 0 ) {
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration info for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Allocate the AudioBufferList.
-  bufferList = (AudioBufferList *) malloc( dataSize );
-  if ( bufferList == NULL ) {
-    errorText_ = "RtApiCore::getDeviceInfo: memory error allocating output AudioBufferList.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
-  if ( result != noErr || dataSize == 0 ) {
-    free( bufferList );
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Get output channel information: sum the channels of every stream.
-  unsigned int i, nStreams = bufferList->mNumberBuffers;
-  for ( i=0; i<nStreams; i++ )
-    info.outputChannels += bufferList->mBuffers[i].mNumberChannels;
-  free( bufferList );
-
-  // Get the input stream "configuration".
-  property.mScope = kAudioDevicePropertyScopeInput;
-  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
-  if ( result != noErr || dataSize == 0 ) {
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration info for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Allocate the AudioBufferList.
-  bufferList = (AudioBufferList *) malloc( dataSize );
-  if ( bufferList == NULL ) {
-    errorText_ = "RtApiCore::getDeviceInfo: memory error allocating input AudioBufferList.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
-  if (result != noErr || dataSize == 0) {
-    free( bufferList );
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Get input channel information.
-  nStreams = bufferList->mNumberBuffers;
-  for ( i=0; i<nStreams; i++ )
-    info.inputChannels += bufferList->mBuffers[i].mNumberChannels;
-  free( bufferList );
-
-  // If device opens for both playback and capture, we determine the channels.
-  if ( info.outputChannels > 0 && info.inputChannels > 0 )
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
-  // Probe the device sample rates.
-  bool isInput = false;
-  if ( info.outputChannels == 0 ) isInput = true;
-
-  // Determine the supported sample rates. The scope is still "input"
-  // from the query above, so it only needs switching for output devices.
-  property.mSelector = kAudioDevicePropertyAvailableNominalSampleRates;
-  if ( isInput == false ) property.mScope = kAudioDevicePropertyScopeOutput;
-  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
-  // Require room for at least one range so the list below is never empty.
-  if ( result != kAudioHardwareNoError || dataSize < sizeof( AudioValueRange ) ) {
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rate info.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  UInt32 nRanges = dataSize / sizeof( AudioValueRange );
-  std::vector<AudioValueRange> rangeList( nRanges );
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &rangeList[0] );
-  if ( result != kAudioHardwareNoError ) {
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rates.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Only consider the ranges CoreAudio reports having written.
-  if ( dataSize / sizeof( AudioValueRange ) < nRanges )
-    nRanges = dataSize / sizeof( AudioValueRange );
-
-  // The sample rate reporting mechanism is a bit of a mystery. It
-  // seems that it can either return individual rates or a range of
-  // rates. I assume that if the min / max range values are the same,
-  // then that represents a single supported rate and if the min / max
-  // range values are different, the device supports an arbitrary
-  // range of values (though there might be multiple ranges, so we'll
-  // use the most conservative range).
-  Float64 minimumRate = 1.0, maximumRate = 10000000000.0;
-  bool haveValueRange = false;
-  info.sampleRates.clear();
-  for ( UInt32 r=0; r<nRanges; r++ ) {
-    if ( rangeList[r].mMinimum == rangeList[r].mMaximum ) {
-      unsigned int tmpSr = (unsigned int) rangeList[r].mMinimum;
-      info.sampleRates.push_back( tmpSr );
-
-      // Prefer the highest rate that does not exceed 48 kHz.
-      if ( !info.preferredSampleRate || ( tmpSr <= 48000 && tmpSr > info.preferredSampleRate ) )
-        info.preferredSampleRate = tmpSr;
-
-    } else {
-      // Intersect all reported ranges.
-      haveValueRange = true;
-      if ( rangeList[r].mMinimum > minimumRate ) minimumRate = rangeList[r].mMinimum;
-      if ( rangeList[r].mMaximum < maximumRate ) maximumRate = rangeList[r].mMaximum;
-    }
-  }
-
-  if ( haveValueRange ) {
-    for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-      if ( SAMPLE_RATES[k] >= (unsigned int) minimumRate && SAMPLE_RATES[k] <= (unsigned int) maximumRate ) {
-        info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-        if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-          info.preferredSampleRate = SAMPLE_RATES[k];
-      }
-    }
-  }
-
-  // Sort and remove any redundant values
-  std::sort( info.sampleRates.begin(), info.sampleRates.end() );
-  info.sampleRates.erase( std::unique( info.sampleRates.begin(), info.sampleRates.end() ), info.sampleRates.end() );
-
-  if ( info.sampleRates.size() == 0 ) {
-    errorStream_ << "RtApiCore::probeDeviceInfo: No supported sample rates found for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // CoreAudio always uses 32-bit floating point data for PCM streams.
-  // Thus, any other "physical" formats supported by the device are of
-  // no interest to the client.
-  info.nativeFormats = RTAUDIO_FLOAT32;
-
-  if ( info.outputChannels > 0 )
-    if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
-  if ( info.inputChannels > 0 )
-    if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
-
-  info.probed = true;
-  return info;
-}
-
-// Device I/O callback: forwards the buffers to the RtApiCore instance
-// stored in the CallbackInfo passed as client data. A false result from
-// callbackEvent() is reported to the caller as
-// kAudioHardwareUnspecifiedError; anything else as kAudioHardwareNoError.
-static OSStatus callbackHandler( AudioDeviceID inDevice,
-                                 const AudioTimeStamp* /*inNow*/,
-                                 const AudioBufferList* inInputData,
-                                 const AudioTimeStamp* /*inInputTime*/,
-                                 AudioBufferList* outOutputData,
-                                 const AudioTimeStamp* /*inOutputTime*/,
-                                 void* infoPointer )
-{
-  CallbackInfo *callbackInfo = (CallbackInfo *) infoPointer;
-  RtApiCore *api = (RtApiCore *) callbackInfo->object;
-
-  const bool handled = api->callbackEvent( inDevice, inInputData, outOutputData );
-  return handled ? kAudioHardwareNoError : kAudioHardwareUnspecifiedError;
-}
-
-// Property listener for processor-overload notifications. For every
-// kAudioDeviceProcessorOverload address it sets an xrun flag on the
-// CoreHandle passed as client data: slot 1 when the address has input
-// scope, slot 0 otherwise. Always returns kAudioHardwareNoError.
-static OSStatus xrunListener( AudioObjectID /*inDevice*/,
-                              UInt32 nAddresses,
-                              const AudioObjectPropertyAddress properties[],
-                              void* handlePointer )
-{
-  CoreHandle *coreHandle = (CoreHandle *) handlePointer;
-
-  UInt32 index = 0;
-  while ( index < nAddresses ) {
-    const AudioObjectPropertyAddress &address = properties[index++];
-    if ( address.mSelector != kAudioDeviceProcessorOverload ) continue;
-
-    const int slot = ( address.mScope == kAudioDevicePropertyScopeInput ) ? 1 : 0;
-    coreHandle->xrun[slot] = true;
-  }
-
-  return kAudioHardwareNoError;
-}
-
-// Property listener that reads the device's current nominal sample rate
-// into the Float64 pointed to by ratePointer. The status of the read is
-// not inspected; the listener always returns kAudioHardwareNoError.
-static OSStatus rateListener( AudioObjectID inDevice,
-                              UInt32 /*nAddresses*/,
-                              const AudioObjectPropertyAddress /*properties*/[],
-                              void* ratePointer )
-{
-  AudioObjectPropertyAddress rateAddress;
-  rateAddress.mSelector = kAudioDevicePropertyNominalSampleRate;
-  rateAddress.mScope = kAudioObjectPropertyScopeGlobal;
-  rateAddress.mElement = kAudioObjectPropertyElementMaster;
-
-  UInt32 byteCount = sizeof( Float64 );
-  AudioObjectGetPropertyData( inDevice, &rateAddress, 0, NULL, &byteCount, (Float64 *) ratePointer );
-  return kAudioHardwareNoError;
-}
-
-// Configures a CoreAudio device for one direction of the stream.
-//
-//   device       index into the system device list
-//   mode         INPUT or OUTPUT; selects the property scope and the slot
-//                of the per-direction stream_ arrays that is filled in
-//   channels     number of channels requested by the user
-//   firstChannel offset of the first requested channel on the device
-//   sampleRate   requested rate; the device's nominal rate is changed when
-//                it differs by more than 1 Hz
-//   format       user data format (the device format is always
-//                RTAUDIO_FLOAT32)
-//   bufferSize   in: requested frames per buffer; out: the value actually
-//                set on the device
-//   options      optional flags (RTAUDIO_MINIMIZE_LATENCY,
-//                RTAUDIO_HOG_DEVICE, RTAUDIO_NONINTERLEAVED); may be NULL
-//
-// Returns SUCCESS or FAILURE; on FAILURE errorText_ describes the problem.
-// Failures that occur once the CoreHandle is being allocated also release
-// the handle and the internal buffers and mark the stream STREAM_CLOSED.
-bool RtApiCore :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                   unsigned int firstChannel, unsigned int sampleRate,
-                                   RtAudioFormat format, unsigned int *bufferSize,
-                                   RtAudio::StreamOptions *options )
-{
-  // Get device ID
-  unsigned int nDevices = getDeviceCount();
-  if ( nDevices == 0 ) {
-    // This should not happen because a check is made before this function is called.
-    errorText_ = "RtApiCore::probeDeviceOpen: no devices found!";
-    return FAILURE;
-  }
-
-  if ( device >= nDevices ) {
-    // This should not happen because a check is made before this function is called.
-    errorText_ = "RtApiCore::probeDeviceOpen: device ID is invalid!";
-    return FAILURE;
-  }
-
-  // std::vector rather than a variable-length array, which is only a
-  // compiler extension in C++.
-  std::vector<AudioDeviceID> deviceList( nDevices );
-  UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
-  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
-                                          kAudioObjectPropertyScopeGlobal,
-                                          kAudioObjectPropertyElementMaster };
-  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
-                                                0, NULL, &dataSize, (void *) &deviceList[0] );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::probeDeviceOpen: OS-X system error getting device IDs.";
-    return FAILURE;
-  }
-
-  AudioDeviceID id = deviceList[ device ];
-
-  // Setup for stream mode.
-  bool isInput = false;
-  if ( mode == INPUT ) {
-    isInput = true;
-    property.mScope = kAudioDevicePropertyScopeInput;
-  }
-  else
-    property.mScope = kAudioDevicePropertyScopeOutput;
-
-  // Get the stream "configuration".
-  AudioBufferList *bufferList = nil;
-  dataSize = 0;
-  property.mSelector = kAudioDevicePropertyStreamConfiguration;
-  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
-  if ( result != noErr || dataSize == 0 ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration info for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Allocate the AudioBufferList.
-  bufferList = (AudioBufferList *) malloc( dataSize );
-  if ( bufferList == NULL ) {
-    errorText_ = "RtApiCore::probeDeviceOpen: memory error allocating AudioBufferList.";
-    return FAILURE;
-  }
-
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
-  if (result != noErr || dataSize == 0) {
-    free( bufferList );
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Search for one or more streams that contain the desired number of
-  // channels. CoreAudio devices can have an arbitrary number of
-  // streams and each stream can have an arbitrary number of channels.
-  // For each stream, a single buffer of interleaved samples is
-  // provided. RtAudio prefers the use of one stream of interleaved
-  // data or multiple consecutive single-channel streams. However, we
-  // now support multiple consecutive multi-channel streams of
-  // interleaved data as well.
-  UInt32 iStream, offsetCounter = firstChannel;
-  UInt32 nStreams = bufferList->mNumberBuffers;
-  bool monoMode = false;
-  bool foundStream = false;
-
-  // First check that the device supports the requested number of
-  // channels.
-  UInt32 deviceChannels = 0;
-  for ( iStream=0; iStream<nStreams; iStream++ )
-    deviceChannels += bufferList->mBuffers[iStream].mNumberChannels;
-
-  if ( deviceChannels < ( channels + firstChannel ) ) {
-    free( bufferList );
-    errorStream_ << "RtApiCore::probeDeviceOpen: the device (" << device << ") does not support the requested channel count.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Look for a single stream meeting our needs. firstStream is given an
-  // initial value so it is never read uninitialized.
-  UInt32 firstStream = 0, streamCount = 1, streamChannels = 0, channelOffset = 0;
-  for ( iStream=0; iStream<nStreams; iStream++ ) {
-    streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
-    if ( streamChannels >= channels + offsetCounter ) {
-      firstStream = iStream;
-      channelOffset = offsetCounter;
-      foundStream = true;
-      break;
-    }
-    if ( streamChannels > offsetCounter ) break;
-    offsetCounter -= streamChannels;
-  }
-
-  // If we didn't find a single stream above, then we should be able
-  // to meet the channel specification with multiple streams.
-  if ( foundStream == false ) {
-    monoMode = true;
-    offsetCounter = firstChannel;
-    for ( iStream=0; iStream<nStreams; iStream++ ) {
-      streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
-      if ( streamChannels > offsetCounter ) break;
-      offsetCounter -= streamChannels;
-    }
-
-    firstStream = iStream;
-    channelOffset = offsetCounter;
-    Int32 channelCounter = channels + offsetCounter - streamChannels;
-
-    // monoMode survives only if every stream used is single-channel.
-    if ( streamChannels > 1 ) monoMode = false;
-    while ( channelCounter > 0 ) {
-      streamChannels = bufferList->mBuffers[++iStream].mNumberChannels;
-      if ( streamChannels > 1 ) monoMode = false;
-      channelCounter -= streamChannels;
-      streamCount++;
-    }
-  }
-
-  free( bufferList );
-
-  // Determine the buffer size.
-  AudioValueRange bufferRange;
-  dataSize = sizeof( AudioValueRange );
-  property.mSelector = kAudioDevicePropertyBufferFrameSizeRange;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &bufferRange );
-
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting buffer size range for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Clamp the request to the device's range.
-  if ( bufferRange.mMinimum > *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMinimum;
-  else if ( bufferRange.mMaximum < *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMaximum;
-  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) *bufferSize = (unsigned long) bufferRange.mMinimum;
-
-  // Set the buffer size. For multiple streams, I'm assuming we only
-  // need to make this setting for the master channel.
-  UInt32 theSize = (UInt32) *bufferSize;
-  dataSize = sizeof( UInt32 );
-  property.mSelector = kAudioDevicePropertyBufferFrameSize;
-  result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &theSize );
-
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting the buffer size for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // If attempting to setup a duplex stream, the bufferSize parameter
-  // MUST be the same in both directions!
-  *bufferSize = theSize;
-  if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  stream_.bufferSize = *bufferSize;
-  stream_.nBuffers = 1;
-
-  // Try to set "hog" mode ... it's not clear to me this is working.
-  if ( options && options->flags & RTAUDIO_HOG_DEVICE ) {
-    pid_t hog_pid;
-    dataSize = sizeof( hog_pid );
-    property.mSelector = kAudioDevicePropertyHogMode;
-    result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &hog_pid );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting 'hog' state!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    if ( hog_pid != getpid() ) {
-      hog_pid = getpid();
-      result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &hog_pid );
-      if ( result != noErr ) {
-        errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting 'hog' state!";
-        errorText_ = errorStream_.str();
-        return FAILURE;
-      }
-    }
-  }
-
-  // Check and if necessary, change the sample rate for the device.
-  Float64 nominalRate;
-  dataSize = sizeof( Float64 );
-  property.mSelector = kAudioDevicePropertyNominalSampleRate;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &nominalRate );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting current sample rate.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Only change the sample rate if off by more than 1 Hz.
-  if ( fabs( nominalRate - (double)sampleRate ) > 1.0 ) {
-
-    // Set a property listener for the sample rate change
-    Float64 reportedRate = 0.0;
-    AudioObjectPropertyAddress tmp = { kAudioDevicePropertyNominalSampleRate, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
-    result = AudioObjectAddPropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate property listener for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    nominalRate = (Float64) sampleRate;
-    result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &nominalRate );
-    if ( result != noErr ) {
-      AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Now wait until the reported nominal rate is what we just set.
-    // Polls every 5 ms and gives up after 5 seconds.
-    UInt32 microCounter = 0;
-    while ( reportedRate != nominalRate ) {
-      microCounter += 5000;
-      if ( microCounter > 5000000 ) break;
-      usleep( 5000 );
-    }
-
-    // Remove the property listener.
-    AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
-
-    if ( microCounter > 5000000 ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: timeout waiting for sample rate update for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-
-  // Now set the stream format for all streams. Also, check the
-  // physical format of the device and change that if necessary.
-  AudioStreamBasicDescription description;
-  dataSize = sizeof( AudioStreamBasicDescription );
-  property.mSelector = kAudioStreamPropertyVirtualFormat;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream format for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Set the sample rate and data format id. However, only make the
-  // change if the sample rate is not within 1.0 of the desired
-  // rate and the format is not linear pcm.
-  bool updateFormat = false;
-  if ( fabs( description.mSampleRate - (Float64)sampleRate ) > 1.0 ) {
-    description.mSampleRate = (Float64) sampleRate;
-    updateFormat = true;
-  }
-
-  if ( description.mFormatID != kAudioFormatLinearPCM ) {
-    description.mFormatID = kAudioFormatLinearPCM;
-    updateFormat = true;
-  }
-
-  if ( updateFormat ) {
-    result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &description );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate or data format for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-
-  // Now check the physical format.
-  property.mSelector = kAudioStreamPropertyPhysicalFormat;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream physical format for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  if ( description.mFormatID != kAudioFormatLinearPCM || description.mBitsPerChannel < 16 ) {
-    description.mFormatID = kAudioFormatLinearPCM;
-    //description.mSampleRate = (Float64) sampleRate;
-    AudioStreamBasicDescription testDescription = description;
-    UInt32 formatFlags;
-
-    // We'll try higher bit rates first and then work our way down.
-    // Each entry is (bits per channel, format flags).
-    std::vector< std::pair<UInt32, UInt32> > physicalFormats;
-    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsFloat) & ~kLinearPCMFormatFlagIsSignedInteger;
-    physicalFormats.push_back( std::pair<UInt32, UInt32>( 32, formatFlags ) );
-    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
-    physicalFormats.push_back( std::pair<UInt32, UInt32>( 32, formatFlags ) );
-    physicalFormats.push_back( std::pair<UInt32, UInt32>( 24, formatFlags ) ); // 24-bit packed
-    formatFlags &= ~( kAudioFormatFlagIsPacked | kAudioFormatFlagIsAlignedHigh );
-    physicalFormats.push_back( std::pair<UInt32, UInt32>( 24, formatFlags ) ); // 24-bit in 4 bytes, aligned low
-    formatFlags |= kAudioFormatFlagIsAlignedHigh;
-    physicalFormats.push_back( std::pair<UInt32, UInt32>( 24, formatFlags ) ); // 24-bit in 4 bytes, aligned high
-    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
-    physicalFormats.push_back( std::pair<UInt32, UInt32>( 16, formatFlags ) );
-    physicalFormats.push_back( std::pair<UInt32, UInt32>( 8, formatFlags ) );
-
-    bool setPhysicalFormat = false;
-    for( unsigned int i=0; i<physicalFormats.size(); i++ ) {
-      testDescription = description;
-      testDescription.mBitsPerChannel = physicalFormats[i].first;
-      testDescription.mFormatFlags = physicalFormats[i].second;
-      // Unpacked 24-bit samples occupy 4 bytes each. The test must be a
-      // logical NOT: a bitwise NOT of the masked flag is never zero, which
-      // made the packed 24-bit candidate use 4 bytes per sample as well.
-      if ( (24 == physicalFormats[i].first) && !( physicalFormats[i].second & kAudioFormatFlagIsPacked ) )
-        testDescription.mBytesPerFrame = 4 * testDescription.mChannelsPerFrame;
-      else
-        testDescription.mBytesPerFrame = testDescription.mBitsPerChannel/8 * testDescription.mChannelsPerFrame;
-      testDescription.mBytesPerPacket = testDescription.mBytesPerFrame * testDescription.mFramesPerPacket;
-      result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &testDescription );
-      if ( result == noErr ) {
-        setPhysicalFormat = true;
-        break;
-      }
-    }
-
-    if ( !setPhysicalFormat ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting physical data format for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  } // done setting virtual/physical formats.
-
-  // Get the stream / device latency. A failure here is only a warning.
-  UInt32 latency;
-  dataSize = sizeof( UInt32 );
-  property.mSelector = kAudioDevicePropertyLatency;
-  if ( AudioObjectHasProperty( id, &property ) == true ) {
-    result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &latency );
-    if ( result == kAudioHardwareNoError ) stream_.latency[ mode ] = latency;
-    else {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting device latency for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::WARNING );
-    }
-  }
-
-  // Byte-swapping: According to AudioHardware.h, the stream data will
-  // always be presented in native-endian format, so we should never
-  // need to byte swap.
-  stream_.doByteSwap[mode] = false;
-
-  // From the CoreAudio documentation, PCM data must be supplied as
-  // 32-bit floats.
-  stream_.userFormat = format;
-  stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-
-  if ( streamCount == 1 )
-    stream_.nDeviceChannels[mode] = description.mChannelsPerFrame;
-  else // multiple streams
-    stream_.nDeviceChannels[mode] = channels;
-  stream_.nUserChannels[mode] = channels;
-  stream_.channelOffset[mode] = channelOffset; // offset within a CoreAudio stream
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-  stream_.deviceInterleaved[mode] = true;
-  if ( monoMode == true ) stream_.deviceInterleaved[mode] = false;
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( streamCount == 1 ) {
-    if ( stream_.nUserChannels[mode] > 1 &&
-         stream_.userInterleaved != stream_.deviceInterleaved[mode] )
-      stream_.doConvertBuffer[mode] = true;
-  }
-  else if ( monoMode && stream_.userInterleaved )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate our CoreHandle structure for the stream.
-  CoreHandle *handle = 0;
-  if ( stream_.apiHandle == 0 ) {
-    try {
-      handle = new CoreHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiCore::probeDeviceOpen: error allocating CoreHandle memory.";
-      goto error;
-    }
-
-    if ( pthread_cond_init( &handle->condition, NULL ) ) {
-      // The condition variable was never initialized, so release the
-      // handle here instead of letting the cleanup code destroy it.
-      delete handle;
-      handle = 0;
-      errorText_ = "RtApiCore::probeDeviceOpen: error initializing pthread condition variable.";
-      goto error;
-    }
-    stream_.apiHandle = (void *) handle;
-  }
-  else
-    handle = (CoreHandle *) stream_.apiHandle;
-  handle->iStream[mode] = firstStream;
-  handle->nStreams[mode] = streamCount;
-  handle->id[mode] = id;
-
-  // Allocate necessary internal buffers. calloc() zero-fills, so the
-  // buffer is never touched before the NULL check.
-  unsigned long bufferBytes;
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiCore::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  // If possible, we will make use of the CoreAudio stream buffers as
-  // "device buffers". However, we can't do this if using multiple
-  // streams.
-  if ( stream_.doConvertBuffer[mode] && handle->nStreams[mode] > 1 ) {
-
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( mode == INPUT ) {
-      // Reuse the output-side device buffer when it is already large enough.
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-        if ( bufferBytes <= bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiCore::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  stream_.sampleRate = sampleRate;
-  stream_.device[mode] = device;
-  stream_.state = STREAM_STOPPED;
-  stream_.callbackInfo.object = (void *) this;
-
-  // Setup the buffer conversion information structure.
-  if ( stream_.doConvertBuffer[mode] ) {
-    if ( streamCount > 1 ) setConvertInfo( mode, 0 );
-    else setConvertInfo( mode, channelOffset );
-  }
-
-  if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device )
-    // Only one callback procedure per device.
-    stream_.mode = DUPLEX;
-  else {
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
-    result = AudioDeviceCreateIOProcID( id, callbackHandler, (void *) &stream_.callbackInfo, &handle->procId[mode] );
-#else
-    // deprecated in favor of AudioDeviceCreateIOProcID()
-    result = AudioDeviceAddIOProc( id, callbackHandler, (void *) &stream_.callbackInfo );
-#endif
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error setting callback for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      goto error;
-    }
-    if ( stream_.mode == OUTPUT && mode == INPUT )
-      stream_.mode = DUPLEX;
-    else
-      stream_.mode = mode;
-  }
-
-  // Setup the device property listener for over/underload. The status is
-  // recorded but not checked: the open succeeds even if the listener
-  // could not be installed.
-  property.mSelector = kAudioDeviceProcessorOverload;
-  property.mScope = kAudioObjectPropertyScopeGlobal;
-  result = AudioObjectAddPropertyListener( id, &property, xrunListener, (void *) handle );
-
-  return SUCCESS;
-
- error:
-  if ( handle ) {
-    pthread_cond_destroy( &handle->condition );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.state = STREAM_CLOSED;
-  return FAILURE;
-}
-
-// Close the currently open stream.
-//
-// Removes the processor-overload (xrun) listener and the IOProc from
-// every device this stream registered a callback on, stopping the
-// device first if the stream is still running. Then releases the user
-// and device buffers together with the CoreHandle and marks the stream
-// closed. Issues a WARNING and returns untouched when no stream is open.
-void RtApiCore :: closeStream( void )
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiCore::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
-
-  // Slot 0 is the output device, slot 1 the input device. A duplex
-  // stream whose input and output share one device is torn down through
-  // the output slot alone.
-  bool inUse[2];
-  inUse[0] = ( stream_.mode == OUTPUT || stream_.mode == DUPLEX );
-  inUse[1] = ( stream_.mode == INPUT ||
-               ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) );
-
-  // Every step of the device teardown dereferences the handle, so
-  // nothing is attempted without one.
-  for ( int i=0; handle && i<2; i++ ) {
-    if ( !inUse[i] ) continue;
-
-    AudioObjectPropertyAddress property = { kAudioDeviceProcessorOverload,
-                                            kAudioObjectPropertyScopeGlobal,
-                                            kAudioObjectPropertyElementMaster };
-    if (AudioObjectRemovePropertyListener( handle->id[i], &property, xrunListener, (void *) handle ) != noErr) {
-      errorText_ = "RtApiCore::closeStream(): error removing property listener!";
-      error( RtAudioError::WARNING );
-    }
-
-    if ( stream_.state == STREAM_RUNNING )
-      AudioDeviceStop( handle->id[i], callbackHandler );
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
-    AudioDeviceDestroyIOProcID( handle->id[i], handle->procId[i] );
-#else
-    // deprecated in favor of AudioDeviceDestroyIOProcID()
-    AudioDeviceRemoveIOProc( handle->id[i], callbackHandler );
-#endif
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  // Destroy pthread condition variable and release the handle.
-  if ( handle ) {
-    pthread_cond_destroy( &handle->condition );
-    delete handle;
-  }
-  stream_.apiHandle = 0;
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-// Start the callback procedure on the device(s) of the open stream.
-//
-// The output device is started for OUTPUT and DUPLEX streams; the input
-// device is started for INPUT streams and for DUPLEX streams that use
-// two different devices. On success the drain bookkeeping is reset and
-// the stream becomes STREAM_RUNNING. A failed AudioDeviceStart() is
-// reported through error( RtAudioError::SYSTEM_ERROR ) and leaves the
-// stream state unchanged. Calling this on a running stream only issues
-// a WARNING.
-void RtApiCore :: startStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiCore::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  OSStatus result = noErr;
-  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
-
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    result = AudioDeviceStart( handle->id[0], callbackHandler );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::startStream: system error (" << getErrorCode( result ) << ") starting callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-  }
-
-  if ( stream_.mode == INPUT ||
-       ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
-    result = AudioDeviceStart( handle->id[1], callbackHandler );
-    if ( result != noErr ) {
-      // Report the CoreAudio error code here as well, matching the
-      // output branch and the messages produced by stopStream().
-      errorStream_ << "RtApiCore::startStream: system error (" << getErrorCode( result ) << ") starting input callback procedure on device (" << stream_.device[1] << ").";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-  }
-
-  handle->drainCounter = 0;
-  handle->internalDrain = false;
-  stream_.state = STREAM_RUNNING;
-}
-
-// Stop the stream, letting pending output drain first.
-//
-// For streams with an output side, a zero drain counter is set to 2 and
-// the call blocks on the handle's condition variable until the callback
-// signals it; a non-zero counter skips the wait. The output device is
-// then stopped, followed by the input device when it is a separate
-// device (or the stream is input-only). Any AudioDeviceStop() failure
-// is reported via error( RtAudioError::SYSTEM_ERROR ) and the stream
-// state is left as it was.
-// NOTE(review): nothing in this function locks stream_.mutex before
-// pthread_cond_wait() -- confirm how the mutex is expected to be held.
-void RtApiCore :: stopStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiCore::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  CoreHandle *core = (CoreHandle *) stream_.apiHandle;
-  OSStatus status = noErr;
-
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    if ( core->drainCounter == 0 ) {
-      core->drainCounter = 2;
-      pthread_cond_wait( &core->condition, &stream_.mutex ); // block until signaled
-    }
-
-    status = AudioDeviceStop( core->id[0], callbackHandler );
-    if ( status != noErr ) {
-      errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( status ) << ") stopping callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-  }
-
-  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
-
-    status = AudioDeviceStop( core->id[1], callbackHandler );
-    if ( status != noErr ) {
-      errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( status ) << ") stopping input callback procedure on device (" << stream_.device[1] << ").";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-}
-
-// Stop the stream without waiting for queued output to drain.
-//
-// The drain counter is pre-set to 2 so that stopStream() finds it
-// non-zero and skips its blocking wait. Calling this on a stopped
-// stream only issues a WARNING.
-void RtApiCore :: abortStream( void )
-{
-  verifyStream();
-
-  if ( stream_.state != STREAM_STOPPED ) {
-    ( (CoreHandle *) stream_.apiHandle )->drainCounter = 2;
-    stopStream();
-    return;
-  }
-
-  errorText_ = "RtApiCore::abortStream(): the stream is already stopped!";
-  error( RtAudioError::WARNING );
-}
-
-// Thread entry point used when the user callback asks for the stream
-// to be stopped or aborted. The stop is issued from this separate
-// thread because callbackEvent() probably should return before
-// AudioDeviceStop() is called. ptr is the stream's CallbackInfo; its
-// object member is the owning RtApiCore instance.
-static void *coreStopStream( void *ptr )
-{
-  RtApiCore *api = (RtApiCore *) ( (CallbackInfo *) ptr )->object;
-  api->stopStream();
-
-  pthread_exit( NULL );
-}
-
-bool RtApiCore :: callbackEvent( AudioDeviceID deviceId, // compared with handle->id[0]/id[1] to pick the output or input half in duplex mode
- const AudioBufferList *inBufferList, // source of captured data for the input half
- const AudioBufferList *outBufferList ) // destination the output half fills (or zeroes while draining)
-{ // One I/O cycle: run the user callback, push user output to the device buffers, pull device input to the user buffer. Returns SUCCESS except when the stream is closed.
- if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS; // nothing to do while stopped or stopping
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... this shouldn't happen!";
- error( RtAudioError::WARNING );
- return FAILURE;
- }
-
- CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
- CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
-
- // Check if we were draining the stream and signal is finished.
- if ( handle->drainCounter > 3 ) { // the counter is incremented once per cycle while draining (see "Don't bother draining input" below)
- ThreadHandle threadId;
-
- stream_.state = STREAM_STOPPING;
- if ( handle->internalDrain == true )
- pthread_create( &threadId, NULL, coreStopStream, info ); // stop requested by the user callback: issue it from a helper thread
- else // external call to stopStream()
- pthread_cond_signal( &handle->condition ); // wakes the pthread_cond_wait() in stopStream()
- return SUCCESS;
- }
-
- AudioDeviceID outputDevice = handle->id[0];
-
- // Invoke user callback to get fresh output data UNLESS we are
- // draining stream or duplex mode AND the input/output devices are
- // different AND this function is called for the input device.
- if ( handle->drainCounter == 0 && ( stream_.mode != DUPLEX || deviceId == outputDevice ) ) {
- RtAudioCallback callback = (RtAudioCallback) info->callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- if ( stream_.mode != INPUT && handle->xrun[0] == true ) { // report a pending output xrun once, then clear it
- status |= RTAUDIO_OUTPUT_UNDERFLOW;
- handle->xrun[0] = false;
- }
- if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) { // report a pending input xrun once, then clear it
- status |= RTAUDIO_INPUT_OVERFLOW;
- handle->xrun[1] = false;
- }
-
- int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
- stream_.bufferSize, streamTime, status, info->userData );
- if ( cbReturnValue == 2 ) { // callback return 2: abort immediately, without draining
- stream_.state = STREAM_STOPPING;
- handle->drainCounter = 2;
- abortStream();
- return SUCCESS;
- }
- else if ( cbReturnValue == 1 ) { // callback return 1: start draining; the stop is later issued internally
- handle->drainCounter = 1;
- handle->internalDrain = true;
- }
- }
-
- if ( stream_.mode == OUTPUT || ( stream_.mode == DUPLEX && deviceId == outputDevice ) ) {
-
- if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-
- if ( handle->nStreams[0] == 1 ) {
- memset( outBufferList->mBuffers[handle->iStream[0]].mData,
- 0,
- outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
- }
- else { // fill multiple streams with zeros
- for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
- memset( outBufferList->mBuffers[handle->iStream[0]+i].mData,
- 0,
- outBufferList->mBuffers[handle->iStream[0]+i].mDataByteSize );
- }
- }
- }
- else if ( handle->nStreams[0] == 1 ) {
- if ( stream_.doConvertBuffer[0] ) { // convert directly to CoreAudio stream buffer
- convertBuffer( (char *) outBufferList->mBuffers[handle->iStream[0]].mData,
- stream_.userBuffer[0], stream_.convertInfo[0] );
- }
- else { // copy from user buffer
- memcpy( outBufferList->mBuffers[handle->iStream[0]].mData,
- stream_.userBuffer[0],
- outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
- }
- }
- else { // fill multiple streams
- Float32 *inBuffer = (Float32 *) stream_.userBuffer[0];
- if ( stream_.doConvertBuffer[0] ) {
- convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
- inBuffer = (Float32 *) stream_.deviceBuffer;
- }
-
- if ( stream_.deviceInterleaved[0] == false ) { // mono mode
- UInt32 bufferBytes = outBufferList->mBuffers[handle->iStream[0]].mDataByteSize;
- for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) { // one device stream per user channel
- memcpy( outBufferList->mBuffers[handle->iStream[0]+i].mData,
- (void *)&inBuffer[i*stream_.bufferSize], bufferBytes );
- }
- }
- else { // fill multiple multi-channel streams with interleaved data
- UInt32 streamChannels, channelsLeft, inJump, outJump, inOffset;
- Float32 *out, *in;
-
- bool inInterleaved = ( stream_.userInterleaved ) ? true : false;
- UInt32 inChannels = stream_.nUserChannels[0];
- if ( stream_.doConvertBuffer[0] ) {
- inInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
- inChannels = stream_.nDeviceChannels[0];
- }
-
- if ( inInterleaved ) inOffset = 1; // distance between consecutive channels of one frame in the source
- else inOffset = stream_.bufferSize;
-
- channelsLeft = inChannels; // source channels not yet written to any stream
- for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
- in = inBuffer;
- out = (Float32 *) outBufferList->mBuffers[handle->iStream[0]+i].mData;
- streamChannels = outBufferList->mBuffers[handle->iStream[0]+i].mNumberChannels;
-
- outJump = 0;
- // Account for possible channel offset in first stream
- if ( i == 0 && stream_.channelOffset[0] > 0 ) {
- streamChannels -= stream_.channelOffset[0];
- outJump = stream_.channelOffset[0];
- out += outJump;
- }
-
- // Account for possible unfilled channels at end of the last stream
- if ( streamChannels > channelsLeft ) {
- outJump = streamChannels - channelsLeft;
- streamChannels = channelsLeft;
- }
-
- // Determine input buffer offsets and skips
- if ( inInterleaved ) {
- inJump = inChannels;
- in += inChannels - channelsLeft; // skip the channels already consumed by earlier streams
- }
- else {
- inJump = 1;
- in += (inChannels - channelsLeft) * inOffset;
- }
-
- for ( unsigned int i=0; i<stream_.bufferSize; i++ ) { // frame loop; this i shadows the stream index of the enclosing loop
- for ( unsigned int j=0; j<streamChannels; j++ ) {
- *out++ = in[j*inOffset];
- }
- out += outJump;
- in += inJump;
- }
- channelsLeft -= streamChannels;
- }
- }
- }
- }
-
- // Don't bother draining input
- if ( handle->drainCounter ) {
- handle->drainCounter++;
- goto unlock;
- }
-
- AudioDeviceID inputDevice;
- inputDevice = handle->id[1];
- if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && deviceId == inputDevice ) ) {
-
- if ( handle->nStreams[1] == 1 ) {
- if ( stream_.doConvertBuffer[1] ) { // convert directly from CoreAudio stream buffer
- convertBuffer( stream_.userBuffer[1],
- (char *) inBufferList->mBuffers[handle->iStream[1]].mData,
- stream_.convertInfo[1] );
- }
- else { // copy to user buffer
- memcpy( stream_.userBuffer[1],
- inBufferList->mBuffers[handle->iStream[1]].mData,
- inBufferList->mBuffers[handle->iStream[1]].mDataByteSize );
- }
- }
- else { // read from multiple streams
- Float32 *outBuffer = (Float32 *) stream_.userBuffer[1];
- if ( stream_.doConvertBuffer[1] ) outBuffer = (Float32 *) stream_.deviceBuffer; // gather into the device buffer first; converted to the user buffer at the end
-
- if ( stream_.deviceInterleaved[1] == false ) { // mono mode
- UInt32 bufferBytes = inBufferList->mBuffers[handle->iStream[1]].mDataByteSize;
- for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) { // one device stream per user channel
- memcpy( (void *)&outBuffer[i*stream_.bufferSize],
- inBufferList->mBuffers[handle->iStream[1]+i].mData, bufferBytes );
- }
- }
- else { // read from multiple multi-channel streams
- UInt32 streamChannels, channelsLeft, inJump, outJump, outOffset;
- Float32 *out, *in;
-
- bool outInterleaved = ( stream_.userInterleaved ) ? true : false;
- UInt32 outChannels = stream_.nUserChannels[1];
- if ( stream_.doConvertBuffer[1] ) {
- outInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
- outChannels = stream_.nDeviceChannels[1];
- }
-
- if ( outInterleaved ) outOffset = 1; // distance between consecutive channels of one frame in the destination
- else outOffset = stream_.bufferSize;
-
- channelsLeft = outChannels; // destination channels not yet filled from any stream
- for ( unsigned int i=0; i<handle->nStreams[1]; i++ ) {
- out = outBuffer;
- in = (Float32 *) inBufferList->mBuffers[handle->iStream[1]+i].mData;
- streamChannels = inBufferList->mBuffers[handle->iStream[1]+i].mNumberChannels;
-
- inJump = 0;
- // Account for possible channel offset in first stream
- if ( i == 0 && stream_.channelOffset[1] > 0 ) {
- streamChannels -= stream_.channelOffset[1];
- inJump = stream_.channelOffset[1];
- in += inJump;
- }
-
- // Account for possible unread channels at end of the last stream
- if ( streamChannels > channelsLeft ) {
- inJump = streamChannels - channelsLeft;
- streamChannels = channelsLeft;
- }
-
- // Determine output buffer offsets and skips
- if ( outInterleaved ) {
- outJump = outChannels;
- out += outChannels - channelsLeft; // skip the channels already filled by earlier streams
- }
- else {
- outJump = 1;
- out += (outChannels - channelsLeft) * outOffset;
- }
-
- for ( unsigned int i=0; i<stream_.bufferSize; i++ ) { // frame loop; this i shadows the stream index of the enclosing loop
- for ( unsigned int j=0; j<streamChannels; j++ ) {
- out[j*outOffset] = *in++;
- }
- out += outJump;
- in += inJump;
- }
- channelsLeft -= streamChannels;
- }
- }
-
- if ( stream_.doConvertBuffer[1] ) { // convert from our internal "device" buffer
- convertBuffer( stream_.userBuffer[1],
- stream_.deviceBuffer,
- stream_.convertInfo[1] );
- }
- }
- }
-
- unlock: // common exit for the draining path; no mutex is released here (the unlock call below is commented out)
- //MUTEX_UNLOCK( &stream_.mutex );
-
- RtApi::tickStreamTime();
- return SUCCESS;
-}
-
-// Translate a CoreAudio OSStatus value into the name of the matching
-// error constant, for use in error messages. Values that are not in the
-// table below yield the generic string "CoreAudio unknown error".
-const char* RtApiCore :: getErrorCode( OSStatus code )
-{
-  struct NamedStatus {
-    OSStatus value;
-    const char *name;
-  };
-
-  static const NamedStatus known[] = {
-    { kAudioHardwareNotRunningError,           "kAudioHardwareNotRunningError" },
-    { kAudioHardwareUnspecifiedError,          "kAudioHardwareUnspecifiedError" },
-    { kAudioHardwareUnknownPropertyError,      "kAudioHardwareUnknownPropertyError" },
-    { kAudioHardwareBadPropertySizeError,      "kAudioHardwareBadPropertySizeError" },
-    { kAudioHardwareIllegalOperationError,     "kAudioHardwareIllegalOperationError" },
-    { kAudioHardwareBadObjectError,            "kAudioHardwareBadObjectError" },
-    { kAudioHardwareBadDeviceError,            "kAudioHardwareBadDeviceError" },
-    { kAudioHardwareBadStreamError,            "kAudioHardwareBadStreamError" },
-    { kAudioHardwareUnsupportedOperationError, "kAudioHardwareUnsupportedOperationError" },
-    { kAudioDeviceUnsupportedFormatError,      "kAudioDeviceUnsupportedFormatError" },
-    { kAudioDevicePermissionsError,            "kAudioDevicePermissionsError" }
-  };
-
-  const unsigned int nKnown = sizeof( known ) / sizeof( known[0] );
-  for ( unsigned int k=0; k<nKnown; k++ ) {
-    if ( known[k].value == code ) return known[k].name;
-  }
-
-  return "CoreAudio unknown error";
-}
-
- //******************** End of __MACOSX_CORE__ *********************//
-#endif
-
-#if defined(__UNIX_JACK__)
-
-// JACK is a low-latency audio server, originally written for the
-// GNU/Linux operating system and now also ported to OS-X. It can
-// connect a number of different applications to an audio device, as
-// well as allowing them to share audio between themselves.
-//
-// When using JACK with RtAudio, "devices" refer to JACK clients that
-// have ports connected to the server. The JACK server is typically
-// started in a terminal as follows:
-//
-// jackd -d alsa -d hw:0
-//
-// or through an interface program such as qjackctl. Many of the
-// parameters normally set for a stream are fixed by the JACK server
-// and can be specified when the JACK server is started. In
-// particular,
-//
-// jackd -d alsa -d hw:0 -r 44100 -p 512 -n 4
-//
-// specifies a sample rate of 44100 Hz, a buffer size of 512 sample
-// frames, and number of buffers = 4. Once the server is running, it
-// is not possible to override these values. If the values are not
-// specified in the command-line, the JACK server uses default values.
-//
-// The JACK server does not have to be running when an instance of
-// RtApiJack is created, though the function getDeviceCount() will
-// report 0 devices found until JACK has been started. When no
-// devices are available (i.e., the JACK server is not running), a
-// stream cannot be opened.
-
-#include <jack/jack.h>
-#include <unistd.h>
-#include <cstdio>
-
-// Per-stream state of the Jack API implementation. The two-element
-// arrays are indexed by stream mode (probeDeviceOpen() stores into
-// ports[mode] and deviceName[mode]).
-struct JackHandle {
-  jack_client_t *client;      // connection to the JACK server
-  jack_port_t **ports[2];     // registered port arrays, 0 until allocated
-  std::string deviceName[2];  // name of the device opened for each mode
-  bool xrun[2];               // set by jackXrun() for each mode that has ports
-  pthread_cond_t condition;   // initialized separately, in probeDeviceOpen()
-  int drainCounter;           // Tracks callback counts when draining
-  bool internalDrain;         // Indicates if stop is initiated from callback or not.
-
-  JackHandle()
-    : client(0), drainCounter(0), internalDrain(false)
-  {
-    for ( int m=0; m<2; m++ ) {
-      ports[m] = 0;
-      xrun[m] = false;
-    }
-  }
-};
-
-static void jackSilentError( const char * ) {};
-
-// Construct the Jack API object. No connection to the server is made
-// here; the only work is silencing JACK's error output in builds where
-// __RTAUDIO_DEBUG__ is not defined.
-RtApiJack :: RtApiJack()
-{
-#ifndef __RTAUDIO_DEBUG__
-  // Turn off Jack's internal error reporting.
-  jack_set_error_function( &jackSilentError );
-#endif
-}
-
-// Destroy the Jack API object, closing the stream first if one is
-// still open.
-RtApiJack :: ~RtApiJack()
-{
-  if ( stream_.state == STREAM_CLOSED ) return;
-  closeStream();
-}
-
-// Count the JACK "devices" currently visible.
-//
-// A temporary client is opened (without starting a server), all port
-// names are listed, and each change of the "client:" prefix between
-// consecutive port names counts as one device. Port names without a
-// colon are ignored. Returns 0 when no client connection can be made.
-unsigned int RtApiJack :: getDeviceCount( void )
-{
-  // See if we can become a jack client.
-  jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption;
-  jack_client_t *client = jack_client_open( "RtApiJackCount", options, NULL );
-  if ( client == 0 ) return 0;
-
-  unsigned int nDevices = 0;
-  const char **ports = jack_get_ports( client, NULL, NULL, 0 );
-  if ( ports ) {
-    // Parse the port names up to the first colon (:). The terminator
-    // is tested before an entry is read, so an empty list is harmless.
-    std::string port, previousPort;
-    for ( unsigned int nPorts=0; ports[nPorts]; nPorts++ ) {
-      port = (char *) ports[ nPorts ];
-      size_t iColon = port.find(":");
-      if ( iColon == std::string::npos ) continue;
-
-      port = port.substr( 0, iColon + 1 );
-      if ( port != previousPort ) {
-        nDevices++;
-        previousPort = port;
-      }
-    }
-    free( ports );
-  }
-
-  jack_client_close( client );
-  return nDevices;
-}
-
-// Describe the device-th JACK "device".
-//
-// Devices are the distinct client-name prefixes (text before the first
-// colon) met while walking the server's port list; "device" is an index
-// into that sequence. The returned info carries the server sample rate
-// as the only supported rate, the channel counts derived from the
-// client's ports, and RTAUDIO_FLOAT32 as native format. info.probed is
-// true only when the whole probe succeeded. A missing server or a
-// device without ports yields a WARNING; an out-of-range index is
-// reported as INVALID_USE.
-RtAudio::DeviceInfo RtApiJack :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption
-  jack_client_t *client = jack_client_open( "RtApiJackInfo", options, NULL );
-  if ( client == 0 ) {
-    errorText_ = "RtApiJack::getDeviceInfo: Jack server not found or connection error!";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  unsigned int nDevices = 0;
-  const char **ports = jack_get_ports( client, NULL, NULL, 0 );
-  if ( ports ) {
-    // Parse the port names up to the first colon (:). The terminator
-    // is tested before an entry is read, so an empty list is harmless.
-    std::string port, previousPort;
-    for ( unsigned int nPorts=0; ports[nPorts]; nPorts++ ) {
-      port = (char *) ports[ nPorts ];
-      size_t iColon = port.find(":");
-      if ( iColon == std::string::npos ) continue;
-
-      port = port.substr( 0, iColon );
-      if ( port != previousPort ) {
-        if ( nDevices == device ) info.name = port;
-        nDevices++;
-        previousPort = port;
-      }
-    }
-    free( ports );
-  }
-
-  if ( device >= nDevices ) {
-    jack_client_close( client );
-    errorText_ = "RtApiJack::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  // Get the current jack server sample rate.
-  info.sampleRates.clear();
-
-  info.preferredSampleRate = jack_get_sample_rate( client );
-  info.sampleRates.push_back( info.preferredSampleRate );
-
-  // Count the available ports containing the client name as device
-  // channels. Jack "input ports" equal RtAudio output channels.
-  unsigned int nChannels = 0;
-  ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsInput );
-  if ( ports ) {
-    while ( ports[ nChannels ] ) nChannels++;
-    free( ports );
-    info.outputChannels = nChannels;
-  }
-
-  // Jack "output ports" equal RtAudio input channels.
-  nChannels = 0;
-  ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsOutput );
-  if ( ports ) {
-    while ( ports[ nChannels ] ) nChannels++;
-    free( ports );
-    info.inputChannels = nChannels;
-  }
-
-  if ( info.outputChannels == 0 && info.inputChannels == 0 ) {
-    jack_client_close(client);
-    errorText_ = "RtApiJack::getDeviceInfo: error determining Jack input/output channels!";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // If device opens for both playback and capture, we determine the channels.
-  if ( info.outputChannels > 0 && info.inputChannels > 0 )
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
-  // Jack always uses 32-bit floats.
-  info.nativeFormats = RTAUDIO_FLOAT32;
-
-  // Jack doesn't provide default devices so we'll use the first available one.
-  if ( device == 0 && info.outputChannels > 0 )
-    info.isDefaultOutput = true;
-  if ( device == 0 && info.inputChannels > 0 )
-    info.isDefaultInput = true;
-
-  jack_client_close(client);
-  info.probed = true;
-  return info;
-}
-
-// Process callback registered with JACK. Forwards the frame count to
-// the owning RtApiJack object (taken from the CallbackInfo passed as
-// infoPointer) and maps its boolean result to JACK's convention:
-// 0 when callbackEvent() succeeded, 1 when it returned false.
-static int jackCallbackHandler( jack_nframes_t nframes, void *infoPointer )
-{
-  RtApiJack *api = (RtApiJack *) ( (CallbackInfo *) infoPointer )->object;
-  return api->callbackEvent( (unsigned long) nframes ) ? 0 : 1;
-}
-
-// Thread entry point used when the Jack server signals that it is
-// shutting down. The close is done from this separate thread because
-// jackShutdown() must return before the jack_deactivate() call in
-// closeStream() will return. ptr is the stream's CallbackInfo; its
-// object member is the owning RtApiJack instance.
-static void *jackCloseStream( void *ptr )
-{
-  RtApiJack *api = (RtApiJack *) ( (CallbackInfo *) ptr )->object;
-  api->closeStream();
-
-  pthread_exit( NULL );
-}
-// Shutdown callback registered with JACK; infoPointer is the stream's
-// CallbackInfo.
-//
-// If the stream is not running, the call is assumed to result from
-// RtApiJack::stopStream() (deactivating a client handle triggers this
-// callback) and nothing is done. Otherwise the server is assumed to be
-// shutting down, or some other problem occurred, so the stream is
-// closed from a helper thread and a notice is written to std::cerr.
-static void jackShutdown( void *infoPointer )
-{
-  CallbackInfo *cbInfo = (CallbackInfo *) infoPointer;
-  RtApiJack *api = (RtApiJack *) cbInfo->object;
-
-  if ( api->isStreamRunning() ) {
-    ThreadHandle closer;
-    pthread_create( &closer, NULL, jackCloseStream, cbInfo );
-    std::cerr << "\nRtApiJack: the Jack server is shutting down this client ... stream stopped and closed!!\n" << std::endl;
-  }
-}
-
-// Xrun callback registered with JACK. infoPointer is interpreted as the
-// stream's JackHandle; the xrun flag is raised for every mode that has
-// a port array allocated. Always returns 0.
-static int jackXrun( void *infoPointer )
-{
-  JackHandle *jh = (JackHandle *) infoPointer;
-
-  for ( int m=0; m<2; m++ ) {
-    if ( jh->ports[m] ) jh->xrun[m] = true;
-  }
-
-  return 0;
-}
-
-// Open one direction (mode) of a stream on a JACK "device".
-//
-// device       index into the distinct client-name prefixes found in
-//              the server's port list
-// mode         OUTPUT or INPUT; an INPUT pass following an OUTPUT pass
-//              reuses the existing client and turns the stream DUPLEX
-// channels     number of channels requested
-// firstChannel channel offset; channels + firstChannel ports must exist
-// sampleRate   must equal the JACK server rate, otherwise FAILURE
-// format       user data format (JACK itself always uses RTAUDIO_FLOAT32)
-// bufferSize   overwritten with the server's buffer size
-// options      optional; streamName names the client and the
-//              RTAUDIO_NONINTERLEAVED flag selects the user layout
-//
-// Returns SUCCESS or FAILURE. A client connection opened by this call
-// is closed again on every failure path.
-bool RtApiJack :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                   unsigned int firstChannel, unsigned int sampleRate,
-                                   RtAudioFormat format, unsigned int *bufferSize,
-                                   RtAudio::StreamOptions *options )
-{
-  JackHandle *handle = (JackHandle *) stream_.apiHandle;
-
-  // Look for jack server and try to become a client (only do once per stream).
-  // ownsClient records that this call opened the connection and must
-  // therefore close it if it fails before the handle takes it over.
-  jack_client_t *client = 0;
-  bool ownsClient = false;
-  if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) {
-    jack_options_t jackoptions = (jack_options_t) ( JackNoStartServer ); //JackNullOption;
-    jack_status_t *status = NULL;
-    if ( options && !options->streamName.empty() )
-      client = jack_client_open( options->streamName.c_str(), jackoptions, status );
-    else
-      client = jack_client_open( "RtApiJack", jackoptions, status );
-    if ( client == 0 ) {
-      errorText_ = "RtApiJack::probeDeviceOpen: Jack server not found or connection error!";
-      error( RtAudioError::WARNING );
-      return FAILURE;
-    }
-    ownsClient = true;
-  }
-  else {
-    // The handle must have been created on an earlier pass.
-    client = handle->client;
-  }
-
-  const char **ports;
-  std::string port, previousPort, deviceName;
-  unsigned int nDevices = 0;
-  ports = jack_get_ports( client, NULL, NULL, 0 );
-  if ( ports ) {
-    // Parse the port names up to the first colon (:). The terminator
-    // is tested before an entry is read, so an empty list is harmless.
-    for ( unsigned int nPorts=0; ports[nPorts]; nPorts++ ) {
-      port = (char *) ports[ nPorts ];
-      size_t iColon = port.find(":");
-      if ( iColon == std::string::npos ) continue;
-
-      port = port.substr( 0, iColon );
-      if ( port != previousPort ) {
-        if ( nDevices == device ) deviceName = port;
-        nDevices++;
-        previousPort = port;
-      }
-    }
-    free( ports );
-  }
-
-  if ( device >= nDevices ) {
-    if ( ownsClient ) jack_client_close( client );
-    errorText_ = "RtApiJack::probeDeviceOpen: device ID is invalid!";
-    return FAILURE;
-  }
-
-  // Count the available ports containing the client name as device
-  // channels. Jack "input ports" equal RtAudio output channels.
-  unsigned int nChannels = 0;
-  unsigned long flag = JackPortIsInput;
-  if ( mode == INPUT ) flag = JackPortIsOutput;
-  ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
-  if ( ports ) {
-    while ( ports[ nChannels ] ) nChannels++;
-    free( ports );
-  }
-
-  // Compare the jack ports for specified client to the requested number of channels.
-  if ( nChannels < (channels + firstChannel) ) {
-    if ( ownsClient ) jack_client_close( client );
-    errorStream_ << "RtApiJack::probeDeviceOpen: requested number of channels (" << channels << ") + offset (" << firstChannel << ") not found for specified device (" << device << ":" << deviceName << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Check the jack server sample rate.
-  unsigned int jackRate = jack_get_sample_rate( client );
-  if ( sampleRate != jackRate ) {
-    // Only close a connection opened by this call; a client taken from
-    // the existing handle is still referenced by that handle.
-    if ( ownsClient ) jack_client_close( client );
-    errorStream_ << "RtApiJack::probeDeviceOpen: the requested sample rate (" << sampleRate << ") is different than the JACK server rate (" << jackRate << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-  stream_.sampleRate = jackRate;
-
-  // Get the latency of the JACK port.
-  ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
-  if ( ports ) {
-    if ( ports[ firstChannel ] ) {
-      // Added by Ge Wang
-      jack_latency_callback_mode_t cbmode = (mode == INPUT ? JackCaptureLatency : JackPlaybackLatency);
-      // the range (usually the min and max are equal)
-      jack_latency_range_t latrange; latrange.min = latrange.max = 0;
-      // get the latency range
-      jack_port_get_latency_range( jack_port_by_name( client, ports[firstChannel] ), cbmode, &latrange );
-      // be optimistic, use the min!
-      stream_.latency[mode] = latrange.min;
-      //stream_.latency[mode] = jack_port_get_latency( jack_port_by_name( client, ports[ firstChannel ] ) );
-    }
-    free( ports );
-  }
-
-  // The jack server always uses 32-bit floating-point data.
-  stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-  stream_.userFormat = format;
-
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-
-  // Jack always uses non-interleaved buffers.
-  stream_.deviceInterleaved[mode] = false;
-
-  // Jack always provides host byte-ordered data.
-  stream_.doByteSwap[mode] = false;
-
-  // Get the buffer size. The buffer size and number of buffers
-  // (periods) is set when the jack server is started.
-  stream_.bufferSize = (int) jack_get_buffer_size( client );
-  *bufferSize = stream_.bufferSize;
-
-  stream_.nDeviceChannels[mode] = channels;
-  stream_.nUserChannels[mode] = channels;
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-       stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate our JackHandle structure for the stream.
-  if ( handle == 0 ) {
-    try {
-      handle = new JackHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiJack::probeDeviceOpen: error allocating JackHandle memory.";
-      goto error;
-    }
-
-    // Hand the connection to the handle right away so that the error
-    // path always closes it through the handle from here on.
-    handle->client = client;
-
-    if ( pthread_cond_init(&handle->condition, NULL) ) {
-      errorText_ = "RtApiJack::probeDeviceOpen: error initializing pthread condition variable.";
-      // The condition variable was never initialized and must not be
-      // destroyed: drop the handle here and let the error path close
-      // the client directly.
-      delete handle;
-      handle = 0;
-      goto error;
-    }
-    stream_.apiHandle = (void *) handle;
-  }
-  handle->deviceName[mode] = deviceName;
-
-  // Allocate necessary internal buffers.
-  unsigned long bufferBytes;
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiJack::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    bool makeBuffer = true;
-    if ( mode == OUTPUT )
-      bufferBytes = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-    else { // mode == INPUT
-      bufferBytes = stream_.nDeviceChannels[1] * formatBytes( stream_.deviceFormat[1] );
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        // Reuse the buffer made for the output pass when it is larger.
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes(stream_.deviceFormat[0]);
-        if ( bufferBytes < bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiJack::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  // Allocate memory for the Jack ports (channels) identifiers.
-  handle->ports[mode] = (jack_port_t **) malloc ( sizeof (jack_port_t *) * channels );
-  if ( handle->ports[mode] == NULL ) {
-    errorText_ = "RtApiJack::probeDeviceOpen: error allocating port memory.";
-    goto error;
-  }
-
-  stream_.device[mode] = device;
-  stream_.channelOffset[mode] = firstChannel;
-  stream_.state = STREAM_STOPPED;
-  stream_.callbackInfo.object = (void *) this;
-
-  if ( stream_.mode == OUTPUT && mode == INPUT )
-    // We had already set up the stream for output.
-    stream_.mode = DUPLEX;
-  else {
-    stream_.mode = mode;
-    jack_set_process_callback( handle->client, jackCallbackHandler, (void *) &stream_.callbackInfo );
-    // jackXrun() casts its argument to JackHandle *, so it must receive
-    // the handle itself rather than the address of this local pointer,
-    // which no longer exists once this function has returned.
-    jack_set_xrun_callback( handle->client, jackXrun, (void *) handle );
-    jack_on_shutdown( handle->client, jackShutdown, (void *) &stream_.callbackInfo );
-  }
-
-  // Register our ports.
-  char label[64];
-  if ( mode == OUTPUT ) {
-    for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
-      snprintf( label, 64, "outport %d", i );
-      handle->ports[0][i] = jack_port_register( handle->client, (const char *)label,
-                                                JACK_DEFAULT_AUDIO_TYPE, JackPortIsOutput, 0 );
-    }
-  }
-  else {
-    for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
-      snprintf( label, 64, "inport %d", i );
-      handle->ports[1][i] = jack_port_register( handle->client, (const char *)label,
-                                                JACK_DEFAULT_AUDIO_TYPE, JackPortIsInput, 0 );
-    }
-  }
-
-  // Setup the buffer conversion information structure. We don't use
-  // buffers to do channel offsets, so we override that parameter
-  // here.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
-
-  return SUCCESS;
-
- error:
-  if ( handle ) {
-    pthread_cond_destroy( &handle->condition );
-    jack_client_close( handle->client );
-
-    if ( handle->ports[0] ) free( handle->ports[0] );
-    if ( handle->ports[1] ) free( handle->ports[1] );
-
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-  else if ( ownsClient ) {
-    // No handle took over the connection opened above; close it here.
-    jack_client_close( client );
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  return FAILURE;
-}
-
-// Tears down the JACK stream.  The client is deactivated (only when the
-// stream is running) and closed, the per-stream JackHandle and its port
-// arrays are released, the user/device buffers are freed, and the stream
-// is marked closed.  Issues a warning and returns if no stream is open.
-void RtApiJack :: closeStream( void )
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiJack::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  JackHandle *jack = (JackHandle *) stream_.apiHandle;
-  if ( jack != 0 ) {
-    // Shut the client down first, then release what the handle owns.
-    if ( stream_.state == STREAM_RUNNING ) jack_deactivate( jack->client );
-    jack_client_close( jack->client );
-
-    for ( int dir = 0; dir < 2; ++dir ) {
-      if ( jack->ports[dir] ) free( jack->ports[dir] );
-    }
-    pthread_cond_destroy( &jack->condition );
-    delete jack;
-    stream_.apiHandle = 0;
-  }
-
-  // Release the user-side buffers for both directions.
-  int slot = 0;
-  while ( slot < 2 ) {
-    if ( stream_.userBuffer[slot] ) {
-      free( stream_.userBuffer[slot] );
-      stream_.userBuffer[slot] = 0;
-    }
-    ++slot;
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-// Activates the JACK client and connects the ports registered for this
-// stream to the ports of the selected device(s), starting at the stored
-// channel offset.  On success the drain bookkeeping is reset and the
-// stream state becomes STREAM_RUNNING.  Any failure sets errorText_ and
-// is reported through error( RtAudioError::SYSTEM_ERROR ); a stream that
-// is already running only produces a warning.
-void RtApiJack :: startStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiJack::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  JackHandle *handle = (JackHandle *) stream_.apiHandle;
-  int result = jack_activate( handle->client );
-  if ( result ) {
-    errorText_ = "RtApiJack::startStream(): unable to activate JACK client!";
-    goto unlock;
-  }
-
-  // Declared without initializers because "goto unlock" above jumps past them.
-  const char **ports;
-  unsigned int nAvailable;
-
-  // Get the list of available ports.
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    result = 1;
-    ports = jack_get_ports( handle->client, handle->deviceName[0].c_str(), NULL, JackPortIsInput);
-    if ( ports == NULL) {
-      errorText_ = "RtApiJack::startStream(): error determining available JACK input ports!";
-      goto unlock;
-    }
-
-    // The returned list is NULL-terminated.  Measure it once so that
-    // "channelOffset + i" is never used to read beyond the terminator.
-    for ( nAvailable = 0; ports[nAvailable] != NULL; nAvailable++ ) { }
-
-    // Now make the port connections.  Since RtAudio wasn't designed to
-    // allow the user to select particular channels of a device, we'll
-    // just open the first "nChannels" ports with offset.
-    for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
-      result = 1; // stays non-zero (failure) when the device has too few ports
-      if ( stream_.channelOffset[0] + i < nAvailable )
-        result = jack_connect( handle->client, jack_port_name( handle->ports[0][i] ), ports[ stream_.channelOffset[0] + i ] );
-      if ( result ) {
-        free( ports );
-        errorText_ = "RtApiJack::startStream(): error connecting output ports!";
-        goto unlock;
-      }
-    }
-    free(ports);
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-    result = 1;
-    ports = jack_get_ports( handle->client, handle->deviceName[1].c_str(), NULL, JackPortIsOutput );
-    if ( ports == NULL) {
-      errorText_ = "RtApiJack::startStream(): error determining available JACK output ports!";
-      goto unlock;
-    }
-
-    // Same bounds protection as for the output direction.
-    for ( nAvailable = 0; ports[nAvailable] != NULL; nAvailable++ ) { }
-
-    // Now make the port connections.  See note above.
-    for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
-      result = 1;
-      if ( stream_.channelOffset[1] + i < nAvailable )
-        result = jack_connect( handle->client, ports[ stream_.channelOffset[1] + i ], jack_port_name( handle->ports[1][i] ) );
-      if ( result ) {
-        free( ports );
-        errorText_ = "RtApiJack::startStream(): error connecting input ports!";
-        goto unlock;
-      }
-    }
-    free(ports);
-  }
-
-  handle->drainCounter = 0;
-  handle->internalDrain = false;
-  stream_.state = STREAM_RUNNING;
-
- unlock:
-  // A zero result means every step succeeded; anything else is an error
-  // whose text was stored in errorText_ above.
-  if ( result == 0 ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-// Stops a running stream.  When the stream has an output side and no
-// drain is in progress yet (drainCounter == 0), a drain is requested and
-// the caller blocks on the handle's condition variable until it is
-// signaled.  Afterwards the JACK client is deactivated and the stream is
-// marked stopped.  Warns and returns if the stream is already stopped.
-void RtApiJack :: stopStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiJack::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  JackHandle *jack = (JackHandle *) stream_.apiHandle;
-  const bool hasOutput = ( stream_.mode == OUTPUT || stream_.mode == DUPLEX );
-  if ( hasOutput && jack->drainCounter == 0 ) {
-    jack->drainCounter = 2;
-    pthread_cond_wait( &jack->condition, &stream_.mutex ); // block until signaled
-  }
-
-  jack_deactivate( jack->client );
-  stream_.state = STREAM_STOPPED;
-}
-
-// Stops the stream without waiting for a drain: drainCounter is preset
-// to 2 so that stopStream() skips its blocking wait.  Warns and returns
-// if the stream is already stopped.
-void RtApiJack :: abortStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiJack::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  ( (JackHandle *) stream_.apiHandle )->drainCounter = 2;
-  stopStream();
-}
-
-// Thread entry point used when the user callback asks for the stream to
-// be stopped or aborted.  The stop has to run on a separate thread
-// because callbackEvent() must return before jack_deactivate() can
-// return.  "ptr" is the stream's CallbackInfo, whose "object" member
-// refers to the owning RtApiJack instance.
-static void *jackStopStream( void *ptr )
-{
-  RtApiJack *api = (RtApiJack *) ( (CallbackInfo *) ptr )->object;
-  api->stopStream();
-  pthread_exit( NULL );
-}
-
-// Per-cycle processing routine for the JACK stream; "nframes" is the
-// number of frames to handle in this cycle.
-//
-// The routine
-//  - returns immediately while the stream is stopped or stopping,
-//  - finishes a pending drain once drainCounter exceeds 3, either by
-//    spawning a stop thread (drain started from the callback) or by
-//    signaling the thread blocked in stopStream(),
-//  - otherwise invokes the user callback (only while no drain is
-//    pending), copies output data to the JACK port buffers and input
-//    data from them, converting through deviceBuffer when required.
-//
-// Returns FAILURE if the stream is closed or the buffer size differs
-// from the one the stream was opened with, SUCCESS otherwise.
-bool RtApiJack :: callbackEvent( unsigned long nframes )
-{
-  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiJack::callbackEvent(): the stream is closed ... this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return FAILURE;
-  }
-  if ( stream_.bufferSize != nframes ) {
-    errorText_ = "RtApiJack::callbackEvent(): the JACK buffer size has changed ... cannot process!";
-    error( RtAudioError::WARNING );
-    return FAILURE;
-  }
-
-  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
-  JackHandle *handle = (JackHandle *) stream_.apiHandle;
-
-  // Check if we were draining the stream and signal is finished.
-  if ( handle->drainCounter > 3 ) {
-    ThreadHandle threadId;
-
-    stream_.state = STREAM_STOPPING;
-    if ( handle->internalDrain == true ) {
-      // Nobody ever joins the stop thread, so detach it to let its
-      // resources be reclaimed when it exits.
-      if ( pthread_create( &threadId, NULL, jackStopStream, info ) == 0 )
-        pthread_detach( threadId );
-    }
-    else
-      pthread_cond_signal( &handle->condition );
-    return SUCCESS;
-  }
-
-  // Invoke user callback first, to get fresh output data.
-  if ( handle->drainCounter == 0 ) {
-    RtAudioCallback callback = (RtAudioCallback) info->callback;
-    double streamTime = getStreamTime();
-    RtAudioStreamStatus status = 0;
-    // Report (and clear) any xrun flagged since the previous cycle.
-    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
-      status |= RTAUDIO_OUTPUT_UNDERFLOW;
-      handle->xrun[0] = false;
-    }
-    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
-      status |= RTAUDIO_INPUT_OVERFLOW;
-      handle->xrun[1] = false;
-    }
-    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
-                                  stream_.bufferSize, streamTime, status, info->userData );
-    if ( cbReturnValue == 2 ) {
-      // Abort request: stop right away from a helper thread.
-      stream_.state = STREAM_STOPPING;
-      handle->drainCounter = 2;
-      ThreadHandle id;
-      if ( pthread_create( &id, NULL, jackStopStream, info ) == 0 )
-        pthread_detach( id );
-      return SUCCESS;
-    }
-    else if ( cbReturnValue == 1 ) {
-      // Stop request: start draining; the stop is issued from here later.
-      handle->drainCounter = 1;
-      handle->internalDrain = true;
-    }
-  }
-
-  jack_default_audio_sample_t *jackbuffer;
-  unsigned long bufferBytes = nframes * sizeof( jack_default_audio_sample_t );
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-
-      for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
-        memset( jackbuffer, 0, bufferBytes );
-      }
-
-    }
-    else if ( stream_.doConvertBuffer[0] ) {
-
-      convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-
-      // deviceBuffer holds one contiguous run of bufferBytes per channel.
-      for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
-        memcpy( jackbuffer, &stream_.deviceBuffer[i*bufferBytes], bufferBytes );
-      }
-    }
-    else { // no buffer conversion
-      for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
-        memcpy( jackbuffer, &stream_.userBuffer[0][i*bufferBytes], bufferBytes );
-      }
-    }
-  }
-
-  // Don't bother draining input
-  if ( handle->drainCounter ) {
-    handle->drainCounter++;
-    goto unlock;
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    if ( stream_.doConvertBuffer[1] ) {
-      for ( unsigned int i=0; i<stream_.nDeviceChannels[1]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
-        memcpy( &stream_.deviceBuffer[i*bufferBytes], jackbuffer, bufferBytes );
-      }
-      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-    }
-    else { // no buffer conversion
-      for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
-        memcpy( &stream_.userBuffer[1][i*bufferBytes], jackbuffer, bufferBytes );
-      }
-    }
-  }
-
- unlock:
-  RtApi::tickStreamTime();
-  return SUCCESS;
-}
- //******************** End of __UNIX_JACK__ *********************//
-#endif
-
-#if defined(__WINDOWS_ASIO__) // ASIO API on Windows
-
-// The ASIO API is designed around a callback scheme, so this
-// implementation is similar to that used for OS-X CoreAudio and Linux
-// Jack. The primary constraint with ASIO is that it only allows
-// access to a single driver at a time. Thus, it is not possible to
-// have more than one simultaneous RtAudio stream.
-//
-// This implementation also requires a number of external ASIO files
-// and a few global variables. The ASIO callback scheme does not
-// allow for the passing of user data, so we must create a global
-// pointer to our callbackInfo structure.
-//
-// On unix systems, we make use of a pthread condition variable.
-// Since there is no equivalent in Windows, I hacked something based
-// on information found in
-// http://www.cs.wustl.edu/~schmidt/win32-cv-1.html.
-
-#include "asiosys.h"
-#include "asio.h"
-#include "iasiothiscallresolver.h"
-#include "asiodrivers.h"
-#include <cmath>
-
-// File-scope state shared by the ASIO implementation.  The ASIO callback
-// scheme does not allow user data to be passed, hence the global pointer
-// to the stream's CallbackInfo.
-static AsioDrivers drivers;
-static ASIOCallbacks asioCallbacks;
-static ASIODriverInfo driverInfo;
-static CallbackInfo *asioCallbackInfo;
-static bool asioXRun;
-
-// Per-stream bookkeeping stored in stream_.apiHandle.
-struct AsioHandle {
-  int drainCounter;       // Tracks callback counts when draining
-  bool internalDrain;     // Indicates if stop is initiated from callback or not.
-  ASIOBufferInfo *bufferInfos;
-  HANDLE condition;       // Event created with CreateEvent() in probeDeviceOpen()
-
-  // Every member starts in a defined state; "condition" stays null until
-  // the event is actually created.
-  AsioHandle()
-    :drainCounter(0), internalDrain(false), bufferInfos(0), condition(0) {}
-};
-
-// Function declarations (definitions at end of section)
-static const char* getAsioErrorString( ASIOError result );
-static void sampleRateChanged( ASIOSampleRate sRate );
-static long asioMessages( long selector, long value, void* message, double* opt );
-
-// Initializes COM for the calling thread, unloads any driver that is
-// still current and prepares the global ASIODriverInfo structure.
-RtApiAsio :: RtApiAsio()
-{
-  // ASIO cannot run on a multi-threaded apartment. You can call
-  // CoInitialize beforehand, but it must be for apartment threading
-  // (in which case, CoInitialize will return S_FALSE here).
-  coInitialized_ = false;
-  HRESULT hr = CoInitialize( NULL );
-  if ( FAILED(hr) ) {
-    errorText_ = "RtApiAsio::ASIO requires a single-threaded appartment. Call CoInitializeEx(0,COINIT_APARTMENTTHREADED)";
-    error( RtAudioError::WARNING );
-  }
-  else {
-    // Only a successful call (S_OK or S_FALSE) may be balanced by
-    // CoUninitialize() in the destructor.
-    coInitialized_ = true;
-  }
-
-  drivers.removeCurrentDriver();
-  driverInfo.asioVersion = 2;
-
-  // See note in DirectSound implementation about GetDesktopWindow().
-  driverInfo.sysRef = GetForegroundWindow();
-}
-
-// Closes a stream that is still open and, if this object initialized
-// COM, uninitializes it again.
-RtApiAsio :: ~RtApiAsio()
-{
-  const bool streamStillOpen = ( stream_.state != STREAM_CLOSED );
-  if ( streamStillOpen ) {
-    closeStream();
-  }
-
-  if ( coInitialized_ ) {
-    CoUninitialize();
-  }
-}
-
-// Returns the number of ASIO drivers reported by the driver list.
-unsigned int RtApiAsio :: getDeviceCount( void )
-{
-  const unsigned int nDrivers = (unsigned int) drivers.asioGetNumDev();
-  return nDrivers;
-}
-
-// Probes ASIO driver number "device" and returns its description:
-// name, channel counts, supported sample rates, preferred sample rate,
-// native data format and default-device flags.
-//
-// The returned structure has probed == false when the probe did not
-// complete; the reason is reported through error().  While a stream is
-// open no driver can be probed, so the results saved in devices_ by
-// saveDeviceInfo() are returned instead.  The driver loaded for the
-// probe is removed again on every path that got as far as loading it.
-RtAudio::DeviceInfo RtApiAsio :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  // Get device ID
-  unsigned int nDevices = getDeviceCount();
-  if ( nDevices == 0 ) {
-    errorText_ = "RtApiAsio::getDeviceInfo: no devices found!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  if ( device >= nDevices ) {
-    errorText_ = "RtApiAsio::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  // If a stream is already open, we cannot probe other devices.  Thus, use the saved results.
-  if ( stream_.state != STREAM_CLOSED ) {
-    if ( device >= devices_.size() ) {
-      errorText_ = "RtApiAsio::getDeviceInfo: device ID was not present before stream was opened.";
-      error( RtAudioError::WARNING );
-      return info;
-    }
-    return devices_[ device ];
-  }
-
-  char driverName[32];
-  ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::getDeviceInfo: unable to get driver name (" << getAsioErrorString( result ) << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  info.name = driverName;
-
-  if ( !drivers.loadDriver( driverName ) ) {
-    errorStream_ << "RtApiAsio::getDeviceInfo: unable to load driver (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  result = ASIOInit( &driverInfo );
-  if ( result != ASE_OK ) {
-    // The driver was loaded above; release it like the later failure
-    // paths do, so it does not stay current after a failed probe.
-    drivers.removeCurrentDriver();
-    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Determine the device channel information.
-  long inputChannels, outputChannels;
-  result = ASIOGetChannels( &inputChannels, &outputChannels );
-  if ( result != ASE_OK ) {
-    drivers.removeCurrentDriver();
-    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  info.outputChannels = outputChannels;
-  info.inputChannels = inputChannels;
-  // Duplex capacity is limited by the smaller of the two directions.
-  if ( info.outputChannels > 0 && info.inputChannels > 0 )
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
-  // Determine the supported sample rates.
-  info.sampleRates.clear();
-  for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
-    result = ASIOCanSampleRate( (ASIOSampleRate) SAMPLE_RATES[i] );
-    if ( result == ASE_OK ) {
-      info.sampleRates.push_back( SAMPLE_RATES[i] );
-
-      // Prefer the highest supported rate that does not exceed 48000.
-      if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
-        info.preferredSampleRate = SAMPLE_RATES[i];
-    }
-  }
-
-  // Determine supported data types ... just check first channel and assume rest are the same.
-  ASIOChannelInfo channelInfo;
-  channelInfo.channel = 0;
-  channelInfo.isInput = true;
-  if ( info.inputChannels <= 0 ) channelInfo.isInput = false;
-  result = ASIOGetChannelInfo( &channelInfo );
-  if ( result != ASE_OK ) {
-    drivers.removeCurrentDriver();
-    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting driver channel info (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Map the driver sample type onto the matching RtAudio format flag;
-  // unknown types leave nativeFormats at zero.
-  info.nativeFormats = 0;
-  if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB )
-    info.nativeFormats |= RTAUDIO_SINT16;
-  else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB )
-    info.nativeFormats |= RTAUDIO_SINT32;
-  else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB )
-    info.nativeFormats |= RTAUDIO_FLOAT32;
-  else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB )
-    info.nativeFormats |= RTAUDIO_FLOAT64;
-  else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB )
-    info.nativeFormats |= RTAUDIO_SINT24;
-
-  if ( info.outputChannels > 0 )
-    if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
-  if ( info.inputChannels > 0 )
-    if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
-
-  info.probed = true;
-  drivers.removeCurrentDriver();
-  return info;
-}
-
-// ASIO buffer-switch callback.  ASIO passes no user data, so the owning
-// RtApiAsio instance is recovered from the global asioCallbackInfo and
-// the buffer index is forwarded to its callbackEvent().
-static void bufferSwitch( long index, ASIOBool /*processNow*/ )
-{
-  RtApiAsio *api = (RtApiAsio *) asioCallbackInfo->object;
-  api->callbackEvent( index );
-}
-
-// Rebuilds devices_ with one freshly probed DeviceInfo per ASIO driver.
-void RtApiAsio :: saveDeviceInfo( void )
-{
-  devices_.clear();
-
-  const unsigned int count = getDeviceCount();
-  devices_.resize( count );
-
-  unsigned int index = 0;
-  while ( index < count ) {
-    devices_[index] = getDeviceInfo( index );
-    ++index;
-  }
-}
-
-// Opens ASIO driver "device" for one direction ("mode") of a stream.
-//
-//   device       index of the ASIO driver
-//   mode         OUTPUT or INPUT; INPUT on a stream that is already open
-//                for OUTPUT turns it into a duplex stream
-//   channels     number of channels requested
-//   firstChannel channel offset on the device
-//   sampleRate   requested sample rate
-//   format       user data format
-//   bufferSize   in: requested size in frames (0 = driver preference);
-//                out: the size actually used
-//   options      optional stream options (only RTAUDIO_NONINTERLEAVED
-//                is examined here)
-//
-// Returns SUCCESS or FAILURE; on failure errorText_ describes the
-// problem.  When a non-duplex open fails, everything acquired here is
-// released and the stream is returned to the closed state.  Cleanup
-// after a failed duplex-input open is left to RtApi::openStream.
-bool RtApiAsio :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                   unsigned int firstChannel, unsigned int sampleRate,
-                                   RtAudioFormat format, unsigned int *bufferSize,
-                                   RtAudio::StreamOptions *options )
-{
-
-  bool isDuplexInput =  mode == INPUT && stream_.mode == OUTPUT;
-
-  // For ASIO, a duplex stream MUST use the same driver.
-  if ( isDuplexInput && stream_.device[0] != device ) {
-    errorText_ = "RtApiAsio::probeDeviceOpen: an ASIO duplex stream must use the same device for input and output!";
-    return FAILURE;
-  }
-
-  char driverName[32];
-  ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: unable to get driver name (" << getAsioErrorString( result ) << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Only load the driver once for duplex stream.
-  if ( !isDuplexInput ) {
-    // The getDeviceInfo() function will not work when a stream is open
-    // because ASIO does not allow multiple devices to run at the same
-    // time.  Thus, we'll probe the system before opening a stream and
-    // save the results for use by getDeviceInfo().
-    this->saveDeviceInfo();
-
-    if ( !drivers.loadDriver( driverName ) ) {
-      errorStream_ << "RtApiAsio::probeDeviceOpen: unable to load driver (" << driverName << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    result = ASIOInit( &driverInfo );
-    if ( result != ASE_OK ) {
-      // Do not leave the freshly loaded driver current after a failed init.
-      drivers.removeCurrentDriver();
-      errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-
-  // keep them before any "goto error", they are used for error cleanup + goto device boundary checks
-  bool buffersAllocated = false;
-  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-  unsigned int nChannels;
-
-
-  // Check the device channel count.
-  long inputChannels, outputChannels;
-  result = ASIOGetChannels( &inputChannels, &outputChannels );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  if ( ( mode == OUTPUT && (channels+firstChannel) > (unsigned int) outputChannels) ||
-       ( mode == INPUT && (channels+firstChannel) > (unsigned int) inputChannels) ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested channel count (" << channels << ") + offset (" << firstChannel << ").";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-  stream_.nDeviceChannels[mode] = channels;
-  stream_.nUserChannels[mode] = channels;
-  stream_.channelOffset[mode] = firstChannel;
-
-  // Verify the sample rate is supported.
-  result = ASIOCanSampleRate( (ASIOSampleRate) sampleRate );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested sample rate (" << sampleRate << ").";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  // Get the current sample rate
-  ASIOSampleRate currentRate;
-  result = ASIOGetSampleRate( &currentRate );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error getting sample rate.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  // Set the sample rate only if necessary
-  if ( currentRate != sampleRate ) {
-    result = ASIOSetSampleRate( (ASIOSampleRate) sampleRate );
-    if ( result != ASE_OK ) {
-      errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error setting sample rate (" << sampleRate << ").";
-      errorText_ = errorStream_.str();
-      goto error;
-    }
-  }
-
-  // Determine the driver data type.
-  ASIOChannelInfo channelInfo;
-  channelInfo.channel = 0;
-  if ( mode == OUTPUT ) channelInfo.isInput = false;
-  else channelInfo.isInput = true;
-  result = ASIOGetChannelInfo( &channelInfo );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting data format.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  // Assuming WINDOWS host is always little-endian.
-  // MSB (big-endian) driver formats therefore need a byte swap.
-  stream_.doByteSwap[mode] = false;
-  stream_.userFormat = format;
-  stream_.deviceFormat[mode] = 0;
-  if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-    if ( channelInfo.type == ASIOSTInt16MSB ) stream_.doByteSwap[mode] = true;
-  }
-  else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT32;
-    if ( channelInfo.type == ASIOSTInt32MSB ) stream_.doByteSwap[mode] = true;
-  }
-  else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-    if ( channelInfo.type == ASIOSTFloat32MSB ) stream_.doByteSwap[mode] = true;
-  }
-  else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
-    if ( channelInfo.type == ASIOSTFloat64MSB ) stream_.doByteSwap[mode] = true;
-  }
-  else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT24;
-    if ( channelInfo.type == ASIOSTInt24MSB ) stream_.doByteSwap[mode] = true;
-  }
-
-  if ( stream_.deviceFormat[mode] == 0 ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") data format not supported by RtAudio.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  // Set the buffer size.  For a duplex stream, this will end up
-  // setting the buffer size based on the input constraints, which
-  // should be ok.
-  long minSize, maxSize, preferSize, granularity;
-  result = ASIOGetBufferSize( &minSize, &maxSize, &preferSize, &granularity );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting buffer size.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  if ( isDuplexInput ) {
-    // When this is the duplex input (output was opened before), then we have to use the same
-    // buffersize as the output, because it might use the preferred buffer size, which most
-    // likely wasn't passed as input to this. The buffer sizes have to be identical anyway,
-    // so instead of throwing an error, make them equal. The caller uses the reference
-    // to the "bufferSize" param as usual to set up processing buffers.
-
-    *bufferSize = stream_.bufferSize;
-
-  } else {
-    if ( *bufferSize == 0 ) *bufferSize = preferSize;
-    else if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
-    else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
-    else if ( granularity == -1 ) {
-      // Make sure bufferSize is a power of two.
-      int log2_of_min_size = 0;
-      int log2_of_max_size = 0;
-
-      // Find the highest set bit of the minimum and maximum sizes.
-      for ( unsigned int i = 0; i < sizeof(long) * 8; i++ ) {
-        if ( minSize & ((long)1 << i) ) log2_of_min_size = i;
-        if ( maxSize & ((long)1 << i) ) log2_of_max_size = i;
-      }
-
-      // Pick the power of two in that range closest to the request.
-      long min_delta = std::abs( (long)*bufferSize - ((long)1 << log2_of_min_size) );
-      int min_delta_num = log2_of_min_size;
-
-      for (int i = log2_of_min_size + 1; i <= log2_of_max_size; i++) {
-        long current_delta = std::abs( (long)*bufferSize - ((long)1 << i) );
-        if (current_delta < min_delta) {
-          min_delta = current_delta;
-          min_delta_num = i;
-        }
-      }
-
-      *bufferSize = ( (unsigned int)1 << min_delta_num );
-      if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
-      else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
-    }
-    else if ( granularity != 0 ) {
-      // Set to an even multiple of granularity, rounding up.
-      *bufferSize = (*bufferSize + granularity-1) / granularity * granularity;
-    }
-  }
-
-  /*
-  // we don't use it anymore, see above!
-  // Just left it here for the case...
-  if ( isDuplexInput && stream_.bufferSize != *bufferSize ) {
-    errorText_ = "RtApiAsio::probeDeviceOpen: input/output buffersize discrepancy!";
-    goto error;
-  }
-  */
-
-  stream_.bufferSize = *bufferSize;
-  stream_.nBuffers = 2;
-
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-
-  // ASIO always uses non-interleaved buffers.
-  stream_.deviceInterleaved[mode] = false;
-
-  // Allocate, if necessary, our AsioHandle structure for the stream.
-  if ( handle == 0 ) {
-    try {
-      handle = new AsioHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiAsio::probeDeviceOpen: error allocating AsioHandle memory.";
-      goto error;
-    }
-    handle->bufferInfos = 0;
-
-    // Create a manual-reset event.
-    handle->condition = CreateEvent( NULL,   // no security
-                                     TRUE,   // manual-reset
-                                     FALSE,  // non-signaled initially
-                                     NULL ); // unnamed
-    stream_.apiHandle = (void *) handle;
-  }
-
-  // Create the ASIO internal buffers.  Since RtAudio sets up input
-  // and output separately, we'll have to dispose of previously
-  // created output buffers for a duplex stream.
-  if ( mode == INPUT && stream_.mode == OUTPUT ) {
-    ASIODisposeBuffers();
-    if ( handle->bufferInfos ) free( handle->bufferInfos );
-  }
-
-  // Allocate, initialize, and save the bufferInfos in our stream callbackInfo structure.
-  unsigned int i;
-  nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
-  handle->bufferInfos = (ASIOBufferInfo *) malloc( nChannels * sizeof(ASIOBufferInfo) );
-  if ( handle->bufferInfos == NULL ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: error allocating bufferInfo memory for driver (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  // Output channels come first in the array, input channels follow.
-  ASIOBufferInfo *infos;
-  infos = handle->bufferInfos;
-  for ( i=0; i<stream_.nDeviceChannels[0]; i++, infos++ ) {
-    infos->isInput = ASIOFalse;
-    infos->channelNum = i + stream_.channelOffset[0];
-    infos->buffers[0] = infos->buffers[1] = 0;
-  }
-  for ( i=0; i<stream_.nDeviceChannels[1]; i++, infos++ ) {
-    infos->isInput = ASIOTrue;
-    infos->channelNum = i + stream_.channelOffset[1];
-    infos->buffers[0] = infos->buffers[1] = 0;
-  }
-
-  // prepare for callbacks
-  stream_.sampleRate = sampleRate;
-  stream_.device[mode] = device;
-  stream_.mode = isDuplexInput ? DUPLEX : mode;
-
-  // store this class instance before registering callbacks, that are going to use it
-  asioCallbackInfo = &stream_.callbackInfo;
-  stream_.callbackInfo.object = (void *) this;
-
-  // Set up the ASIO callback structure and create the ASIO data buffers.
-  asioCallbacks.bufferSwitch = &bufferSwitch;
-  asioCallbacks.sampleRateDidChange = &sampleRateChanged;
-  asioCallbacks.asioMessage = &asioMessages;
-  asioCallbacks.bufferSwitchTimeInfo = NULL;
-  result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
-  if ( result != ASE_OK ) {
-    // Standard method failed. This can happen with strict/misbehaving drivers that return valid buffer size ranges
-    // but only accept the preferred buffer size as parameter for ASIOCreateBuffers. eg. Creatives ASIO driver
-    // in that case, let's be naïve and try that instead
-    *bufferSize = preferSize;
-    stream_.bufferSize = *bufferSize;
-    result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
-  }
-
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") creating buffers.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-  buffersAllocated = true;
-  stream_.state = STREAM_STOPPED;
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-       stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate necessary internal buffers
-  unsigned long bufferBytes;
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiAsio::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    // The device buffer is shared by both directions; for the duplex
-    // input it is only reallocated when the input needs more bytes per
-    // frame than the output side already provides.
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( isDuplexInput && stream_.deviceBuffer ) {
-      unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-      if ( bufferBytes <= bytesOut ) makeBuffer = false;
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiAsio::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  // Determine device latencies
-  long inputLatency, outputLatency;
-  result = ASIOGetLatencies( &inputLatency, &outputLatency );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting latency.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING); // warn but don't fail
-  }
-  else {
-    stream_.latency[0] = outputLatency;
-    stream_.latency[1] = inputLatency;
-  }
-
-  // Setup the buffer conversion information structure.  We don't use
-  // buffers to do channel offsets, so we override that parameter
-  // here.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
-
-  return SUCCESS;
-
- error:
-  if ( !isDuplexInput ) {
-    // the cleanup for error in the duplex input, is done by RtApi::openStream
-    // So we clean up for single channel only
-
-    if ( buffersAllocated )
-      ASIODisposeBuffers();
-
-    drivers.removeCurrentDriver();
-
-    if ( handle ) {
-      CloseHandle( handle->condition );
-      if ( handle->bufferInfos )
-        free( handle->bufferInfos );
-
-      delete handle;
-      stream_.apiHandle = 0;
-    }
-
-
-    if ( stream_.userBuffer[mode] ) {
-      free( stream_.userBuffer[mode] );
-      stream_.userBuffer[mode] = 0;
-    }
-
-    if ( stream_.deviceBuffer ) {
-      free( stream_.deviceBuffer );
-      stream_.deviceBuffer = 0;
-    }
-
-    // The state may already have been advanced to STREAM_STOPPED above.
-    // Everything has been released, so report the stream as closed again.
-    stream_.state = STREAM_CLOSED;
-  }
-
-  return FAILURE;
-}
-
-// Close the currently open ASIO stream. A running stream is stopped first;
-// then the ASIO buffers are disposed, the current driver is removed, and the
-// API handle, both user buffers and the device buffer are freed. Issues a
-// warning and returns without doing anything when no stream is open.
-void RtApiAsio :: closeStream()
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiAsio::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // A running stream is flagged as stopped before the driver is halted.
-  const bool wasRunning = ( stream_.state == STREAM_RUNNING );
-  if ( wasRunning ) {
-    stream_.state = STREAM_STOPPED;
-    ASIOStop();
-  }
-  ASIODisposeBuffers();
-  drivers.removeCurrentDriver();
-
-  // Tear down the API-specific handle, if one exists.
-  AsioHandle *asio = (AsioHandle *) stream_.apiHandle;
-  if ( asio != 0 ) {
-    CloseHandle( asio->condition );
-    if ( asio->bufferInfos != 0 )
-      free( asio->bufferInfos );
-    delete asio;
-    stream_.apiHandle = 0;
-  }
-
-  // Free both user buffers (indices 0 and 1).
-  for ( int dir = 0; dir != 2; ++dir ) {
-    if ( stream_.userBuffer[dir] == 0 ) continue;
-    free( stream_.userBuffer[dir] );
-    stream_.userBuffer[dir] = 0;
-  }
-
-  // The device buffer is a single allocation shared by the stream.
-  if ( stream_.deviceBuffer != 0 ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-// Reset to false at the end of every startStream() call; no other use is
-// visible in this part of the file.
-bool stopThreadCalled = false;
-
-// Start the open ASIO stream. Warns and returns if the stream is already
-// running. On a successful ASIOStart() the drain bookkeeping in the handle is
-// reset and the stream is marked running; on failure a SYSTEM_ERROR is
-// reported through error().
-void RtApiAsio :: startStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiAsio::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  AsioHandle *asio = (AsioHandle *) stream_.apiHandle;
-  const ASIOError startResult = ASIOStart();
-  const bool started = ( startResult == ASE_OK );
-
-  if ( started ) {
-    asio->drainCounter = 0;
-    asio->internalDrain = false;
-    ResetEvent( asio->condition );
-    stream_.state = STREAM_RUNNING;
-    asioXRun = false;
-  }
-  else {
-    errorStream_ << "RtApiAsio::startStream: error (" << getAsioErrorString( startResult ) << ") starting device.";
-    errorText_ = errorStream_.str();
-  }
-
-  // Cleared on both the success and the failure path.
-  stopThreadCalled = false;
-
-  if ( !started )
-    error( RtAudioError::SYSTEM_ERROR );
-}
-
-// Stop the ASIO stream. Warns and returns if it is already stopped. For
-// streams with an output side, a drain is requested (drainCounter = 2) when
-// none is in progress and the call blocks until the handle's condition event
-// is signaled. A failing ASIOStop() is reported as a SYSTEM_ERROR.
-void RtApiAsio :: stopStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiAsio::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  AsioHandle *asio = (AsioHandle *) stream_.apiHandle;
-  const bool hasOutput = ( stream_.mode == OUTPUT || stream_.mode == DUPLEX );
-  if ( hasOutput && asio->drainCounter == 0 ) {
-    asio->drainCounter = 2;
-    WaitForSingleObject( asio->condition, INFINITE ); // block until signaled
-  }
-
-  stream_.state = STREAM_STOPPED;
-
-  const ASIOError stopResult = ASIOStop();
-  if ( stopResult == ASE_OK ) return;
-
-  errorStream_ << "RtApiAsio::stopStream: error (" << getAsioErrorString( stopResult ) << ") stopping device.";
-  errorText_ = errorStream_.str();
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-// Abort the ASIO stream. In this implementation aborting is the same as
-// stopping: the call is forwarded to stopStream(). Warns and returns if the
-// stream is already stopped.
-void RtApiAsio :: abortStream()
-{
-  verifyStream();
-
-  if ( stream_.state != STREAM_STOPPED ) {
-    // An earlier version set the handle's drainCounter to 2 here instead of
-    // draining. That was disabled because the device buffers were observed
-    // to need zeroing to avoid continuing sound, even when they are
-    // completely disposed.
-    // AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-    // handle->drainCounter = 2;
-    stopStream();
-    return;
-  }
-
-  errorText_ = "RtApiAsio::abortStream(): the stream is already stopped!";
-  error( RtAudioError::WARNING );
-}
-
-// Thread entry point used when the user callback asks for the stream to be
-// stopped or aborted. The stop has to happen on a separate thread because
-// callbackEvent() must return before ASIOStop() will return.
-// `ptr` is the stream's CallbackInfo; its `object` member is the RtApiAsio
-// instance whose stopStream() is called.
-static unsigned __stdcall asioStopStream( void *ptr )
-{
-  RtApiAsio *api = (RtApiAsio *) ( (CallbackInfo *) ptr )->object;
-  api->stopStream();
-
-  _endthreadex( 0 );
-  return 0;
-}
-
-// Service one buffer period. `bufferIndex` selects which entry of each
-// channel's bufferInfos[].buffers[] is read and written on this call.
-// Steps, in order: finish a pending drain, invoke the user callback (unless
-// draining), copy output data into the driver's output buffers, copy input
-// data out of the driver's input buffers, then call ASIOOutputReady() and
-// advance the stream time. Returns FAILURE only when the stream is closed.
-bool RtApiAsio :: callbackEvent( long bufferIndex )
-{
- if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiAsio::callbackEvent(): the stream is closed ... this shouldn't happen!";
- error( RtAudioError::WARNING );
- return FAILURE;
- }
-
- CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
- AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-
- // Check if we were draining the stream and signal if finished.
- if ( handle->drainCounter > 3 ) {
-
- stream_.state = STREAM_STOPPING;
- // An external drain (stopStream() waiting on the event) is released by
- // signaling the event; an internal drain stops the stream from a new thread.
- if ( handle->internalDrain == false )
- SetEvent( handle->condition );
- else { // spawn a thread to stop the stream
- unsigned threadId;
- stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
- &stream_.callbackInfo, 0, &threadId );
- }
- return SUCCESS;
- }
-
- // Invoke user callback to get fresh output data UNLESS we are
- // draining stream.
- if ( handle->drainCounter == 0 ) {
- RtAudioCallback callback = (RtAudioCallback) info->callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- // asioXRun is reported once and then cleared; in DUPLEX mode the first
- // test clears it, so only the output-underflow flag is set.
- if ( stream_.mode != INPUT && asioXRun == true ) {
- status |= RTAUDIO_OUTPUT_UNDERFLOW;
- asioXRun = false;
- }
- if ( stream_.mode != OUTPUT && asioXRun == true ) {
- status |= RTAUDIO_INPUT_OVERFLOW;
- asioXRun = false;
- }
- int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
- stream_.bufferSize, streamTime, status, info->userData );
- // Return value 2: stop now via a helper thread, skipping the rest of this
- // period. Return value 1: start an internal drain and keep going.
- if ( cbReturnValue == 2 ) {
- stream_.state = STREAM_STOPPING;
- handle->drainCounter = 2;
- unsigned threadId;
- stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
- &stream_.callbackInfo, 0, &threadId );
- return SUCCESS;
- }
- else if ( cbReturnValue == 1 ) {
- handle->drainCounter = 1;
- handle->internalDrain = true;
- }
- }
-
- // bufferInfos holds nChannels entries (output plus input device channels);
- // i walks all of them, j counts only the channels of the direction being copied.
- unsigned int nChannels, bufferBytes, i, j;
- nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
- bufferBytes = stream_.bufferSize * formatBytes( stream_.deviceFormat[0] );
-
- if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput != ASIOTrue )
- memset( handle->bufferInfos[i].buffers[bufferIndex], 0, bufferBytes );
- }
-
- }
- else if ( stream_.doConvertBuffer[0] ) {
-
- // Convert user data into the device buffer, then copy one
- // bufferBytes-sized block per output channel to the driver.
- convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
- if ( stream_.doByteSwap[0] )
- byteSwapBuffer( stream_.deviceBuffer,
- stream_.bufferSize * stream_.nDeviceChannels[0],
- stream_.deviceFormat[0] );
-
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput != ASIOTrue )
- memcpy( handle->bufferInfos[i].buffers[bufferIndex],
- &stream_.deviceBuffer[j++*bufferBytes], bufferBytes );
- }
-
- }
- else {
-
- // No conversion needed: the user buffer is copied to the driver directly
- // (byte-swapped in place first when required).
- if ( stream_.doByteSwap[0] )
- byteSwapBuffer( stream_.userBuffer[0],
- stream_.bufferSize * stream_.nUserChannels[0],
- stream_.userFormat );
-
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput != ASIOTrue )
- memcpy( handle->bufferInfos[i].buffers[bufferIndex],
- &stream_.userBuffer[0][bufferBytes*j++], bufferBytes );
- }
-
- }
- }
-
- // Don't bother draining input
- // (each period spent draining advances drainCounter toward the > 3 check above)
- if ( handle->drainCounter ) {
- handle->drainCounter++;
- goto unlock;
- }
-
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
- bufferBytes = stream_.bufferSize * formatBytes(stream_.deviceFormat[1]);
-
- if (stream_.doConvertBuffer[1]) {
-
- // Always interleave ASIO input data.
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput == ASIOTrue )
- memcpy( &stream_.deviceBuffer[j++*bufferBytes],
- handle->bufferInfos[i].buffers[bufferIndex],
- bufferBytes );
- }
-
- if ( stream_.doByteSwap[1] )
- byteSwapBuffer( stream_.deviceBuffer,
- stream_.bufferSize * stream_.nDeviceChannels[1],
- stream_.deviceFormat[1] );
- convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-
- }
- else {
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput == ASIOTrue ) {
- memcpy( &stream_.userBuffer[1][bufferBytes*j++],
- handle->bufferInfos[i].buffers[bufferIndex],
- bufferBytes );
- }
- }
-
- if ( stream_.doByteSwap[1] )
- byteSwapBuffer( stream_.userBuffer[1],
- stream_.bufferSize * stream_.nUserChannels[1],
- stream_.userFormat );
- }
- }
-
- unlock:
- // The following call was suggested by Malte Clasen. While the API
- // documentation indicates it should not be required, some device
- // drivers apparently do not function correctly without it.
- ASIOOutputReady();
-
- RtApi::tickStreamTime();
- return SUCCESS;
-}
-
-// Sample-rate-change notification. The stream is stopped and a message is
-// written to std::cerr; if stopping throws an RtAudioError, that error's
-// message is printed instead.
-static void sampleRateChanged( ASIOSampleRate sRate )
-{
-  // According to the ASIO documentation this usually only happens during
-  // external sync. The driver does not stop audio processing, and the actual
-  // sample rate might not even have changed -- possibly only the sample rate
-  // status of an AES/EBU or S/PDIF digital input at the audio device.
-
-  RtApi *api = (RtApi *) asioCallbackInfo->object;
-  bool stopped = true;
-  try {
-    api->stopStream();
-  }
-  catch ( RtAudioError &stopError ) {
-    std::cerr << "\nRtApiAsio: sampleRateChanged() error (" << stopError.getMessage() << ")!\n" << std::endl;
-    stopped = false;
-  }
-
-  if ( stopped )
-    std::cerr << "\nRtApiAsio: driver reports sample rate changed to " << sRate << " ... stream stopped!!!\n" << std::endl;
-}
-
-// Message callback invoked by the ASIO driver. `selector` names the
-// message; `value` carries its argument (for kAsioSelectorSupported, the
-// selector being queried). Returns 1 for handled messages, 2 for the engine
-// version query, and 0 for everything else.
-static long asioMessages( long selector, long value, void* /*message*/, double* /*opt*/ )
-{
-  switch( selector ) {
-
-  case kAsioSelectorSupported: {
-    // The last three selectors were added for ASIO 2.0; supporting them is
-    // optional.
-    const bool supported = ( value == kAsioResetRequest
-                             || value == kAsioEngineVersion
-                             || value == kAsioResyncRequest
-                             || value == kAsioLatenciesChanged
-                             || value == kAsioSupportsTimeInfo
-                             || value == kAsioSupportsTimeCode
-                             || value == kAsioSupportsInputMonitor );
-    return supported ? 1L : 0L;
-  }
-
-  case kAsioResetRequest:
-    // The reset must be deferred to the next "safe" moment: this code is
-    // called from the driver, so the driver cannot be reset right now.
-    // Resetting means destroying it completely (ASIOStop(),
-    // ASIODisposeBuffers(), destruction) and then initializing it again.
-    std::cerr << "\nRtApiAsio: driver reset requested!!!" << std::endl;
-    return 1L;
-
-  case kAsioResyncRequest:
-    // The driver encountered some non-fatal data loss. This is used for
-    // synchronizing different media and was added mainly to work around
-    // the Win16Mutex problems in Windows 95/98 with the Windows Multimedia
-    // system, which could lose data because the mutex was held too long
-    // by another thread. A driver can issue it in other situations too.
-    // std::cerr << "\nRtApiAsio: driver resync requested!!!" << std::endl;
-    asioXRun = true;
-    return 1L;
-
-  case kAsioLatenciesChanged:
-    // The driver's latencies changed. This does not mean that the buffer
-    // sizes have changed! Internal delay data might need updating.
-    std::cerr << "\nRtApiAsio: driver latency may have changed!!!" << std::endl;
-    return 1L;
-
-  case kAsioEngineVersion:
-    // The ASIO version supported by the host application. If a host does
-    // not implement this selector, the driver assumes ASIO 1.0.
-    return 2L;
-
-  case kAsioSupportsTimeInfo:
-    // Tells the driver whether the asioCallbacks.bufferSwitchTimeInfo()
-    // callback is supported. For compatibility with ASIO 1.0 drivers the
-    // host should always support the "old" bufferSwitch method too.
-  case kAsioSupportsTimeCode:
-    // Tells the driver whether the application wants time code info. If
-    // it does not, the driver has less work to do.
-    return 0L;
-  }
-
-  return 0L;
-}
-
-// Map an ASIOError code to a human-readable message. Codes that are not in
-// the table yield "Unknown error.". The returned pointer refers to a string
-// literal.
-static const char* getAsioErrorString( ASIOError result )
-{
-  struct Messages
-  {
-    ASIOError value;
-    const char*message;
-  };
-
-  static const Messages table[] =
-    {
-      { ASE_NotPresent,       "Hardware input or output is not present or available." },
-      { ASE_HWMalfunction,    "Hardware is malfunctioning." },
-      { ASE_InvalidParameter, "Invalid input parameter." },
-      { ASE_InvalidMode,      "Invalid mode." },
-      { ASE_SPNotAdvancing,   "Sample position not advancing." },
-      { ASE_NoClock,          "Sample clock or rate cannot be determined or is not present." },
-      { ASE_NoMemory,         "Not enough memory to complete the request." }
-    };
-
-  // Linear scan; the first matching entry wins.
-  const Messages *const tableEnd = table + sizeof(table)/sizeof(table[0]);
-  for ( const Messages *entry = table; entry != tableEnd; ++entry ) {
-    if ( entry->value == result )
-      return entry->message;
-  }
-
-  return "Unknown error.";
-}
-
-//******************** End of __WINDOWS_ASIO__ *********************//
-#endif
-
-
-#if defined(__WINDOWS_WASAPI__) // Windows WASAPI API
-
-// Authored by Marcus Tomlinson <themarcustomlinson@gmail.com>, April 2014
-// - Introduces support for the Windows WASAPI API
-// - Aims to deliver bit streams to and from hardware at the lowest possible latency, via the absolute minimum buffer sizes required
-// - Provides flexible stream configuration to an otherwise strict and inflexible WASAPI interface
-// - Includes automatic internal conversion of sample rate and buffer size between hardware and the user
-
-#ifndef INITGUID
- #define INITGUID
-#endif
-#include <audioclient.h>
-#include <avrt.h>
-#include <mmdeviceapi.h>
-#include <functiondiscoverykeys_devpkey.h>
-
-//=============================================================================
-
-// Calls Release() on objectPtr when it is non-NULL and then resets it to
-// NULL. The macro expands to a bare if-block (no do/while wrapper), so it
-// must not be used as the unbraced body of an if/else, and objectPtr is
-// evaluated more than once.
-#define SAFE_RELEASE( objectPtr )\
-if ( objectPtr )\
-{\
- objectPtr->Release();\
- objectPtr = NULL;\
-}
-
-// Pointer to a __stdcall function taking ( LPCWSTR TaskName, LPDWORD TaskIndex )
-// and returning a HANDLE. NOTE(review): presumably bound to
-// AvSetMmThreadCharacteristicsW at run time -- confirm at the use site.
-typedef HANDLE ( __stdcall *TAvSetMmThreadCharacteristicsPtr )( LPCWSTR TaskName, LPDWORD TaskIndex );
-
-//-----------------------------------------------------------------------------
-
-// WASAPI dictates stream sample rate, format, channel count, and in some cases, buffer size.
-// Therefore we must perform all necessary conversions to user buffers in order to satisfy these
-// requirements. WasapiBuffer ring buffers are used between HwIn->UserIn and UserOut->HwOut to
-// provide intermediate storage for read / write synchronization.
-//
-// All sizes and indices are counted in samples, not bytes. The class owns its storage and is
-// therefore non-copyable (a copy would free the same block twice).
-class WasapiBuffer
-{
-public:
-  WasapiBuffer()
-    : buffer_( NULL ),
-      bufferSize_( 0 ),
-      inIndex_( 0 ),
-      outIndex_( 0 ) {}
-
-  ~WasapiBuffer() {
-    free( buffer_ );
-  }
-
-  // Sets the length of the internal ring buffer to bufferSize samples of formatBytes bytes
-  // each, discarding any previous contents. If the allocation fails the ring is left empty
-  // (size 0), so every later push/pull is rejected instead of copying through a NULL pointer.
-  void setBufferSize( unsigned int bufferSize, unsigned int formatBytes ) {
-    free( buffer_ );
-
-    buffer_ = ( char* ) calloc( bufferSize, formatBytes );
-
-    bufferSize_ = buffer_ ? bufferSize : 0;
-    inIndex_ = 0;
-    outIndex_ = 0;
-  }
-
-  // Attempt to push bufferSize samples into the ring buffer at the current "in" index.
-  // Returns false (and copies nothing) when buffer is NULL, bufferSize is 0 or larger than the
-  // ring, or there is not enough room before the "out" index. An unrecognized format copies no
-  // data but still advances the "in" index and returns true.
-  bool pushBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
-  {
-    if ( !buffer ||                 // incoming buffer is NULL
-         bufferSize == 0 ||         // incoming buffer has no data
-         bufferSize > bufferSize_ ) // incoming buffer too large
-    {
-      return false;
-    }
-
-    unsigned int relOutIndex = outIndex_;
-    unsigned int inIndexEnd = inIndex_ + bufferSize;
-    if ( relOutIndex < inIndex_ && inIndexEnd >= bufferSize_ ) {
-      relOutIndex += bufferSize_;
-    }
-
-    // "in" index can end on the "out" index but cannot begin at it
-    if ( inIndex_ <= relOutIndex && inIndexEnd > relOutIndex ) {
-      return false; // not enough space between "in" index and "out" index
-    }
-
-    // split the copy into the part up to the end of the ring and the part that wraps to index 0
-    unsigned int fromZeroSize = ( inIndexEnd > bufferSize_ ) ? inIndexEnd - bufferSize_ : 0;
-    unsigned int fromInSize = bufferSize - fromZeroSize;
-
-    // copy buffer from external to internal
-    const size_t sampleBytes = sampleSize( format );
-    memcpy( buffer_ + inIndex_ * sampleBytes, buffer, fromInSize * sampleBytes );
-    memcpy( buffer_, buffer + fromInSize * sampleBytes, fromZeroSize * sampleBytes );
-
-    // update "in" index
-    inIndex_ += bufferSize;
-    inIndex_ %= bufferSize_;
-
-    return true;
-  }
-
-  // Attempt to pull bufferSize samples from the ring buffer starting at the current "out" index.
-  // Returns false (and copies nothing) when buffer is NULL, bufferSize is 0 or larger than the
-  // ring, or the requested range would cross the "in" index. An unrecognized format copies no
-  // data but still advances the "out" index and returns true.
-  bool pullBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
-  {
-    if ( !buffer ||                 // incoming buffer is NULL
-         bufferSize == 0 ||         // incoming buffer has no data
-         bufferSize > bufferSize_ ) // incoming buffer too large
-    {
-      return false;
-    }
-
-    unsigned int relInIndex = inIndex_;
-    unsigned int outIndexEnd = outIndex_ + bufferSize;
-    if ( relInIndex < outIndex_ && outIndexEnd >= bufferSize_ ) {
-      relInIndex += bufferSize_;
-    }
-
-    // "out" index can begin at and end on the "in" index
-    if ( outIndex_ < relInIndex && outIndexEnd > relInIndex ) {
-      return false; // not enough space between "out" index and "in" index
-    }
-
-    // split the copy into the part up to the end of the ring and the part that wraps to index 0
-    unsigned int fromZeroSize = ( outIndexEnd > bufferSize_ ) ? outIndexEnd - bufferSize_ : 0;
-    unsigned int fromOutSize = bufferSize - fromZeroSize;
-
-    // copy buffer from internal to external
-    const size_t sampleBytes = sampleSize( format );
-    memcpy( buffer, buffer_ + outIndex_ * sampleBytes, fromOutSize * sampleBytes );
-    memcpy( buffer + fromOutSize * sampleBytes, buffer_, fromZeroSize * sampleBytes );
-
-    // update "out" index
-    outIndex_ += bufferSize;
-    outIndex_ %= bufferSize_;
-
-    return true;
-  }
-
-private:
-  // not copyable: declared but intentionally left undefined
-  WasapiBuffer( const WasapiBuffer& );
-  WasapiBuffer& operator=( const WasapiBuffer& );
-
-  // size in bytes of one sample of the given format; 0 for an unrecognized format
-  static size_t sampleSize( RtAudioFormat format )
-  {
-    switch( format )
-    {
-      case RTAUDIO_SINT8:   return sizeof( char );
-      case RTAUDIO_SINT16:  return sizeof( short );
-      case RTAUDIO_SINT24:  return sizeof( S24 );
-      case RTAUDIO_SINT32:  return sizeof( int );
-      case RTAUDIO_FLOAT32: return sizeof( float );
-      case RTAUDIO_FLOAT64: return sizeof( double );
-    }
-    return 0;
-  }
-
-  char* buffer_;
-  unsigned int bufferSize_; // ring length in samples; 0 when no storage is allocated
-  unsigned int inIndex_;    // next sample position to write
-  unsigned int outIndex_;   // next sample position to read
-};
-
-//-----------------------------------------------------------------------------
-
-// In order to satisfy WASAPI's buffer requirements, we need a means of converting sample rate
-// between HW and the user. The convertBufferWasapi function is used to perform this conversion
-// between HwIn->UserIn and UserOut->HwOut during the stream callback loop.
-// This sample rate converter favors speed over quality, and works best with conversions between
-// one rate and its multiple.
-//
-// For every output frame the input frame at the truncated fractional position is copied (all
-// channelCount samples at once). outSampleCount receives the number of frames written,
-// roundf( inSampleCount * outSampleRate / inSampleRate ). A zero sample rate or an empty input
-// yields outSampleCount == 0; an unrecognized format sets outSampleCount but copies nothing.
-void convertBufferWasapi( char* outBuffer,
-                          const char* inBuffer,
-                          const unsigned int& channelCount,
-                          const unsigned int& inSampleRate,
-                          const unsigned int& outSampleRate,
-                          const unsigned int& inSampleCount,
-                          unsigned int& outSampleCount,
-                          const RtAudioFormat& format )
-{
-  // a zero input rate would divide by zero below; the other two cases produce no output anyway
-  if ( inSampleRate == 0 || outSampleRate == 0 || inSampleCount == 0 ) {
-    outSampleCount = 0;
-    return;
-  }
-
-  // calculate the new outSampleCount and relative sampleStep
-  float sampleRatio = ( float ) outSampleRate / inSampleRate;
-  float sampleStep = 1.0f / sampleRatio;
-  float inSampleFraction = 0.0f;
-
-  outSampleCount = ( unsigned int ) roundf( inSampleCount * sampleRatio );
-
-  // the sample size does not change inside the loop, so resolve it once
-  size_t sampleBytes = 0;
-  switch ( format )
-  {
-    case RTAUDIO_SINT8:   sampleBytes = sizeof( char );   break;
-    case RTAUDIO_SINT16:  sampleBytes = sizeof( short );  break;
-    case RTAUDIO_SINT24:  sampleBytes = sizeof( S24 );    break;
-    case RTAUDIO_SINT32:  sampleBytes = sizeof( int );    break;
-    case RTAUDIO_FLOAT32: sampleBytes = sizeof( float );  break;
-    case RTAUDIO_FLOAT64: sampleBytes = sizeof( double ); break;
-  }
-
-  const size_t frameBytes = channelCount * sampleBytes;
-  if ( frameBytes == 0 )
-    return; // unknown format or no channels: nothing to copy
-
-  const unsigned int lastInSample = inSampleCount - 1;
-
-  // frame-by-frame, copy each relative input sample into its corresponding output sample
-  for ( unsigned int outSample = 0; outSample < outSampleCount; outSample++ )
-  {
-    unsigned int inSample = ( unsigned int ) inSampleFraction;
-
-    // accumulated float error must never index past the end of the input
-    if ( inSample > lastInSample )
-      inSample = lastInSample;
-
-    memcpy( outBuffer + outSample * frameBytes, inBuffer + inSample * frameBytes, frameBytes );
-
-    // jump to next in sample
-    inSampleFraction += sampleStep;
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-// Per-stream state for the WASAPI implementation: the audio client, the
-// capture/render service client and the event handle for each direction.
-// Every member starts out as NULL.
-struct WasapiHandle
-{
-  IAudioClient* captureAudioClient;
-  IAudioClient* renderAudioClient;
-  IAudioCaptureClient* captureClient;
-  IAudioRenderClient* renderClient;
-  HANDLE captureEvent;
-  HANDLE renderEvent;
-
-  WasapiHandle()
-  {
-    captureAudioClient = NULL;
-    renderAudioClient = NULL;
-    captureClient = NULL;
-    renderClient = NULL;
-    captureEvent = NULL;
-    renderEvent = NULL;
-  }
-};
-
-//=============================================================================
-
-// Initializes COM for the calling thread and creates the MMDevice
-// enumerator. coInitialized_ records whether CoInitialize() did not fail.
-// A failure to create the enumerator is reported as a DRIVER_ERROR.
-RtApiWasapi::RtApiWasapi()
-  : coInitialized_( false ), deviceEnumerator_( NULL )
-{
-  // WASAPI can run either apartment or multi-threaded
-  const HRESULT comResult = CoInitialize( NULL );
-  coInitialized_ = !FAILED( comResult );
-
-  // Instantiate device enumerator
-  const HRESULT createResult = CoCreateInstance( __uuidof( MMDeviceEnumerator ), NULL,
-                                                 CLSCTX_ALL, __uuidof( IMMDeviceEnumerator ),
-                                                 ( void** ) &deviceEnumerator_ );
-  if ( !FAILED( createResult ) )
-    return;
-
-  errorText_ = "RtApiWasapi::RtApiWasapi: Unable to instantiate device enumerator";
-  error( RtAudioError::DRIVER_ERROR );
-}
-
-//-----------------------------------------------------------------------------
-
-// Closes a stream that is still open, releases the device enumerator and
-// undoes the constructor's CoInitialize() when that call had not failed.
-RtApiWasapi::~RtApiWasapi()
-{
-  const bool streamStillOpen = ( stream_.state != STREAM_CLOSED );
-  if ( streamStillOpen )
-    closeStream();
-
-  // same effect as SAFE_RELEASE( deviceEnumerator_ )
-  if ( deviceEnumerator_ ) {
-    deviceEnumerator_->Release();
-    deviceEnumerator_ = NULL;
-  }
-
-  // If this object previously called CoInitialize()
-  if ( coInitialized_ )
-    CoUninitialize();
-}
-
-//=============================================================================
-
-// Returns the number of active capture devices plus the number of active
-// render devices. If any enumeration step fails, a DRIVER_ERROR is reported
-// and 0 is returned.
-unsigned int RtApiWasapi::getDeviceCount( void )
-{
-  IMMDeviceCollection* captureCollection = NULL;
-  IMMDeviceCollection* renderCollection = NULL;
-  unsigned int nCapture = 0;
-  unsigned int nRender = 0;
-
-  errorText_.clear();
-
-  // Each step runs only if all previous steps succeeded; the first failure
-  // records its message and skips the rest.
-  if ( FAILED( deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureCollection ) ) ) {
-    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device collection.";
-  }
-  else if ( FAILED( captureCollection->GetCount( &nCapture ) ) ) {
-    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device count.";
-  }
-  else if ( FAILED( deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderCollection ) ) ) {
-    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device collection.";
-  }
-  else if ( FAILED( renderCollection->GetCount( &nRender ) ) ) {
-    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device count.";
-  }
-
-  // release all references, on success and on failure alike
-  SAFE_RELEASE( captureCollection );
-  SAFE_RELEASE( renderCollection );
-
-  if ( !errorText_.empty() ) {
-    error( RtAudioError::DRIVER_ERROR );
-    return 0;
-  }
-
-  return nCapture + nRender;
-}
-
-//-----------------------------------------------------------------------------
-
-// Probe the device with the given index and return its description.
-// Indices cover the active render devices first, followed by the active
-// capture devices. The result reports the friendly name, whether the device
-// is the default for its direction (decided by comparing friendly names),
-// the channel count, preferred sample rate and native format of the device
-// mix format, and every entry of SAMPLE_RATES as a supported rate.
-// On any failure info.probed stays false and an error is reported through
-// error(): INVALID_USE for an out-of-range index, DRIVER_ERROR otherwise.
-RtAudio::DeviceInfo RtApiWasapi::getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  unsigned int captureDeviceCount = 0;
-  unsigned int renderDeviceCount = 0;
-  std::string defaultDeviceName;
-  bool isCaptureDevice = false;
-
-  // Initialize both property variants up front: every early "goto Exit"
-  // below ends in PropVariantClear(), which must never see uninitialized
-  // storage.
-  PROPVARIANT deviceNameProp;
-  PROPVARIANT defaultDeviceNameProp;
-  PropVariantInit( &deviceNameProp );
-  PropVariantInit( &defaultDeviceNameProp );
-
-  IMMDeviceCollection* captureDevices = NULL;
-  IMMDeviceCollection* renderDevices = NULL;
-  IMMDevice* devicePtr = NULL;
-  IMMDevice* defaultDevicePtr = NULL;
-  IAudioClient* audioClient = NULL;
-  IPropertyStore* devicePropStore = NULL;
-  IPropertyStore* defaultDevicePropStore = NULL;
-
-  WAVEFORMATEX* deviceFormat = NULL;
-
-  // probed
-  info.probed = false;
-
-  // Count capture devices
-  errorText_.clear();
-  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
-  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device collection.";
-    goto Exit;
-  }
-
-  hr = captureDevices->GetCount( &captureDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device count.";
-    goto Exit;
-  }
-
-  // Count render devices
-  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device collection.";
-    goto Exit;
-  }
-
-  hr = renderDevices->GetCount( &renderDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device count.";
-    goto Exit;
-  }
-
-  // validate device index
-  if ( device >= captureDeviceCount + renderDeviceCount ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Invalid device index.";
-    errorType = RtAudioError::INVALID_USE;
-    goto Exit;
-  }
-
-  // determine whether index falls within capture or render devices
-  // (render devices occupy the low indices, capture devices follow)
-  if ( device >= renderDeviceCount ) {
-    hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device handle.";
-      goto Exit;
-    }
-    isCaptureDevice = true;
-  }
-  else {
-    hr = renderDevices->Item( device, &devicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device handle.";
-      goto Exit;
-    }
-    isCaptureDevice = false;
-  }
-
-  // get default device name
-  if ( isCaptureDevice ) {
-    hr = deviceEnumerator_->GetDefaultAudioEndpoint( eCapture, eConsole, &defaultDevicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default capture device handle.";
-      goto Exit;
-    }
-  }
-  else {
-    hr = deviceEnumerator_->GetDefaultAudioEndpoint( eRender, eConsole, &defaultDevicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default render device handle.";
-      goto Exit;
-    }
-  }
-
-  hr = defaultDevicePtr->OpenPropertyStore( STGM_READ, &defaultDevicePropStore );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open default device property store.";
-    goto Exit;
-  }
-
-  hr = defaultDevicePropStore->GetValue( PKEY_Device_FriendlyName, &defaultDeviceNameProp );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default device property: PKEY_Device_FriendlyName.";
-    goto Exit;
-  }
-
-  defaultDeviceName = convertCharPointerToStdString(defaultDeviceNameProp.pwszVal);
-
-  // name
-  hr = devicePtr->OpenPropertyStore( STGM_READ, &devicePropStore );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open device property store.";
-    goto Exit;
-  }
-
-  hr = devicePropStore->GetValue( PKEY_Device_FriendlyName, &deviceNameProp );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device property: PKEY_Device_FriendlyName.";
-    goto Exit;
-  }
-
-  info.name = convertCharPointerToStdString(deviceNameProp.pwszVal);
-
-  // is default
-  if ( isCaptureDevice ) {
-    info.isDefaultInput = info.name == defaultDeviceName;
-    info.isDefaultOutput = false;
-  }
-  else {
-    info.isDefaultInput = false;
-    info.isDefaultOutput = info.name == defaultDeviceName;
-  }
-
-  // channel count
-  hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL, NULL, ( void** ) &audioClient );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device audio client.";
-    goto Exit;
-  }
-
-  hr = audioClient->GetMixFormat( &deviceFormat );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device mix format.";
-    goto Exit;
-  }
-
-  if ( isCaptureDevice ) {
-    info.inputChannels = deviceFormat->nChannels;
-    info.outputChannels = 0;
-    info.duplexChannels = 0;
-  }
-  else {
-    info.inputChannels = 0;
-    info.outputChannels = deviceFormat->nChannels;
-    info.duplexChannels = 0;
-  }
-
-  // sample rates
-  info.sampleRates.clear();
-
-  // allow support for all sample rates as we have a built-in sample rate converter
-  for ( unsigned int i = 0; i < MAX_SAMPLE_RATES; i++ ) {
-    info.sampleRates.push_back( SAMPLE_RATES[i] );
-  }
-  info.preferredSampleRate = deviceFormat->nSamplesPerSec;
-
-  // native format
-  info.nativeFormats = 0;
-
-  if ( deviceFormat->wFormatTag == WAVE_FORMAT_IEEE_FLOAT ||
-       ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
-         ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT ) )
-  {
-    if ( deviceFormat->wBitsPerSample == 32 ) {
-      info.nativeFormats |= RTAUDIO_FLOAT32;
-    }
-    else if ( deviceFormat->wBitsPerSample == 64 ) {
-      info.nativeFormats |= RTAUDIO_FLOAT64;
-    }
-  }
-  else if ( deviceFormat->wFormatTag == WAVE_FORMAT_PCM ||
-            ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
-              ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_PCM ) )
-  {
-    if ( deviceFormat->wBitsPerSample == 8 ) {
-      info.nativeFormats |= RTAUDIO_SINT8;
-    }
-    else if ( deviceFormat->wBitsPerSample == 16 ) {
-      info.nativeFormats |= RTAUDIO_SINT16;
-    }
-    else if ( deviceFormat->wBitsPerSample == 24 ) {
-      info.nativeFormats |= RTAUDIO_SINT24;
-    }
-    else if ( deviceFormat->wBitsPerSample == 32 ) {
-      info.nativeFormats |= RTAUDIO_SINT32;
-    }
-  }
-
-  // probed
-  info.probed = true;
-
-Exit:
-  // release all references
-  PropVariantClear( &deviceNameProp );
-  PropVariantClear( &defaultDeviceNameProp );
-
-  SAFE_RELEASE( captureDevices );
-  SAFE_RELEASE( renderDevices );
-  SAFE_RELEASE( devicePtr );
-  SAFE_RELEASE( defaultDevicePtr );
-  SAFE_RELEASE( audioClient );
-  SAFE_RELEASE( devicePropStore );
-  SAFE_RELEASE( defaultDevicePropStore );
-
-  // deviceFormat is still NULL when GetMixFormat() was never reached
-  CoTaskMemFree( deviceFormat );
-
-  if ( !errorText_.empty() )
-    error( errorType );
-  return info;
-}
-
-//-----------------------------------------------------------------------------
-
-unsigned int RtApiWasapi::getDefaultOutputDevice( void )
-{
- for ( unsigned int i = 0; i < getDeviceCount(); i++ ) {
- if ( getDeviceInfo( i ).isDefaultOutput ) {
- return i;
- }
- }
-
- return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-unsigned int RtApiWasapi::getDefaultInputDevice( void )
-{
- for ( unsigned int i = 0; i < getDeviceCount(); i++ ) {
- if ( getDeviceInfo( i ).isDefaultInput ) {
- return i;
- }
- }
-
- return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::closeStream( void )
-{
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiWasapi::closeStream: No open stream to close.";
- error( RtAudioError::WARNING );
- return;
- }
-
- if ( stream_.state != STREAM_STOPPED )
- stopStream();
-
- // clean up stream memory
- SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient )
- SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient )
-
- SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->captureClient )
- SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->renderClient )
-
- if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent )
- CloseHandle( ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent );
-
- if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent )
- CloseHandle( ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent );
-
- delete ( WasapiHandle* ) stream_.apiHandle;
- stream_.apiHandle = NULL;
-
- for ( int i = 0; i < 2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- // update stream state
- stream_.state = STREAM_CLOSED;
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::startStream( void )
-{
- verifyStream();
-
- if ( stream_.state == STREAM_RUNNING ) {
- errorText_ = "RtApiWasapi::startStream: The stream is already running.";
- error( RtAudioError::WARNING );
- return;
- }
-
- // update stream state
- stream_.state = STREAM_RUNNING;
-
- // create WASAPI stream thread
- stream_.callbackInfo.thread = ( ThreadHandle ) CreateThread( NULL, 0, runWasapiThread, this, CREATE_SUSPENDED, NULL );
-
- if ( !stream_.callbackInfo.thread ) {
- errorText_ = "RtApiWasapi::startStream: Unable to instantiate callback thread.";
- error( RtAudioError::THREAD_ERROR );
- }
- else {
- SetThreadPriority( ( void* ) stream_.callbackInfo.thread, stream_.callbackInfo.priority );
- ResumeThread( ( void* ) stream_.callbackInfo.thread );
- }
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::stopStream( void )
-{
- verifyStream();
-
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiWasapi::stopStream: The stream is already stopped.";
- error( RtAudioError::WARNING );
- return;
- }
-
- // inform stream thread by setting stream state to STREAM_STOPPING
- stream_.state = STREAM_STOPPING;
-
- // wait until stream thread is stopped
- while( stream_.state != STREAM_STOPPED ) {
- Sleep( 1 );
- }
-
- // Wait for the last buffer to play before stopping.
- Sleep( 1000 * stream_.bufferSize / stream_.sampleRate );
-
- // stop capture client if applicable
- if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient ) {
- HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient->Stop();
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::stopStream: Unable to stop capture stream.";
- error( RtAudioError::DRIVER_ERROR );
- return;
- }
- }
-
- // stop render client if applicable
- if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient ) {
- HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient->Stop();
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::stopStream: Unable to stop render stream.";
- error( RtAudioError::DRIVER_ERROR );
- return;
- }
- }
-
- // close thread handle
- if ( stream_.callbackInfo.thread && !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
- errorText_ = "RtApiWasapi::stopStream: Unable to close callback thread.";
- error( RtAudioError::THREAD_ERROR );
- return;
- }
-
- stream_.callbackInfo.thread = (ThreadHandle) NULL;
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::abortStream( void )
-{
- verifyStream();
-
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiWasapi::abortStream: The stream is already stopped.";
- error( RtAudioError::WARNING );
- return;
- }
-
- // inform stream thread by setting stream state to STREAM_STOPPING
- stream_.state = STREAM_STOPPING;
-
- // wait until stream thread is stopped
- while ( stream_.state != STREAM_STOPPED ) {
- Sleep( 1 );
- }
-
- // stop capture client if applicable
- if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient ) {
- HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient->Stop();
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::abortStream: Unable to stop capture stream.";
- error( RtAudioError::DRIVER_ERROR );
- return;
- }
- }
-
- // stop render client if applicable
- if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient ) {
- HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient->Stop();
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::abortStream: Unable to stop render stream.";
- error( RtAudioError::DRIVER_ERROR );
- return;
- }
- }
-
- // close thread handle
- if ( stream_.callbackInfo.thread && !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
- errorText_ = "RtApiWasapi::abortStream: Unable to close callback thread.";
- error( RtAudioError::THREAD_ERROR );
- return;
- }
-
- stream_.callbackInfo.thread = (ThreadHandle) NULL;
-}
-
-//-----------------------------------------------------------------------------
-
-bool RtApiWasapi::probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
- unsigned int firstChannel, unsigned int sampleRate,
- RtAudioFormat format, unsigned int* bufferSize,
- RtAudio::StreamOptions* options )
-{
- bool methodResult = FAILURE;
- unsigned int captureDeviceCount = 0;
- unsigned int renderDeviceCount = 0;
-
- IMMDeviceCollection* captureDevices = NULL;
- IMMDeviceCollection* renderDevices = NULL;
- IMMDevice* devicePtr = NULL;
- WAVEFORMATEX* deviceFormat = NULL;
- unsigned int bufferBytes;
- stream_.state = STREAM_STOPPED;
-
- // create API Handle if not already created
- if ( !stream_.apiHandle )
- stream_.apiHandle = ( void* ) new WasapiHandle();
-
- // Count capture devices
- errorText_.clear();
- RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
- HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device collection.";
- goto Exit;
- }
-
- hr = captureDevices->GetCount( &captureDeviceCount );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device count.";
- goto Exit;
- }
-
- // Count render devices
- hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device collection.";
- goto Exit;
- }
-
- hr = renderDevices->GetCount( &renderDeviceCount );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device count.";
- goto Exit;
- }
-
- // validate device index
- if ( device >= captureDeviceCount + renderDeviceCount ) {
- errorType = RtAudioError::INVALID_USE;
- errorText_ = "RtApiWasapi::probeDeviceOpen: Invalid device index.";
- goto Exit;
- }
-
- // determine whether index falls within capture or render devices
- if ( device >= renderDeviceCount ) {
- if ( mode != INPUT ) {
- errorType = RtAudioError::INVALID_USE;
- errorText_ = "RtApiWasapi::probeDeviceOpen: Capture device selected as output device.";
- goto Exit;
- }
-
- // retrieve captureAudioClient from devicePtr
- IAudioClient*& captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
-
- hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device handle.";
- goto Exit;
- }
-
- hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
- NULL, ( void** ) &captureAudioClient );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
- goto Exit;
- }
-
- hr = captureAudioClient->GetMixFormat( &deviceFormat );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
- goto Exit;
- }
-
- stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
- captureAudioClient->GetStreamLatency( ( long long* ) &stream_.latency[mode] );
- }
- else {
- if ( mode != OUTPUT ) {
- errorType = RtAudioError::INVALID_USE;
- errorText_ = "RtApiWasapi::probeDeviceOpen: Render device selected as input device.";
- goto Exit;
- }
-
- // retrieve renderAudioClient from devicePtr
- IAudioClient*& renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
-
- hr = renderDevices->Item( device, &devicePtr );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device handle.";
- goto Exit;
- }
-
- hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
- NULL, ( void** ) &renderAudioClient );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
- goto Exit;
- }
-
- hr = renderAudioClient->GetMixFormat( &deviceFormat );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
- goto Exit;
- }
-
- stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
- renderAudioClient->GetStreamLatency( ( long long* ) &stream_.latency[mode] );
- }
-
- // fill stream data
- if ( ( stream_.mode == OUTPUT && mode == INPUT ) ||
- ( stream_.mode == INPUT && mode == OUTPUT ) ) {
- stream_.mode = DUPLEX;
- }
- else {
- stream_.mode = mode;
- }
-
- stream_.device[mode] = device;
- stream_.doByteSwap[mode] = false;
- stream_.sampleRate = sampleRate;
- stream_.bufferSize = *bufferSize;
- stream_.nBuffers = 1;
- stream_.nUserChannels[mode] = channels;
- stream_.channelOffset[mode] = firstChannel;
- stream_.userFormat = format;
- stream_.deviceFormat[mode] = getDeviceInfo( device ).nativeFormats;
-
- if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
- stream_.userInterleaved = false;
- else
- stream_.userInterleaved = true;
- stream_.deviceInterleaved[mode] = true;
-
- // Set flags for buffer conversion.
- stream_.doConvertBuffer[mode] = false;
- if ( stream_.userFormat != stream_.deviceFormat[mode] ||
- stream_.nUserChannels != stream_.nDeviceChannels )
- stream_.doConvertBuffer[mode] = true;
- else if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
- stream_.nUserChannels[mode] > 1 )
- stream_.doConvertBuffer[mode] = true;
-
- if ( stream_.doConvertBuffer[mode] )
- setConvertInfo( mode, 0 );
-
- // Allocate necessary internal buffers
- bufferBytes = stream_.nUserChannels[mode] * stream_.bufferSize * formatBytes( stream_.userFormat );
-
- stream_.userBuffer[mode] = ( char* ) calloc( bufferBytes, 1 );
- if ( !stream_.userBuffer[mode] ) {
- errorType = RtAudioError::MEMORY_ERROR;
- errorText_ = "RtApiWasapi::probeDeviceOpen: Error allocating user buffer memory.";
- goto Exit;
- }
-
- if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME )
- stream_.callbackInfo.priority = 15;
- else
- stream_.callbackInfo.priority = 0;
-
- ///! TODO: RTAUDIO_MINIMIZE_LATENCY // Provide stream buffers directly to callback
- ///! TODO: RTAUDIO_HOG_DEVICE // Exclusive mode
-
- methodResult = SUCCESS;
-
-Exit:
- //clean up
- SAFE_RELEASE( captureDevices );
- SAFE_RELEASE( renderDevices );
- SAFE_RELEASE( devicePtr );
- CoTaskMemFree( deviceFormat );
-
- // if method failed, close the stream
- if ( methodResult == FAILURE )
- closeStream();
-
- if ( !errorText_.empty() )
- error( errorType );
- return methodResult;
-}
-
-//=============================================================================
-
-DWORD WINAPI RtApiWasapi::runWasapiThread( void* wasapiPtr )
-{
- if ( wasapiPtr )
- ( ( RtApiWasapi* ) wasapiPtr )->wasapiThread();
-
- return 0;
-}
-
-DWORD WINAPI RtApiWasapi::stopWasapiThread( void* wasapiPtr )
-{
- if ( wasapiPtr )
- ( ( RtApiWasapi* ) wasapiPtr )->stopStream();
-
- return 0;
-}
-
-DWORD WINAPI RtApiWasapi::abortWasapiThread( void* wasapiPtr )
-{
- if ( wasapiPtr )
- ( ( RtApiWasapi* ) wasapiPtr )->abortStream();
-
- return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::wasapiThread()
-{
- // as this is a new thread, we must CoInitialize it
- CoInitialize( NULL );
-
- HRESULT hr;
-
- IAudioClient* captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
- IAudioClient* renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
- IAudioCaptureClient* captureClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureClient;
- IAudioRenderClient* renderClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderClient;
- HANDLE captureEvent = ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent;
- HANDLE renderEvent = ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent;
-
- WAVEFORMATEX* captureFormat = NULL;
- WAVEFORMATEX* renderFormat = NULL;
- float captureSrRatio = 0.0f;
- float renderSrRatio = 0.0f;
- WasapiBuffer captureBuffer;
- WasapiBuffer renderBuffer;
-
- // declare local stream variables
- RtAudioCallback callback = ( RtAudioCallback ) stream_.callbackInfo.callback;
- BYTE* streamBuffer = NULL;
- unsigned long captureFlags = 0;
- unsigned int bufferFrameCount = 0;
- unsigned int numFramesPadding = 0;
- unsigned int convBufferSize = 0;
- bool callbackPushed = false;
- bool callbackPulled = false;
- bool callbackStopped = false;
- int callbackResult = 0;
-
- // convBuffer is used to store converted buffers between WASAPI and the user
- char* convBuffer = NULL;
- unsigned int convBuffSize = 0;
- unsigned int deviceBuffSize = 0;
-
- errorText_.clear();
- RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
-
- // Attempt to assign "Pro Audio" characteristic to thread
- HMODULE AvrtDll = LoadLibrary( (LPCTSTR) "AVRT.dll" );
- if ( AvrtDll ) {
- DWORD taskIndex = 0;
- TAvSetMmThreadCharacteristicsPtr AvSetMmThreadCharacteristicsPtr = ( TAvSetMmThreadCharacteristicsPtr ) GetProcAddress( AvrtDll, "AvSetMmThreadCharacteristicsW" );
- AvSetMmThreadCharacteristicsPtr( L"Pro Audio", &taskIndex );
- FreeLibrary( AvrtDll );
- }
-
- // start capture stream if applicable
- if ( captureAudioClient ) {
- hr = captureAudioClient->GetMixFormat( &captureFormat );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
- goto Exit;
- }
-
- captureSrRatio = ( ( float ) captureFormat->nSamplesPerSec / stream_.sampleRate );
-
- // initialize capture stream according to desire buffer size
- float desiredBufferSize = stream_.bufferSize * captureSrRatio;
- REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / captureFormat->nSamplesPerSec );
-
- if ( !captureClient ) {
- hr = captureAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
- AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
- desiredBufferPeriod,
- desiredBufferPeriod,
- captureFormat,
- NULL );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize capture audio client.";
- goto Exit;
- }
-
- hr = captureAudioClient->GetService( __uuidof( IAudioCaptureClient ),
- ( void** ) &captureClient );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture client handle.";
- goto Exit;
- }
-
- // configure captureEvent to trigger on every available capture buffer
- captureEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
- if ( !captureEvent ) {
- errorType = RtAudioError::SYSTEM_ERROR;
- errorText_ = "RtApiWasapi::wasapiThread: Unable to create capture event.";
- goto Exit;
- }
-
- hr = captureAudioClient->SetEventHandle( captureEvent );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to set capture event handle.";
- goto Exit;
- }
-
- ( ( WasapiHandle* ) stream_.apiHandle )->captureClient = captureClient;
- ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent = captureEvent;
- }
-
- unsigned int inBufferSize = 0;
- hr = captureAudioClient->GetBufferSize( &inBufferSize );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to get capture buffer size.";
- goto Exit;
- }
-
- // scale outBufferSize according to stream->user sample rate ratio
- unsigned int outBufferSize = ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT];
- inBufferSize *= stream_.nDeviceChannels[INPUT];
-
- // set captureBuffer size
- captureBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[INPUT] ) );
-
- // reset the capture stream
- hr = captureAudioClient->Reset();
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to reset capture stream.";
- goto Exit;
- }
-
- // start the capture stream
- hr = captureAudioClient->Start();
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to start capture stream.";
- goto Exit;
- }
- }
-
- // start render stream if applicable
- if ( renderAudioClient ) {
- hr = renderAudioClient->GetMixFormat( &renderFormat );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
- goto Exit;
- }
-
- renderSrRatio = ( ( float ) renderFormat->nSamplesPerSec / stream_.sampleRate );
-
- // initialize render stream according to desire buffer size
- float desiredBufferSize = stream_.bufferSize * renderSrRatio;
- REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / renderFormat->nSamplesPerSec );
-
- if ( !renderClient ) {
- hr = renderAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
- AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
- desiredBufferPeriod,
- desiredBufferPeriod,
- renderFormat,
- NULL );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize render audio client.";
- goto Exit;
- }
-
- hr = renderAudioClient->GetService( __uuidof( IAudioRenderClient ),
- ( void** ) &renderClient );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render client handle.";
- goto Exit;
- }
-
- // configure renderEvent to trigger on every available render buffer
- renderEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
- if ( !renderEvent ) {
- errorType = RtAudioError::SYSTEM_ERROR;
- errorText_ = "RtApiWasapi::wasapiThread: Unable to create render event.";
- goto Exit;
- }
-
- hr = renderAudioClient->SetEventHandle( renderEvent );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to set render event handle.";
- goto Exit;
- }
-
- ( ( WasapiHandle* ) stream_.apiHandle )->renderClient = renderClient;
- ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent = renderEvent;
- }
-
- unsigned int outBufferSize = 0;
- hr = renderAudioClient->GetBufferSize( &outBufferSize );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to get render buffer size.";
- goto Exit;
- }
-
- // scale inBufferSize according to user->stream sample rate ratio
- unsigned int inBufferSize = ( unsigned int ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT];
- outBufferSize *= stream_.nDeviceChannels[OUTPUT];
-
- // set renderBuffer size
- renderBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[OUTPUT] ) );
-
- // reset the render stream
- hr = renderAudioClient->Reset();
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to reset render stream.";
- goto Exit;
- }
-
- // start the render stream
- hr = renderAudioClient->Start();
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to start render stream.";
- goto Exit;
- }
- }
-
- if ( stream_.mode == INPUT ) {
- convBuffSize = ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
- deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
- }
- else if ( stream_.mode == OUTPUT ) {
- convBuffSize = ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
- deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
- }
- else if ( stream_.mode == DUPLEX ) {
- convBuffSize = std::max( ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
- ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
- deviceBuffSize = std::max( stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
- stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
- }
-
- convBuffer = ( char* ) malloc( convBuffSize );
- stream_.deviceBuffer = ( char* ) malloc( deviceBuffSize );
- if ( !convBuffer || !stream_.deviceBuffer ) {
- errorType = RtAudioError::MEMORY_ERROR;
- errorText_ = "RtApiWasapi::wasapiThread: Error allocating device buffer memory.";
- goto Exit;
- }
-
- // stream process loop
- while ( stream_.state != STREAM_STOPPING ) {
- if ( !callbackPulled ) {
- // Callback Input
- // ==============
- // 1. Pull callback buffer from inputBuffer
- // 2. If 1. was successful: Convert callback buffer to user sample rate and channel count
- // Convert callback buffer to user format
-
- if ( captureAudioClient ) {
- // Pull callback buffer from inputBuffer
- callbackPulled = captureBuffer.pullBuffer( convBuffer,
- ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT],
- stream_.deviceFormat[INPUT] );
-
- if ( callbackPulled ) {
- // Convert callback buffer to user sample rate
- convertBufferWasapi( stream_.deviceBuffer,
- convBuffer,
- stream_.nDeviceChannels[INPUT],
- captureFormat->nSamplesPerSec,
- stream_.sampleRate,
- ( unsigned int ) ( stream_.bufferSize * captureSrRatio ),
- convBufferSize,
- stream_.deviceFormat[INPUT] );
-
- if ( stream_.doConvertBuffer[INPUT] ) {
- // Convert callback buffer to user format
- convertBuffer( stream_.userBuffer[INPUT],
- stream_.deviceBuffer,
- stream_.convertInfo[INPUT] );
- }
- else {
- // no further conversion, simple copy deviceBuffer to userBuffer
- memcpy( stream_.userBuffer[INPUT],
- stream_.deviceBuffer,
- stream_.bufferSize * stream_.nUserChannels[INPUT] * formatBytes( stream_.userFormat ) );
- }
- }
- }
- else {
- // if there is no capture stream, set callbackPulled flag
- callbackPulled = true;
- }
-
- // Execute Callback
- // ================
- // 1. Execute user callback method
- // 2. Handle return value from callback
-
- // if callback has not requested the stream to stop
- if ( callbackPulled && !callbackStopped ) {
- // Execute user callback method
- callbackResult = callback( stream_.userBuffer[OUTPUT],
- stream_.userBuffer[INPUT],
- stream_.bufferSize,
- getStreamTime(),
- captureFlags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY ? RTAUDIO_INPUT_OVERFLOW : 0,
- stream_.callbackInfo.userData );
-
- // Handle return value from callback
- if ( callbackResult == 1 ) {
- // instantiate a thread to stop this thread
- HANDLE threadHandle = CreateThread( NULL, 0, stopWasapiThread, this, 0, NULL );
- if ( !threadHandle ) {
- errorType = RtAudioError::THREAD_ERROR;
- errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream stop thread.";
- goto Exit;
- }
- else if ( !CloseHandle( threadHandle ) ) {
- errorType = RtAudioError::THREAD_ERROR;
- errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream stop thread handle.";
- goto Exit;
- }
-
- callbackStopped = true;
- }
- else if ( callbackResult == 2 ) {
- // instantiate a thread to stop this thread
- HANDLE threadHandle = CreateThread( NULL, 0, abortWasapiThread, this, 0, NULL );
- if ( !threadHandle ) {
- errorType = RtAudioError::THREAD_ERROR;
- errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream abort thread.";
- goto Exit;
- }
- else if ( !CloseHandle( threadHandle ) ) {
- errorType = RtAudioError::THREAD_ERROR;
- errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream abort thread handle.";
- goto Exit;
- }
-
- callbackStopped = true;
- }
- }
- }
-
- // Callback Output
- // ===============
- // 1. Convert callback buffer to stream format
- // 2. Convert callback buffer to stream sample rate and channel count
- // 3. Push callback buffer into outputBuffer
-
- if ( renderAudioClient && callbackPulled ) {
- if ( stream_.doConvertBuffer[OUTPUT] ) {
- // Convert callback buffer to stream format
- convertBuffer( stream_.deviceBuffer,
- stream_.userBuffer[OUTPUT],
- stream_.convertInfo[OUTPUT] );
-
- }
-
- // Convert callback buffer to stream sample rate
- convertBufferWasapi( convBuffer,
- stream_.deviceBuffer,
- stream_.nDeviceChannels[OUTPUT],
- stream_.sampleRate,
- renderFormat->nSamplesPerSec,
- stream_.bufferSize,
- convBufferSize,
- stream_.deviceFormat[OUTPUT] );
-
- // Push callback buffer into outputBuffer
- callbackPushed = renderBuffer.pushBuffer( convBuffer,
- convBufferSize * stream_.nDeviceChannels[OUTPUT],
- stream_.deviceFormat[OUTPUT] );
- }
- else {
- // if there is no render stream, set callbackPushed flag
- callbackPushed = true;
- }
-
- // Stream Capture
- // ==============
- // 1. Get capture buffer from stream
- // 2. Push capture buffer into inputBuffer
- // 3. If 2. was successful: Release capture buffer
-
- if ( captureAudioClient ) {
- // if the callback input buffer was not pulled from captureBuffer, wait for next capture event
- if ( !callbackPulled ) {
- WaitForSingleObject( captureEvent, INFINITE );
- }
-
- // Get capture buffer from stream
- hr = captureClient->GetBuffer( &streamBuffer,
- &bufferFrameCount,
- &captureFlags, NULL, NULL );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture buffer.";
- goto Exit;
- }
-
- if ( bufferFrameCount != 0 ) {
- // Push capture buffer into inputBuffer
- if ( captureBuffer.pushBuffer( ( char* ) streamBuffer,
- bufferFrameCount * stream_.nDeviceChannels[INPUT],
- stream_.deviceFormat[INPUT] ) )
- {
- // Release capture buffer
- hr = captureClient->ReleaseBuffer( bufferFrameCount );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
- goto Exit;
- }
- }
- else
- {
- // Inform WASAPI that capture was unsuccessful
- hr = captureClient->ReleaseBuffer( 0 );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
- goto Exit;
- }
- }
- }
- else
- {
- // Inform WASAPI that capture was unsuccessful
- hr = captureClient->ReleaseBuffer( 0 );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
- goto Exit;
- }
- }
- }
-
- // Stream Render
- // =============
- // 1. Get render buffer from stream
- // 2. Pull next buffer from outputBuffer
- // 3. If 2. was successful: Fill render buffer with next buffer
- // Release render buffer
-
- if ( renderAudioClient ) {
- // if the callback output buffer was not pushed to renderBuffer, wait for next render event
- if ( callbackPulled && !callbackPushed ) {
- WaitForSingleObject( renderEvent, INFINITE );
- }
-
- // Get render buffer from stream
- hr = renderAudioClient->GetBufferSize( &bufferFrameCount );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer size.";
- goto Exit;
- }
-
- hr = renderAudioClient->GetCurrentPadding( &numFramesPadding );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer padding.";
- goto Exit;
- }
-
- bufferFrameCount -= numFramesPadding;
-
- if ( bufferFrameCount != 0 ) {
- hr = renderClient->GetBuffer( bufferFrameCount, &streamBuffer );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer.";
- goto Exit;
- }
-
- // Pull next buffer from outputBuffer
- // Fill render buffer with next buffer
- if ( renderBuffer.pullBuffer( ( char* ) streamBuffer,
- bufferFrameCount * stream_.nDeviceChannels[OUTPUT],
- stream_.deviceFormat[OUTPUT] ) )
- {
- // Release render buffer
- hr = renderClient->ReleaseBuffer( bufferFrameCount, 0 );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
- goto Exit;
- }
- }
- else
- {
- // Inform WASAPI that render was unsuccessful
- hr = renderClient->ReleaseBuffer( 0, 0 );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
- goto Exit;
- }
- }
- }
- else
- {
- // Inform WASAPI that render was unsuccessful
- hr = renderClient->ReleaseBuffer( 0, 0 );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
- goto Exit;
- }
- }
- }
-
- // if the callback buffer was pushed renderBuffer reset callbackPulled flag
- if ( callbackPushed ) {
- callbackPulled = false;
- }
-
- // tick stream time
- RtApi::tickStreamTime();
- }
-
-Exit:
- // clean up
- CoTaskMemFree( captureFormat );
- CoTaskMemFree( renderFormat );
-
- free ( convBuffer );
-
- CoUninitialize();
-
- // update stream state
- stream_.state = STREAM_STOPPED;
-
- if ( errorText_.empty() )
- return;
- else
- error( errorType );
-}
-
-//******************** End of __WINDOWS_WASAPI__ *********************//
-#endif
-
-
-#if defined(__WINDOWS_DS__) // Windows DirectSound API
-
-// Modified by Robin Davies, October 2005
-// - Improvements to DirectX pointer chasing.
-// - Bug fix for non-power-of-two Asio granularity used by Edirol PCR-A30.
-// - Auto-call CoInitialize for DSOUND and ASIO platforms.
-// Various revisions for RtAudio 4.0 by Gary Scavone, April 2007
-// Changed device query structure for RtAudio 4.0.7, January 2010
-
-#include <dsound.h>
-#include <assert.h>
-#include <algorithm>
-
-#if defined(__MINGW32__)
- // missing from latest mingw winapi
-#define WAVE_FORMAT_96M08 0x00010000 /* 96 kHz, Mono, 8-bit */
-#define WAVE_FORMAT_96S08 0x00020000 /* 96 kHz, Stereo, 8-bit */
-#define WAVE_FORMAT_96M16 0x00040000 /* 96 kHz, Mono, 16-bit */
-#define WAVE_FORMAT_96S16 0x00080000 /* 96 kHz, Stereo, 16-bit */
-#endif
-
-#define MINIMUM_DEVICE_BUFFER_SIZE 32768
-
-#ifdef _MSC_VER // if Microsoft Visual C++
-#pragma comment( lib, "winmm.lib" ) // then, auto-link winmm.lib. Otherwise, it has to be added manually.
-#endif
-
-// Tests whether a buffer position lies in the span that starts at
-// earlierPointer (inclusive) and stops at laterPointer (exclusive).
-// All positions are offsets into a buffer of bufferSize bytes; the
-// span may wrap past the end of the buffer.  Returns non-zero when
-// the position is inside the span and zero otherwise.
-static inline DWORD dsPointerBetween( DWORD pointer, DWORD laterPointer, DWORD earlierPointer, DWORD bufferSize )
-{
-  // A position past the buffer length is pulled back by one buffer length.
-  pointer -= ( pointer > bufferSize ) ? bufferSize : 0;
-
-  // When the span wraps, its end is pushed one buffer length forward ...
-  laterPointer += ( laterPointer < earlierPointer ) ? bufferSize : 0;
-
-  // ... and so is a position that sits before the start of the span.
-  pointer += ( pointer < earlierPointer ) ? bufferSize : 0;
-
-  const bool insideSpan = !( pointer < earlierPointer ) && ( pointer < laterPointer );
-  return insideSpan;
-}
-
-// A structure to hold various information related to the DirectSound
-// API implementation.  The two-element arrays are indexed by stream
-// mode: slot 0 holds the playback objects and slot 1 the capture
-// objects.
-struct DsHandle {
-  unsigned int drainCounter; // Tracks callback counts when draining
-  bool internalDrain;        // Indicates if stop is initiated from callback or not.
-  void *id[2];               // DirectSound / DirectSoundCapture device objects
-  void *buffer[2];           // playback / capture buffer objects
-  bool xrun[2];
-  UINT bufferPointer[2];
-  DWORD dsBufferSize[2];
-  DWORD dsPointerLeadTime[2]; // the number of bytes ahead of the safe pointer to lead by.
-  HANDLE condition;
-
-  // Every member starts in a defined state so that cleanup code can
-  // safely inspect a handle that was only partially filled in.
-  DsHandle()
-    : drainCounter(0), internalDrain(false), condition(NULL)
-  {
-    for ( int i=0; i<2; i++ ) {
-      id[i] = 0;
-      buffer[i] = 0;
-      xrun[i] = false;
-      bufferPointer[i] = 0;
-      dsBufferSize[i] = 0;
-      dsPointerLeadTime[i] = 0;
-    }
-  }
-};
-
-// Declarations for utility functions, callbacks, and structures
-// specific to the DirectSound implementation.
-static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid,
- LPCTSTR description,
- LPCTSTR module,
- LPVOID lpContext );
-
-static const char* getErrorString( int code );
-
-static unsigned __stdcall callbackHandler( void *ptr );
-
-// One enumerated DirectSound device.  Slot 0 of each array refers to
-// the playback side and slot 1 to the capture side.
-struct DsDevice {
-  LPGUID id[2];      // device GUIDs; only meaningful when the matching validId is true
-  bool validId[2];   // whether the corresponding id entry has been filled in
-  bool found;        // set while enumerating; used to detect devices that have disappeared
-  std::string name;
-
-  // The GUID pointers are cleared as well, so an entry whose validId
-  // flag is still false never exposes an indeterminate pointer.
-  DsDevice()
-    : found(false)
-  {
-    id[0] = NULL;
-    id[1] = NULL;
-    validId[0] = false;
-    validId[1] = false;
-  }
-};
-
-// Context handed to the device enumeration callback through its
-// lpContext argument.
-struct DsProbeData {
-  // true while capture devices are being enumerated, false for playback
-  bool isInput;
-  // list the enumeration reports into
-  std::vector<DsDevice> *dsDevices;
-};
-
-RtApiDs :: RtApiDs()
-{
-  // Try to initialize COM for this thread.  When the call fails we
-  // simply keep whatever threading model the application already
-  // chose, and remember not to issue a matching CoUninitialize().
-  coInitialized_ = FAILED( CoInitialize( NULL ) ) ? false : true;
-}
-
-RtApiDs :: ~RtApiDs()
-{
-  // Close the stream first: closeStream() calls Release() on the
-  // DirectSound objects it owns, and those calls must happen while COM
-  // is still initialized on this thread.
-  if ( stream_.state != STREAM_CLOSED ) closeStream();
-
-  // Balance the CoInitialize() made in the constructor, if it succeeded.
-  if ( coInitialized_ ) CoUninitialize();
-}
-
-// DirectSound always reports its default output first, so the default
-// playback device is index zero.
-unsigned int RtApiDs :: getDefaultOutputDevice( void )
-{
-  const unsigned int firstDevice = 0;
-  return firstDevice;
-}
-
-// The default capture device is the first capture device that was
-// enumerated, which is reported as index zero.
-unsigned int RtApiDs :: getDefaultInputDevice( void )
-{
-  const unsigned int firstDevice = 0;
-  return firstDevice;
-}
-
-// Re-enumerates the DirectSound playback and capture devices, drops
-// the entries that were not reported this time, and returns the number
-// of devices that remain in dsDevices.
-unsigned int RtApiDs :: getDeviceCount( void )
-{
-  typedef std::vector<struct DsDevice>::iterator DeviceIter;
-
-  // Clear the "seen" mark on every known device; the enumeration
-  // callback is expected to set it again for devices that still exist.
-  for ( DeviceIter dev = dsDevices.begin(); dev != dsDevices.end(); ++dev )
-    dev->found = false;
-
-  struct DsProbeData probeInfo;
-  probeInfo.dsDevices = &dsDevices;
-
-  // Pass 1: playback devices.
-  probeInfo.isInput = false;
-  HRESULT result = DirectSoundEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
-  if ( FAILED( result ) ) {
-    errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating output devices!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-  }
-
-  // Pass 2: capture devices.
-  probeInfo.isInput = true;
-  result = DirectSoundCaptureEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
-  if ( FAILED( result ) ) {
-    errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating input devices!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-  }
-
-  // Remove every device that neither pass marked as found
-  // (code update submitted by Eli Zehngut).
-  DeviceIter dev = dsDevices.begin();
-  while ( dev != dsDevices.end() ) {
-    if ( dev->found )
-      ++dev;
-    else
-      dev = dsDevices.erase( dev );
-  }
-
-  return static_cast<unsigned int>( dsDevices.size() );
-}
-
-// Probes one DirectSound device and returns its capabilities.
-//
-// device: index into dsDevices.  If the list is empty a device query
-//         is forced first.
-//
-// The playback side is probed when the device has a valid output id,
-// the capture side when it has a valid input id.  Failures while
-// probing are reported as warnings.  info.probed is true only when the
-// function ran to one of its successful exits; on every other path the
-// partially filled structure is returned with probed left false.
-RtAudio::DeviceInfo RtApiDs :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  if ( dsDevices.size() == 0 ) {
-    // Force a query of all devices
-    getDeviceCount();
-    if ( dsDevices.size() == 0 ) {
-      errorText_ = "RtApiDs::getDeviceInfo: no devices found!";
-      error( RtAudioError::INVALID_USE );
-      return info;
-    }
-  }
-
-  if ( device >= dsDevices.size() ) {
-    errorText_ = "RtApiDs::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  HRESULT result;
-
-  // ---- Playback side ----
-  bool outputProbed = false;
-  if ( dsDevices[ device ].validId[0] ) {
-    LPDIRECTSOUND output;
-    result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::WARNING );
-    }
-    else {
-      DSCAPS outCaps;
-      outCaps.dwSize = sizeof( outCaps );
-      result = output->GetCaps( &outCaps );
-      if ( FAILED( result ) ) {
-        output->Release();
-        errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting capabilities!";
-        errorText_ = errorStream_.str();
-        error( RtAudioError::WARNING );
-      }
-      else {
-        // Get output channel information.
-        info.outputChannels = ( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ? 2 : 1;
-
-        // Get sample rate information: keep every known rate inside the
-        // range the device reports, preferring the highest one <= 48000.
-        info.sampleRates.clear();
-        for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-          if ( SAMPLE_RATES[k] >= (unsigned int) outCaps.dwMinSecondarySampleRate &&
-               SAMPLE_RATES[k] <= (unsigned int) outCaps.dwMaxSecondarySampleRate ) {
-            info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-            if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-              info.preferredSampleRate = SAMPLE_RATES[k];
-          }
-        }
-
-        // Get format information.
-        if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT ) info.nativeFormats |= RTAUDIO_SINT16;
-        if ( outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) info.nativeFormats |= RTAUDIO_SINT8;
-
-        output->Release();
-
-        if ( getDefaultOutputDevice() == device )
-          info.isDefaultOutput = true;
-
-        outputProbed = true;
-      }
-    }
-  }
-
-  // Without a valid capture id there is nothing more to probe.  The
-  // capture GUID must not be handed to DirectSoundCaptureCreate() in
-  // that case, even when the playback probe above failed.
-  if ( dsDevices[ device ].validId[1] == false ) {
-    if ( outputProbed ) {
-      info.name = dsDevices[ device ].name;
-      info.probed = true;
-    }
-    return info;
-  }
-
-  // ---- Capture side ----
-  LPDIRECTSOUNDCAPTURE input;
-  result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
-  if ( FAILED( result ) ) {
-    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  DSCCAPS inCaps;
-  inCaps.dwSize = sizeof( inCaps );
-  result = input->GetCaps( &inCaps );
-  if ( FAILED( result ) ) {
-    input->Release();
-    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting object capabilities (" << dsDevices[ device ].name << ")!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Get input channel information.
-  info.inputChannels = inCaps.dwChannels;
-
-  // Get sample rate and format information.  Each table column pairs a
-  // capture rate with the format flag that advertises it; the stereo
-  // flags are consulted for devices with two or more channels and the
-  // mono flags for single-channel devices.
-  static const unsigned int captureRates[4] = { 11025, 22050, 44100, 96000 };
-  static const DWORD stereoFlags16[4] = { WAVE_FORMAT_1S16, WAVE_FORMAT_2S16, WAVE_FORMAT_4S16, WAVE_FORMAT_96S16 };
-  static const DWORD stereoFlags08[4] = { WAVE_FORMAT_1S08, WAVE_FORMAT_2S08, WAVE_FORMAT_4S08, WAVE_FORMAT_96S08 };
-  static const DWORD monoFlags16[4]   = { WAVE_FORMAT_1M16, WAVE_FORMAT_2M16, WAVE_FORMAT_4M16, WAVE_FORMAT_96M16 };
-  static const DWORD monoFlags08[4]   = { WAVE_FORMAT_1M08, WAVE_FORMAT_2M08, WAVE_FORMAT_4M08, WAVE_FORMAT_96M08 };
-
-  std::vector<unsigned int> rates;
-  if ( inCaps.dwChannels >= 1 ) {
-    const bool stereo = ( inCaps.dwChannels >= 2 );
-    const DWORD *flags16 = stereo ? stereoFlags16 : monoFlags16;
-    const DWORD *flags08 = stereo ? stereoFlags08 : monoFlags08;
-
-    for ( unsigned int k=0; k<4; k++ ) {
-      if ( inCaps.dwFormats & flags16[k] ) info.nativeFormats |= RTAUDIO_SINT16;
-      if ( inCaps.dwFormats & flags08[k] ) info.nativeFormats |= RTAUDIO_SINT8;
-    }
-
-    // Rates come from the 16-bit flags when 16-bit is among the native
-    // formats (which may also have been set by the playback probe),
-    // otherwise from the 8-bit flags.
-    const DWORD *rateFlags = 0;
-    if ( info.nativeFormats & RTAUDIO_SINT16 ) rateFlags = flags16;
-    else if ( info.nativeFormats & RTAUDIO_SINT8 ) rateFlags = flags08;
-
-    if ( rateFlags ) {
-      for ( unsigned int k=0; k<4; k++ )
-        if ( inCaps.dwFormats & rateFlags[k] ) rates.push_back( captureRates[k] );
-    }
-  }
-  else info.inputChannels = 0; // technically, this would be an error
-
-  input->Release();
-
-  // A capture device without channels leaves the result unprobed.
-  if ( info.inputChannels == 0 ) return info;
-
-  // Copy the supported rates to the info structure but avoid duplication.
-  for ( unsigned int i=0; i<rates.size(); i++ ) {
-    if ( std::find( info.sampleRates.begin(), info.sampleRates.end(), rates[i] ) == info.sampleRates.end() )
-      info.sampleRates.push_back( rates[i] );
-  }
-  std::sort( info.sampleRates.begin(), info.sampleRates.end() );
-
-  // If device opens for both playback and capture, we determine the channels.
-  if ( info.outputChannels > 0 && info.inputChannels > 0 )
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
-  if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
-
-  // Copy name and return.
-  info.name = dsDevices[ device ].name;
-  info.probed = true;
-  return info;
-}
-
-// Opens one direction (OUTPUT or INPUT) of a DirectSound stream.
-//
-// device:       index into dsDevices
-// mode:         OUTPUT or INPUT; a duplex stream is built by an OUTPUT
-//               call followed by an INPUT call
-// channels:     number of user channels
-// firstChannel: channel offset on the device; channels + firstChannel
-//               may not exceed 2
-// sampleRate:   requested sample rate
-// format:       user data format
-// bufferSize:   in/out, frames per buffer; raised to 32 if smaller
-// options:      optional stream options (may be NULL)
-//
-// Returns SUCCESS or FAILURE.  On FAILURE errorText_ describes the
-// problem and every DirectSound object created by this call has been
-// released.
-bool RtApiDs :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                 unsigned int firstChannel, unsigned int sampleRate,
-                                 RtAudioFormat format, unsigned int *bufferSize,
-                                 RtAudio::StreamOptions *options )
-{
-  if ( channels + firstChannel > 2 ) {
-    errorText_ = "RtApiDs::probeDeviceOpen: DirectSound does not support more than 2 channels per device.";
-    return FAILURE;
-  }
-
-  size_t nDevices = dsDevices.size();
-  if ( nDevices == 0 ) {
-    // This should not happen because a check is made before this function is called.
-    errorText_ = "RtApiDs::probeDeviceOpen: no devices found!";
-    return FAILURE;
-  }
-
-  if ( device >= nDevices ) {
-    // This should not happen because a check is made before this function is called.
-    errorText_ = "RtApiDs::probeDeviceOpen: device ID is invalid!";
-    return FAILURE;
-  }
-
-  if ( mode == OUTPUT ) {
-    if ( dsDevices[ device ].validId[0] == false ) {
-      errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support output!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-  else { // mode == INPUT
-    if ( dsDevices[ device ].validId[1] == false ) {
-      errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support input!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-
-  // According to a note in PortAudio, using GetDesktopWindow()
-  // instead of GetForegroundWindow() is supposed to avoid problems
-  // that occur when the application's window is not the foreground
-  // window.  Also, if the application window closes before the
-  // DirectSound buffer, DirectSound can crash.  In the past, I had
-  // problems when using GetDesktopWindow() but it seems fine now
-  // (January 2010).  I'll leave it commented here.
-  // HWND hWnd = GetForegroundWindow();
-  HWND hWnd = GetDesktopWindow();
-
-  // Take the number of buffers from the options.  The
-  // RTAUDIO_MINIMIZE_LATENCY flag forces two; any value below two
-  // (including "not specified") falls back to three.  Two is probably
-  // too low for capture, but it should work for playback.
-  int nBuffers = 0;
-  if ( options ) nBuffers = options->numberOfBuffers;
-  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) nBuffers = 2;
-  if ( nBuffers < 2 ) nBuffers = 3;
-
-  // Check the lower range of the user-specified buffer size and set
-  // (arbitrarily) to a lower bound of 32.
-  if ( *bufferSize < 32 ) *bufferSize = 32;
-
-  // Create the wave format structure.  The data format setting will
-  // be determined later.
-  WAVEFORMATEX waveFormat;
-  ZeroMemory( &waveFormat, sizeof(WAVEFORMATEX) );
-  waveFormat.wFormatTag = WAVE_FORMAT_PCM;
-  waveFormat.nChannels = channels + firstChannel;
-  waveFormat.nSamplesPerSec = (unsigned long) sampleRate;
-
-  // Determine the device buffer size.  By default, we'll use the value
-  // defined above (32K), but we will grow it to make allowances for
-  // very large software buffer sizes.
-  DWORD dsBufferSize = MINIMUM_DEVICE_BUFFER_SIZE;
-  DWORD dsPointerLeadTime = 0;
-
-  // Device object and buffer object created by this call; they are
-  // handed over to the DsHandle further down.
-  void *ohandle = 0, *bhandle = 0;
-  HRESULT result;
-  if ( mode == OUTPUT ) {
-
-    LPDIRECTSOUND output;
-    result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    DSCAPS outCaps;
-    outCaps.dwSize = sizeof( outCaps );
-    result = output->GetCaps( &outCaps );
-    if ( FAILED( result ) ) {
-      output->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting capabilities (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Check channel information.
-    if ( channels + firstChannel == 2 && !( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ) {
-      output->Release(); // do not leak the device object on this early exit
-      errorStream_ << "RtApiDs::probeDeviceOpen: the output device (" << dsDevices[ device ].name << ") does not support stereo playback.";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Check format information.  Use 16-bit format unless not
-    // supported or user requests 8-bit.
-    if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT &&
-         !( format == RTAUDIO_SINT8 && outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) ) {
-      waveFormat.wBitsPerSample = 16;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-    }
-    else {
-      waveFormat.wBitsPerSample = 8;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT8;
-    }
-    stream_.userFormat = format;
-
-    // Update wave format structure and buffer information.
-    waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
-    waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
-    dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
-
-    // If the user wants an even bigger buffer, increase the device buffer size accordingly.
-    while ( dsPointerLeadTime * 2U > dsBufferSize )
-      dsBufferSize *= 2;
-
-    // Set cooperative level to DSSCL_EXCLUSIVE ... sound stops when window focus changes.
-    // result = output->SetCooperativeLevel( hWnd, DSSCL_EXCLUSIVE );
-    // Set cooperative level to DSSCL_PRIORITY ... sound remains when window focus changes.
-    result = output->SetCooperativeLevel( hWnd, DSSCL_PRIORITY );
-    if ( FAILED( result ) ) {
-      output->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting cooperative level (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Even though we will write to the secondary buffer, we need to
-    // access the primary buffer to set the correct output format
-    // (since the default is 8-bit, 22 kHz!).  Setup the DS primary
-    // buffer description.
-    DSBUFFERDESC bufferDescription;
-    ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
-    bufferDescription.dwSize = sizeof( DSBUFFERDESC );
-    bufferDescription.dwFlags = DSBCAPS_PRIMARYBUFFER;
-
-    // Obtain the primary buffer
-    LPDIRECTSOUNDBUFFER buffer;
-    result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
-    if ( FAILED( result ) ) {
-      output->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") accessing primary buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Set the primary DS buffer sound format.
-    result = buffer->SetFormat( &waveFormat );
-    if ( FAILED( result ) ) {
-      output->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting primary buffer format (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Setup the secondary DS buffer description.
-    ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
-    bufferDescription.dwSize = sizeof( DSBUFFERDESC );
-    bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
-                                  DSBCAPS_GLOBALFOCUS |
-                                  DSBCAPS_GETCURRENTPOSITION2 |
-                                  DSBCAPS_LOCHARDWARE );  // Force hardware mixing
-    bufferDescription.dwBufferBytes = dsBufferSize;
-    bufferDescription.lpwfxFormat = &waveFormat;
-
-    // Try to create the secondary DS buffer.  If that doesn't work,
-    // try to use software mixing.  Otherwise, there's a problem.
-    result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
-    if ( FAILED( result ) ) {
-      bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
-                                    DSBCAPS_GLOBALFOCUS |
-                                    DSBCAPS_GETCURRENTPOSITION2 |
-                                    DSBCAPS_LOCSOFTWARE );  // Force software mixing
-      result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
-      if ( FAILED( result ) ) {
-        output->Release();
-        errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating secondary buffer (" << dsDevices[ device ].name << ")!";
-        errorText_ = errorStream_.str();
-        return FAILURE;
-      }
-    }
-
-    // Get the buffer size ... might be different from what we specified.
-    DSBCAPS dsbcaps;
-    dsbcaps.dwSize = sizeof( DSBCAPS );
-    result = buffer->GetCaps( &dsbcaps );
-    if ( FAILED( result ) ) {
-      output->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    dsBufferSize = dsbcaps.dwBufferBytes;
-
-    // Lock the DS buffer
-    LPVOID audioPtr;
-    DWORD dataLen;
-    result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
-    if ( FAILED( result ) ) {
-      output->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Zero the DS buffer
-    ZeroMemory( audioPtr, dataLen );
-
-    // Unlock the DS buffer
-    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
-    if ( FAILED( result ) ) {
-      output->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    ohandle = (void *) output;
-    bhandle = (void *) buffer;
-  }
-
-  if ( mode == INPUT ) {
-
-    LPDIRECTSOUNDCAPTURE input;
-    result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    DSCCAPS inCaps;
-    inCaps.dwSize = sizeof( inCaps );
-    result = input->GetCaps( &inCaps );
-    if ( FAILED( result ) ) {
-      input->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting input capabilities (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Check channel information.
-    if ( inCaps.dwChannels < channels + firstChannel ) {
-      input->Release(); // do not leak the device object on this early exit
-      errorText_ = "RtApiDs::probeDeviceOpen: the input device does not support requested input channels.";
-      return FAILURE;
-    }
-
-    // Check format information.  Use 16-bit format unless user
-    // requests 8-bit.
-    DWORD deviceFormats;
-    if ( channels + firstChannel == 2 ) {
-      deviceFormats = WAVE_FORMAT_1S08 | WAVE_FORMAT_2S08 | WAVE_FORMAT_4S08 | WAVE_FORMAT_96S08;
-      if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
-        waveFormat.wBitsPerSample = 8;
-        stream_.deviceFormat[mode] = RTAUDIO_SINT8;
-      }
-      else { // assume 16-bit is supported
-        waveFormat.wBitsPerSample = 16;
-        stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-      }
-    }
-    else { // channel == 1
-      deviceFormats = WAVE_FORMAT_1M08 | WAVE_FORMAT_2M08 | WAVE_FORMAT_4M08 | WAVE_FORMAT_96M08;
-      if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
-        waveFormat.wBitsPerSample = 8;
-        stream_.deviceFormat[mode] = RTAUDIO_SINT8;
-      }
-      else { // assume 16-bit is supported
-        waveFormat.wBitsPerSample = 16;
-        stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-      }
-    }
-    stream_.userFormat = format;
-
-    // Update wave format structure and buffer information.
-    waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
-    waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
-    dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
-
-    // If the user wants an even bigger buffer, increase the device buffer size accordingly.
-    while ( dsPointerLeadTime * 2U > dsBufferSize )
-      dsBufferSize *= 2;
-
-    // Setup the secondary DS buffer description.
-    DSCBUFFERDESC bufferDescription;
-    ZeroMemory( &bufferDescription, sizeof( DSCBUFFERDESC ) );
-    bufferDescription.dwSize = sizeof( DSCBUFFERDESC );
-    bufferDescription.dwFlags = 0;
-    bufferDescription.dwReserved = 0;
-    bufferDescription.dwBufferBytes = dsBufferSize;
-    bufferDescription.lpwfxFormat = &waveFormat;
-
-    // Create the capture buffer.
-    LPDIRECTSOUNDCAPTUREBUFFER buffer;
-    result = input->CreateCaptureBuffer( &bufferDescription, &buffer, NULL );
-    if ( FAILED( result ) ) {
-      input->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating input buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Get the buffer size ... might be different from what we specified.
-    DSCBCAPS dscbcaps;
-    dscbcaps.dwSize = sizeof( DSCBCAPS );
-    result = buffer->GetCaps( &dscbcaps );
-    if ( FAILED( result ) ) {
-      input->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    dsBufferSize = dscbcaps.dwBufferBytes;
-
-    // NOTE: We could have a problem here if this is a duplex stream
-    // and the play and capture hardware buffer sizes are different
-    // (I'm actually not sure if that is a problem or not).
-    // Currently, we are not verifying that.
-
-    // Lock the capture buffer
-    LPVOID audioPtr;
-    DWORD dataLen;
-    result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
-    if ( FAILED( result ) ) {
-      input->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking input buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Zero the buffer
-    ZeroMemory( audioPtr, dataLen );
-
-    // Unlock the buffer
-    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
-    if ( FAILED( result ) ) {
-      input->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking input buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    ohandle = (void *) input;
-    bhandle = (void *) buffer;
-  }
-
-  // Set various stream parameters
-  DsHandle *handle = 0;
-  stream_.nDeviceChannels[mode] = channels + firstChannel;
-  stream_.nUserChannels[mode] = channels;
-  stream_.bufferSize = *bufferSize;
-  stream_.channelOffset[mode] = firstChannel;
-  stream_.deviceInterleaved[mode] = true;
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-
-  // Set flag for buffer conversion
-  stream_.doConvertBuffer[mode] = false;
-  if (stream_.nUserChannels[mode] != stream_.nDeviceChannels[mode])
-    stream_.doConvertBuffer[mode] = true;
-  if (stream_.userFormat != stream_.deviceFormat[mode])
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-       stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate necessary internal buffers
-  long bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiDs::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    // For the input half of a duplex stream the device buffer that was
-    // allocated for output is reused when it is already large enough.
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( mode == INPUT ) {
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-        if ( bufferBytes <= (long) bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiDs::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  // Allocate our DsHandle structures for the stream.
-  if ( stream_.apiHandle == 0 ) {
-    try {
-      handle = new DsHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiDs::probeDeviceOpen: error allocating DsHandle memory.";
-      goto error;
-    }
-
-    // Create a manual-reset event.
-    handle->condition = CreateEvent( NULL,   // no security
-                                     TRUE,   // manual-reset
-                                     FALSE,  // non-signaled initially
-                                     NULL ); // unnamed
-    stream_.apiHandle = (void *) handle;
-  }
-  else
-    handle = (DsHandle *) stream_.apiHandle;
-
-  // From here on the handle owns the objects created above.
-  handle->id[mode] = ohandle;
-  handle->buffer[mode] = bhandle;
-  handle->dsBufferSize[mode] = dsBufferSize;
-  handle->dsPointerLeadTime[mode] = dsPointerLeadTime;
-
-  stream_.device[mode] = device;
-  stream_.state = STREAM_STOPPED;
-  if ( stream_.mode == OUTPUT && mode == INPUT )
-    // We had already set up an output stream.
-    stream_.mode = DUPLEX;
-  else
-    stream_.mode = mode;
-  stream_.nBuffers = nBuffers;
-  stream_.sampleRate = sampleRate;
-
-  // Setup the buffer conversion information structure.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
-  // Setup the callback thread.
-  if ( stream_.callbackInfo.isRunning == false ) {
-    unsigned threadId;
-    stream_.callbackInfo.isRunning = true;
-    stream_.callbackInfo.object = (void *) this;
-    stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &callbackHandler,
-                                                  &stream_.callbackInfo, 0, &threadId );
-    if ( stream_.callbackInfo.thread == 0 ) {
-      // No thread exists, so the flag must not claim that one is running.
-      stream_.callbackInfo.isRunning = false;
-      errorText_ = "RtApiDs::probeDeviceOpen: error creating callback thread!";
-      goto error;
-    }
-
-    // Boost DS thread priority
-    SetThreadPriority( (HANDLE) stream_.callbackInfo.thread, THREAD_PRIORITY_HIGHEST );
-  }
-  return SUCCESS;
-
- error:
-  if ( handle ) {
-    if ( handle->buffer[0] ) {
-      LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0];
-      LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      if ( buffer ) buffer->Release();
-      if ( object ) object->Release();
-    }
-    if ( handle->buffer[1] ) {
-      LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1];
-      LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-      if ( buffer ) buffer->Release();
-      if ( object ) object->Release();
-    }
-    CloseHandle( handle->condition );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-  else {
-    // The failure happened before the objects created by this call were
-    // stored in a DsHandle, so they have to be released here.
-    if ( mode == OUTPUT ) {
-      if ( bhandle ) ( (LPDIRECTSOUNDBUFFER) bhandle )->Release();
-      if ( ohandle ) ( (LPDIRECTSOUND) ohandle )->Release();
-    }
-    else {
-      if ( bhandle ) ( (LPDIRECTSOUNDCAPTUREBUFFER) bhandle )->Release();
-      if ( ohandle ) ( (LPDIRECTSOUNDCAPTURE) ohandle )->Release();
-    }
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.state = STREAM_CLOSED;
-  return FAILURE;
-}
-
-// Shuts the stream down completely: ends the callback thread, stops
-// and releases the DirectSound objects held by the stream's DsHandle,
-// frees the internal buffers and marks the stream as closed.  Issues a
-// warning and does nothing when no stream is open.
-void RtApiDs :: closeStream()
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiDs::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Ask the callback thread to finish, wait for it, then drop its handle.
-  stream_.callbackInfo.isRunning = false;
-  HANDLE callbackThread = (HANDLE) stream_.callbackInfo.thread;
-  WaitForSingleObject( callbackThread, INFINITE );
-  CloseHandle( callbackThread );
-
-  DsHandle *dsHandle = (DsHandle *) stream_.apiHandle;
-  if ( dsHandle != 0 ) {
-
-    // Playback side: buffer first, then the device object.
-    LPDIRECTSOUNDBUFFER playBuffer = (LPDIRECTSOUNDBUFFER) dsHandle->buffer[0];
-    if ( playBuffer ) {
-      LPDIRECTSOUND playDevice = (LPDIRECTSOUND) dsHandle->id[0];
-      playBuffer->Stop();
-      playBuffer->Release();
-      playDevice->Release();
-    }
-
-    // Capture side: same order.
-    LPDIRECTSOUNDCAPTUREBUFFER recBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) dsHandle->buffer[1];
-    if ( recBuffer ) {
-      LPDIRECTSOUNDCAPTURE recDevice = (LPDIRECTSOUNDCAPTURE) dsHandle->id[1];
-      recBuffer->Stop();
-      recBuffer->Release();
-      recDevice->Release();
-    }
-
-    CloseHandle( dsHandle->condition );
-    delete dsHandle;
-    stream_.apiHandle = 0;
-  }
-
-  // Free the per-direction user buffers ...
-  for ( int dir=0; dir<2; dir++ ) {
-    if ( !stream_.userBuffer[dir] ) continue;
-    free( stream_.userBuffer[dir] );
-    stream_.userBuffer[dir] = 0;
-  }
-
-  // ... and the shared device buffer.
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-// Starts the playback and/or capture buffers in looping mode and
-// marks the stream as running.  Issues a warning when the stream is
-// already running; reports a system error when either buffer fails to
-// start, in which case the stream state is left unchanged.
-void RtApiDs :: startStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiDs::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  DsHandle *dsHandle = (DsHandle *) stream_.apiHandle;
-
-  // Increase scheduler frequency on lesser windows (a side-effect of
-  // increasing timer accuracy).  On greater windows (Win2K or later),
-  // this is already in effect.
-  timeBeginPeriod( 1 );
-
-  buffersRolling = false;
-
-  // In DUPLEX mode, 0.5 seconds of silence are used while the devices
-  // spin up and synchronize; otherwise there is no preroll.
-  if ( stream_.mode == DUPLEX )
-    duplexPrerollBytes = (int) ( 0.5 * stream_.sampleRate * formatBytes( stream_.deviceFormat[1] ) * stream_.nDeviceChannels[1] );
-  else
-    duplexPrerollBytes = 0;
-
-  const bool wantsOutput = ( stream_.mode == OUTPUT || stream_.mode == DUPLEX );
-  const bool wantsInput = ( stream_.mode == INPUT || stream_.mode == DUPLEX );
-
-  HRESULT result = 0;
-
-  if ( wantsOutput ) {
-    LPDIRECTSOUNDBUFFER playBuffer = (LPDIRECTSOUNDBUFFER) dsHandle->buffer[0];
-    result = playBuffer->Play( 0, 0, DSBPLAY_LOOPING );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting output buffer!";
-      errorText_ = errorStream_.str();
-    }
-  }
-
-  // The capture buffer is only started when nothing has failed so far.
-  if ( !FAILED( result ) && wantsInput ) {
-    LPDIRECTSOUNDCAPTUREBUFFER recBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) dsHandle->buffer[1];
-    result = recBuffer->Start( DSCBSTART_LOOPING );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting input buffer!";
-      errorText_ = errorStream_.str();
-    }
-  }
-
-  if ( FAILED( result ) ) {
-    error( RtAudioError::SYSTEM_ERROR );
-  }
-  else {
-    // Everything started: reset the drain bookkeeping and go live.
-    dsHandle->drainCounter = 0;
-    dsHandle->internalDrain = false;
-    ResetEvent( dsHandle->condition );
-    stream_.state = STREAM_RUNNING;
-  }
-}
-
-// Stops the running stream and wipes the DirectSound buffer(s).
-//
-// Emits a WARNING and returns if the stream is already stopped. For streams
-// with an output side, and when no drain is already in progress, a drain is
-// requested (drainCounter = 2) and the call blocks until the callback thread
-// signals handle->condition. Each active device buffer is then stopped,
-// zero-filled and its buffer pointer rewound to 0. On every path that gets
-// past the initial check the timer-resolution request is released and the
-// stream mutex is unlocked; a failed DirectSound call is reported as a
-// SYSTEM_ERROR afterwards.
-void RtApiDs :: stopStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiDs::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  DsHandle *ds = (DsHandle *) stream_.apiHandle;
-  HRESULT result = 0;
-  LPVOID lockedRegion;
-  DWORD lockedBytes;
-
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    // No drain pending yet: request one and wait for the callback thread
-    // to report that it has finished.
-    if ( ds->drainCounter == 0 ) {
-      ds->drainCounter = 2;
-      WaitForSingleObject( ds->condition, INFINITE ); // block until signaled
-    }
-
-    stream_.state = STREAM_STOPPED;
-
-    MUTEX_LOCK( &stream_.mutex );
-
-    // Halt playback.
-    LPDIRECTSOUNDBUFFER playBuffer = (LPDIRECTSOUNDBUFFER) ds->buffer[0];
-    result = playBuffer->Stop();
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") stopping output buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // Lock the whole buffer so it can be cleared; otherwise stale audio
-    // would be heard if playback is started again.
-    result = playBuffer->Lock( 0, ds->dsBufferSize[0], &lockedRegion, &lockedBytes, NULL, NULL, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") locking output buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // Silence the locked region.
-    ZeroMemory( lockedRegion, lockedBytes );
-
-    // Hand the region back to DirectSound.
-    result = playBuffer->Unlock( lockedRegion, lockedBytes, NULL, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") unlocking output buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // A later restart must write from the beginning of the buffer.
-    ds->bufferPointer[0] = 0;
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-    LPDIRECTSOUNDCAPTUREBUFFER captureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) ds->buffer[1];
-    lockedRegion = NULL;
-    lockedBytes = 0;
-
-    stream_.state = STREAM_STOPPED;
-
-    // In DUPLEX mode the mutex is already held from the output section.
-    if ( stream_.mode != DUPLEX )
-      MUTEX_LOCK( &stream_.mutex );
-
-    // Halt capture.
-    result = captureBuffer->Stop();
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") stopping input buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // Lock the whole capture buffer so it can be cleared of old data.
-    result = captureBuffer->Lock( 0, ds->dsBufferSize[1], &lockedRegion, &lockedBytes, NULL, NULL, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") locking input buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // Silence the locked region.
-    ZeroMemory( lockedRegion, lockedBytes );
-
-    // Hand the region back to DirectSound.
-    result = captureBuffer->Unlock( lockedRegion, lockedBytes, NULL, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") unlocking input buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // A later restart must read from the beginning of the buffer.
-    ds->bufferPointer[1] = 0;
-  }
-
- unlock:
-  timeEndPeriod( 1 ); // revert to normal scheduler frequency on lesser windows.
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  if ( FAILED( result ) ) error( RtAudioError::SYSTEM_ERROR );
-}
-
-// Stops the stream without waiting for pending output to drain.
-//
-// Emits a WARNING and returns if the stream is already stopped. Otherwise the
-// drain counter is pre-set to a non-zero value, which makes stopStream() skip
-// its blocking wait on the drain-complete event, and stopStream() is called.
-void RtApiDs :: abortStream()
-{
-  verifyStream();
-
-  if ( stream_.state != STREAM_STOPPED ) {
-    DsHandle *ds = (DsHandle *) stream_.apiHandle;
-    ds->drainCounter = 2;
-    stopStream();
-    return;
-  }
-
-  errorText_ = "RtApiDs::abortStream(): the stream is already stopped!";
-  error( RtAudioError::WARNING );
-}
-
-// Performs one iteration of the DirectSound callback thread.
-//
-// Per call, in order:
-//   1. Idles for 50 ms if the stream is stopped/stopping; warns if closed.
-//   2. If a drain has run long enough, signals the waiting stopStream()
-//      caller (external drain) or calls stopStream() itself (internal drain).
-//   3. Unless draining, invokes the user callback; a return value of 2
-//      aborts the stream, a return value of 1 starts an internal drain.
-//   4. Under stream_.mutex: on the first pass waits for the device
-//      position(s) to start moving and seeds the buffer pointers; then
-//      writes one user buffer to the playback buffer and/or reads one
-//      buffer from the capture buffer, converting formats as configured.
-//   5. Advances the stream time.
-//
-// Every error return taken while the mutex is held releases it first, and
-// the results of both Unlock() calls are actually checked.
-void RtApiDs :: callbackEvent()
-{
-  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) {
-    Sleep( 50 ); // sleep 50 milliseconds
-    return;
-  }
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiDs::callbackEvent(): the stream is closed ... this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-
-  // Check if we were draining the stream and signal is finished.
-  if ( handle->drainCounter > stream_.nBuffers + 2 ) {
-
-    stream_.state = STREAM_STOPPING;
-    if ( handle->internalDrain == false )
-      SetEvent( handle->condition );
-    else
-      stopStream();
-    return;
-  }
-
-  // Invoke user callback to get fresh output data UNLESS we are
-  // draining stream.
-  if ( handle->drainCounter == 0 ) {
-    RtAudioCallback callback = (RtAudioCallback) info->callback;
-    double streamTime = getStreamTime();
-    RtAudioStreamStatus status = 0;
-    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
-      status |= RTAUDIO_OUTPUT_UNDERFLOW;
-      handle->xrun[0] = false;
-    }
-    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
-      status |= RTAUDIO_INPUT_OVERFLOW;
-      handle->xrun[1] = false;
-    }
-    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
-                                  stream_.bufferSize, streamTime, status, info->userData );
-    if ( cbReturnValue == 2 ) {
-      stream_.state = STREAM_STOPPING;
-      handle->drainCounter = 2;
-      abortStream();
-      return;
-    }
-    else if ( cbReturnValue == 1 ) {
-      handle->drainCounter = 1;
-      handle->internalDrain = true;
-    }
-  }
-
-  HRESULT result;
-  DWORD currentWritePointer, safeWritePointer;
-  DWORD currentReadPointer, safeReadPointer;
-  UINT nextWritePointer;
-
-  LPVOID buffer1 = NULL;
-  LPVOID buffer2 = NULL;
-  DWORD bufferSize1 = 0;
-  DWORD bufferSize2 = 0;
-
-  char *buffer;
-  long bufferBytes;
-
-  MUTEX_LOCK( &stream_.mutex );
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_UNLOCK( &stream_.mutex );
-    return;
-  }
-
-  if ( buffersRolling == false ) {
-    if ( stream_.mode == DUPLEX ) {
-      //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
-
-      // It takes a while for the devices to get rolling. As a result,
-      // there's no guarantee that the capture and write device pointers
-      // will move in lockstep. Wait here for both devices to start
-      // rolling, and then set our buffer pointers accordingly.
-      // e.g. Crystal Drivers: the capture buffer starts up 5700 to 9600
-      // bytes later than the write buffer.
-
-      // Stub: a serious risk of having a pre-emptive scheduling round
-      // take place between the two GetCurrentPosition calls... but I'm
-      // really not sure how to solve the problem. Temporarily boost to
-      // Realtime priority, maybe; but I'm not sure what priority the
-      // DirectSound service threads run at. We *should* be roughly
-      // within a ms or so of correct.
-
-      LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      LPDIRECTSOUNDCAPTUREBUFFER dsCaptureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-
-      DWORD startSafeWritePointer, startSafeReadPointer;
-
-      result = dsWriteBuffer->GetCurrentPosition( NULL, &startSafeWritePointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-      result = dsCaptureBuffer->GetCurrentPosition( NULL, &startSafeReadPointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-      // Poll until both positions have moved away from their start values.
-      while ( true ) {
-        result = dsWriteBuffer->GetCurrentPosition( NULL, &safeWritePointer );
-        if ( FAILED( result ) ) {
-          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-          errorText_ = errorStream_.str();
-          MUTEX_UNLOCK( &stream_.mutex );
-          error( RtAudioError::SYSTEM_ERROR );
-          return;
-        }
-        result = dsCaptureBuffer->GetCurrentPosition( NULL, &safeReadPointer );
-        if ( FAILED( result ) ) {
-          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-          errorText_ = errorStream_.str();
-          MUTEX_UNLOCK( &stream_.mutex );
-          error( RtAudioError::SYSTEM_ERROR );
-          return;
-        }
-        if ( safeWritePointer != startSafeWritePointer && safeReadPointer != startSafeReadPointer ) break;
-        Sleep( 1 );
-      }
-
-      //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
-
-      handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
-      if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
-      handle->bufferPointer[1] = safeReadPointer;
-    }
-    else if ( stream_.mode == OUTPUT ) {
-
-      // Set the proper nextWritePosition after initial startup.
-      LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      result = dsWriteBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-      handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
-      if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
-    }
-
-    buffersRolling = true;
-  }
-
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    LPDIRECTSOUNDBUFFER dsBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-
-    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-      bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
-      bufferBytes *= formatBytes( stream_.userFormat );
-      memset( stream_.userBuffer[0], 0, bufferBytes );
-    }
-
-    // Setup parameters and do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[0] ) {
-      buffer = stream_.deviceBuffer;
-      convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-      bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[0];
-      bufferBytes *= formatBytes( stream_.deviceFormat[0] );
-    }
-    else {
-      buffer = stream_.userBuffer[0];
-      bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
-      bufferBytes *= formatBytes( stream_.userFormat );
-    }
-
-    // No byte swapping necessary in DirectSound implementation.
-
-    // Ahhh ... windoze. 16-bit data is signed but 8-bit data is
-    // unsigned. So, we need to convert our signed 8-bit data here to
-    // unsigned.
-    if ( stream_.deviceFormat[0] == RTAUDIO_SINT8 )
-      for ( int i=0; i<bufferBytes; i++ ) buffer[i] = (unsigned char) ( buffer[i] + 128 );
-
-    DWORD dsBufferSize = handle->dsBufferSize[0];
-    nextWritePointer = handle->bufferPointer[0];
-
-    DWORD endWrite, leadPointer;
-    while ( true ) {
-      // Find out where the read and "safe write" pointers are.
-      result = dsBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-        errorText_ = errorStream_.str();
-        // The stream mutex is held here; release it before bailing out so
-        // that stopStream() / the next callback pass cannot deadlock.
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-
-      // We will copy our output buffer into the region between
-      // safeWritePointer and leadPointer. If leadPointer is not
-      // beyond the next endWrite position, wait until it is.
-      leadPointer = safeWritePointer + handle->dsPointerLeadTime[0];
-      //std::cout << "safeWritePointer = " << safeWritePointer << ", leadPointer = " << leadPointer << ", nextWritePointer = " << nextWritePointer << std::endl;
-      if ( leadPointer > dsBufferSize ) leadPointer -= dsBufferSize;
-      if ( leadPointer < nextWritePointer ) leadPointer += dsBufferSize; // unwrap offset
-      endWrite = nextWritePointer + bufferBytes;
-
-      // Check whether the entire write region is behind the play pointer.
-      if ( leadPointer >= endWrite ) break;
-
-      // If we are here, then we must wait until the leadPointer advances
-      // beyond the end of our next write region. We use the
-      // Sleep() function to suspend operation until that happens.
-      double millis = ( endWrite - leadPointer ) * 1000.0;
-      millis /= ( formatBytes( stream_.deviceFormat[0]) * stream_.nDeviceChannels[0] * stream_.sampleRate);
-      if ( millis < 1.0 ) millis = 1.0;
-      Sleep( (DWORD) millis );
-    }
-
-    if ( dsPointerBetween( nextWritePointer, safeWritePointer, currentWritePointer, dsBufferSize )
-         || dsPointerBetween( endWrite, safeWritePointer, currentWritePointer, dsBufferSize ) ) {
-      // We've strayed into the forbidden zone ... resync the read pointer.
-      handle->xrun[0] = true;
-      nextWritePointer = safeWritePointer + handle->dsPointerLeadTime[0] - bufferBytes;
-      if ( nextWritePointer >= dsBufferSize ) nextWritePointer -= dsBufferSize;
-      handle->bufferPointer[0] = nextWritePointer;
-      endWrite = nextWritePointer + bufferBytes;
-    }
-
-    // Lock free space in the buffer
-    result = dsBuffer->Lock( nextWritePointer, bufferBytes, &buffer1,
-                             &bufferSize1, &buffer2, &bufferSize2, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking buffer during playback!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-
-    // Copy our buffer into the DS buffer
-    CopyMemory( buffer1, buffer, bufferSize1 );
-    if ( buffer2 != NULL ) CopyMemory( buffer2, buffer+bufferSize1, bufferSize2 );
-
-    // Update our buffer offset and unlock sound buffer. The HRESULT must be
-    // captured here, otherwise the check below re-tests the Lock() result.
-    result = dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking buffer during playback!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-    nextWritePointer = ( nextWritePointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
-    handle->bufferPointer[0] = nextWritePointer;
-  }
-
-  // Don't bother draining input
-  if ( handle->drainCounter ) {
-    handle->drainCounter++;
-    goto unlock;
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    // Setup parameters.
-    if ( stream_.doConvertBuffer[1] ) {
-      buffer = stream_.deviceBuffer;
-      bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[1];
-      bufferBytes *= formatBytes( stream_.deviceFormat[1] );
-    }
-    else {
-      buffer = stream_.userBuffer[1];
-      bufferBytes = stream_.bufferSize * stream_.nUserChannels[1];
-      bufferBytes *= formatBytes( stream_.userFormat );
-    }
-
-    LPDIRECTSOUNDCAPTUREBUFFER dsBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-    long nextReadPointer = handle->bufferPointer[1];
-    DWORD dsBufferSize = handle->dsBufferSize[1];
-
-    // Find out where the write and "safe read" pointers are.
-    result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-
-    if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
-    DWORD endRead = nextReadPointer + bufferBytes;
-
-    // Handling depends on whether we are INPUT or DUPLEX.
-    // If we're in INPUT mode then waiting is a good thing. If we're in DUPLEX mode,
-    // then a wait here will drag the write pointers into the forbidden zone.
-    //
-    // In DUPLEX mode, rather than wait, we will back off the read pointer until
-    // it's in a safe position. This causes dropouts, but it seems to be the only
-    // practical way to sync up the read and write pointers reliably, given the
-    // very complex relationship between phase and increment of the read and write
-    // pointers.
-    //
-    // In order to minimize audible dropouts in DUPLEX mode, we will
-    // provide a pre-roll period of 0.5 seconds in which we return
-    // zeros from the read buffer while the pointers sync up.
-
-    if ( stream_.mode == DUPLEX ) {
-      if ( safeReadPointer < endRead ) {
-        if ( duplexPrerollBytes <= 0 ) {
-          // Pre-roll time over. Be more aggressive.
-          int adjustment = endRead-safeReadPointer;
-
-          handle->xrun[1] = true;
-          // Two cases:
-          //   - large adjustments: we've probably run out of CPU cycles, so just resync exactly,
-          //     and perform fine adjustments later.
-          //   - small adjustments: back off by twice as much.
-          if ( adjustment >= 2*bufferBytes )
-            nextReadPointer = safeReadPointer-2*bufferBytes;
-          else
-            nextReadPointer = safeReadPointer-bufferBytes-adjustment;
-
-          if ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
-
-        }
-        else {
-          // In pre-roll time. Just do it.
-          nextReadPointer = safeReadPointer - bufferBytes;
-          while ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
-        }
-        endRead = nextReadPointer + bufferBytes;
-      }
-    }
-    else { // mode == INPUT
-      while ( safeReadPointer < endRead && stream_.callbackInfo.isRunning ) {
-        // See comments for playback.
-        double millis = (endRead - safeReadPointer) * 1000.0;
-        millis /= ( formatBytes(stream_.deviceFormat[1]) * stream_.nDeviceChannels[1] * stream_.sampleRate);
-        if ( millis < 1.0 ) millis = 1.0;
-        Sleep( (DWORD) millis );
-
-        // Wake up and find out where we are now.
-        result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
-        if ( FAILED( result ) ) {
-          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-          errorText_ = errorStream_.str();
-          MUTEX_UNLOCK( &stream_.mutex );
-          error( RtAudioError::SYSTEM_ERROR );
-          return;
-        }
-
-        if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
-      }
-    }
-
-    // Lock free space in the buffer
-    result = dsBuffer->Lock( nextReadPointer, bufferBytes, &buffer1,
-                             &bufferSize1, &buffer2, &bufferSize2, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking capture buffer!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-
-    if ( duplexPrerollBytes <= 0 ) {
-      // Copy the captured DS data into our buffer
-      CopyMemory( buffer, buffer1, bufferSize1 );
-      if ( buffer2 != NULL ) CopyMemory( buffer+bufferSize1, buffer2, bufferSize2 );
-    }
-    else {
-      // Still in the pre-roll period: hand back silence instead.
-      memset( buffer, 0, bufferSize1 );
-      if ( buffer2 != NULL ) memset( buffer + bufferSize1, 0, bufferSize2 );
-      duplexPrerollBytes -= bufferSize1 + bufferSize2;
-    }
-
-    // Update our buffer offset and unlock sound buffer. The HRESULT must be
-    // captured here, otherwise the check below re-tests the Lock() result.
-    nextReadPointer = ( nextReadPointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
-    result = dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking capture buffer!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-    handle->bufferPointer[1] = nextReadPointer;
-
-    // No byte swapping necessary in DirectSound implementation.
-
-    // If necessary, convert 8-bit data from unsigned to signed.
-    if ( stream_.deviceFormat[1] == RTAUDIO_SINT8 )
-      for ( int j=0; j<bufferBytes; j++ ) buffer[j] = (signed char) ( buffer[j] - 128 );
-
-    // Do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[1] )
-      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-  }
-
- unlock:
-  MUTEX_UNLOCK( &stream_.mutex );
-  RtApi::tickStreamTime();
-}
-
-// Definitions for utility functions and callbacks
-// specific to the DirectSound implementation.
-
-// Entry point of the DirectSound callback thread.
-//
-// ptr is the stream's CallbackInfo. The owning RtApiDs object's
-// callbackEvent() is invoked repeatedly for as long as the info's isRunning
-// flag stays true; the thread then ends itself with exit code 0.
-static unsigned __stdcall callbackHandler( void *ptr )
-{
-  CallbackInfo *cbInfo = (CallbackInfo *) ptr;
-  RtApiDs *api = (RtApiDs *) cbInfo->object;
-
-  for ( bool *running = &cbInfo->isRunning; *running == true; )
-    api->callbackEvent();
-
-  _endthreadex( 0 );
-  return 0;
-}
-
-// DirectSound device-enumeration callback.
-//
-// lpContext points at a DsProbeData that says whether capture or playback
-// devices are being enumerated and which DsDevice list to update. The device
-// is opened briefly to query its capabilities; a usable device is either
-// merged into an existing list entry with the same name or appended as a new
-// entry. A NULL GUID is recorded under the name "Default Device".
-// Always returns TRUE so that enumeration continues.
-static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid,
-                                          LPCTSTR description,
-                                          LPCTSTR /*module*/,
-                                          LPVOID lpContext )
-{
-  struct DsProbeData& probe = *(struct DsProbeData*) lpContext;
-  std::vector<struct DsDevice>& known = *probe.dsDevices;
-
-  // Slot used in DsDevice::id / validId: [1] for capture, [0] for playback.
-  const int slot = probe.isInput ? 1 : 0;
-
-  bool usable = false;
-  if ( probe.isInput == true ) {
-    LPDIRECTSOUNDCAPTURE capture;
-    if ( DirectSoundCaptureCreate( lpguid, &capture, NULL ) != DS_OK ) return TRUE;
-
-    // A capture device counts only if it reports channels and formats.
-    DSCCAPS captureCaps;
-    captureCaps.dwSize = sizeof(captureCaps);
-    if ( capture->GetCaps( &captureCaps ) == DS_OK )
-      usable = ( captureCaps.dwChannels > 0 && captureCaps.dwFormats > 0 );
-    capture->Release();
-  }
-  else {
-    LPDIRECTSOUND playback;
-    if ( DirectSoundCreate( lpguid, &playback, NULL ) != DS_OK ) return TRUE;
-
-    // A playback device counts only if its primary buffer does mono or stereo.
-    DSCAPS playbackCaps;
-    playbackCaps.dwSize = sizeof(playbackCaps);
-    if ( playback->GetCaps( &playbackCaps ) == DS_OK )
-      usable = ( ( playbackCaps.dwFlags & ( DSCAPS_PRIMARYMONO | DSCAPS_PRIMARYSTEREO ) ) != 0 );
-    playback->Release();
-  }
-
-  // Work out the name under which the device is stored.
-  std::string name = convertCharPointerToStdString( description );
-  //if ( name == "Primary Sound Driver" || name == "Primary Sound Capture Driver" )
-  if ( lpguid == NULL )
-    name = "Default Device";
-
-  if ( !usable ) return TRUE;
-
-  // Known name: just refresh the entry for this direction.
-  for ( unsigned int k=0; k<known.size(); k++ ) {
-    if ( !( known[k].name == name ) ) continue;
-    known[k].found = true;
-    known[k].id[slot] = lpguid;
-    known[k].validId[slot] = true;
-    return TRUE;
-  }
-
-  // New name: append a fresh entry.
-  DsDevice fresh;
-  fresh.name = name;
-  fresh.found = true;
-  fresh.id[slot] = lpguid;
-  fresh.validId[slot] = true;
-  known.push_back( fresh );
-
-  return TRUE;
-}
-
-static const char* getErrorString( int code )
-{
- switch ( code ) {
-
- case DSERR_ALLOCATED:
- return "Already allocated";
-
- case DSERR_CONTROLUNAVAIL:
- return "Control unavailable";
-
- case DSERR_INVALIDPARAM:
- return "Invalid parameter";
-
- case DSERR_INVALIDCALL:
- return "Invalid call";
-
- case DSERR_GENERIC:
- return "Generic error";
-
- case DSERR_PRIOLEVELNEEDED:
- return "Priority level needed";
-
- case DSERR_OUTOFMEMORY:
- return "Out of memory";
-
- case DSERR_BADFORMAT:
- return "The sample rate or the channel format is not supported";
-
- case DSERR_UNSUPPORTED:
- return "Not supported";
-
- case DSERR_NODRIVER:
- return "No driver";
-
- case DSERR_ALREADYINITIALIZED:
- return "Already initialized";
-
- case DSERR_NOAGGREGATION:
- return "No aggregation";
-
- case DSERR_BUFFERLOST:
- return "Buffer lost";
-
- case DSERR_OTHERAPPHASPRIO:
- return "Another application already has priority";
-
- case DSERR_UNINITIALIZED:
- return "Uninitialized";
-
- default:
- return "DirectSound unknown error";
- }
-}
-//******************** End of __WINDOWS_DS__ *********************//
-#endif
-
-
-#if defined(__LINUX_ALSA__)
-
-#include <alsa/asoundlib.h>
-#include <unistd.h>
-
- // A structure to hold various information related to the ALSA API
- // implementation.
-struct AlsaHandle {
-  snd_pcm_t *handles[2];      // PCM handles; presumably [0] = playback, [1] = capture (confirm against users)
-  bool synchronized;
-  bool xrun[2];               // per-direction xrun flags, cleared on construction
-  pthread_cond_t runnable_cv; // not initialised here; the owner must pthread_cond_init() it
-  bool runnable;
-
-  // All flags start false. The PCM handles start null so that a handle that
-  // was never opened can be told apart from a live one instead of holding an
-  // indeterminate pointer.
-  AlsaHandle()
-    :synchronized(false), runnable(false) {
-    handles[0] = 0;
-    handles[1] = 0;
-    xrun[0] = false;
-    xrun[1] = false;
-  }
-};
-
-static void *alsaCallbackHandler( void * ptr );
-
-// Default constructor: the ALSA backend needs no set-up of its own.
-RtApiAlsa :: RtApiAlsa()
-{
-}
-
-// Destructor: closes the stream if it has not been closed already.
-RtApiAlsa :: ~RtApiAlsa()
-{
-  if ( stream_.state == STREAM_CLOSED ) return;
-  closeStream();
-}
-
-// Counts the ALSA PCM devices visible to this process.
-//
-// Every sound card is opened through its "hw:<card>" control interface and
-// its PCM devices are counted. One more device is counted if the "default"
-// control interface can be opened. Failures to open a card or to enumerate
-// its devices are reported as WARNINGs and that card is skipped or cut short.
-//
-// A control handle is closed only when snd_ctl_open() actually succeeded, so
-// a failed open never leads to closing an uninitialised or already closed
-// handle.
-unsigned int RtApiAlsa :: getDeviceCount( void )
-{
-  unsigned nDevices = 0;
-  int result, subdevice, card;
-  char name[64];
-  snd_ctl_t *handle = 0;
-
-  // Count cards and devices
-  card = -1;
-  snd_card_next( &card );
-  while ( card >= 0 ) {
-    sprintf( name, "hw:%d", card );
-    handle = 0;
-    result = snd_ctl_open( &handle, name, 0 );
-    if ( result < 0 ) {
-      errorStream_ << "RtApiAlsa::getDeviceCount: control open, card = " << card << ", " << snd_strerror( result ) << ".";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::WARNING );
-    }
-    else {
-      subdevice = -1;
-      while( 1 ) {
-        result = snd_ctl_pcm_next_device( handle, &subdevice );
-        if ( result < 0 ) {
-          errorStream_ << "RtApiAlsa::getDeviceCount: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
-          errorText_ = errorStream_.str();
-          error( RtAudioError::WARNING );
-          break;
-        }
-        if ( subdevice < 0 )
-          break;
-        nDevices++;
-      }
-      // Only a successfully opened control handle may be closed.
-      snd_ctl_close( handle );
-    }
-    snd_card_next( &card );
-  }
-
-  // The "default" device counts as one additional entry when it can be opened.
-  result = snd_ctl_open( &handle, "default", 0 );
-  if (result == 0) {
-    nDevices++;
-    snd_ctl_close( handle );
-  }
-
-  return nDevices;
-}
-
-RtAudio::DeviceInfo RtApiAlsa :: getDeviceInfo( unsigned int device )
-{
- RtAudio::DeviceInfo info;
- info.probed = false;
-
- unsigned nDevices = 0;
- int result, subdevice, card;
- char name[64];
- snd_ctl_t *chandle;
-
- // Count cards and devices
- card = -1;
- subdevice = -1;
- snd_card_next( &card );
- while ( card >= 0 ) {
- sprintf( name, "hw:%d", card );
- result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: control open, card = " << card << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto nextcard;
- }
- subdevice = -1;
- while( 1 ) {
- result = snd_ctl_pcm_next_device( chandle, &subdevice );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- break;
- }
- if ( subdevice < 0 ) break;
- if ( nDevices == device ) {
- sprintf( name, "hw:%d,%d", card, subdevice );
- goto foundDevice;
- }
- nDevices++;
- }
- nextcard:
- snd_ctl_close( chandle );
- snd_card_next( &card );
- }
-
- result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK );
- if ( result == 0 ) {
- if ( nDevices == device ) {
- strcpy( name, "default" );
- goto foundDevice;
- }
- nDevices++;
- }
-
- if ( nDevices == 0 ) {
- errorText_ = "RtApiAlsa::getDeviceInfo: no devices found!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- if ( device >= nDevices ) {
- errorText_ = "RtApiAlsa::getDeviceInfo: device ID is invalid!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- foundDevice:
-
- // If a stream is already open, we cannot probe the stream devices.
- // Thus, use the saved results.
- if ( stream_.state != STREAM_CLOSED &&
- ( stream_.device[0] == device || stream_.device[1] == device ) ) {
- snd_ctl_close( chandle );
- if ( device >= devices_.size() ) {
- errorText_ = "RtApiAlsa::getDeviceInfo: device ID was not present before stream was opened.";
- error( RtAudioError::WARNING );
- return info;
- }
- return devices_[ device ];
- }
-
- int openMode = SND_PCM_ASYNC;
- snd_pcm_stream_t stream;
- snd_pcm_info_t *pcminfo;
- snd_pcm_info_alloca( &pcminfo );
- snd_pcm_t *phandle;
- snd_pcm_hw_params_t *params;
- snd_pcm_hw_params_alloca( &params );
-
- // First try for playback unless default device (which has subdev -1)
- stream = SND_PCM_STREAM_PLAYBACK;
- snd_pcm_info_set_stream( pcminfo, stream );
- if ( subdevice != -1 ) {
- snd_pcm_info_set_device( pcminfo, subdevice );
- snd_pcm_info_set_subdevice( pcminfo, 0 );
-
- result = snd_ctl_pcm_info( chandle, pcminfo );
- if ( result < 0 ) {
- // Device probably doesn't support playback.
- goto captureProbe;
- }
- }
-
- result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto captureProbe;
- }
-
- // The device is open ... fill the parameter structure.
- result = snd_pcm_hw_params_any( phandle, params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto captureProbe;
- }
-
- // Get output channel information.
- unsigned int value;
- result = snd_pcm_hw_params_get_channels_max( params, &value );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") output channels, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto captureProbe;
- }
- info.outputChannels = value;
- snd_pcm_close( phandle );
-
- captureProbe:
- stream = SND_PCM_STREAM_CAPTURE;
- snd_pcm_info_set_stream( pcminfo, stream );
-
- // Now try for capture unless default device (with subdev = -1)
- if ( subdevice != -1 ) {
- result = snd_ctl_pcm_info( chandle, pcminfo );
- snd_ctl_close( chandle );
- if ( result < 0 ) {
- // Device probably doesn't support capture.
- if ( info.outputChannels == 0 ) return info;
- goto probeParameters;
- }
- }
- else
- snd_ctl_close( chandle );
-
- result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- if ( info.outputChannels == 0 ) return info;
- goto probeParameters;
- }
-
- // The device is open ... fill the parameter structure.
- result = snd_pcm_hw_params_any( phandle, params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- if ( info.outputChannels == 0 ) return info;
- goto probeParameters;
- }
-
- result = snd_pcm_hw_params_get_channels_max( params, &value );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") input channels, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- if ( info.outputChannels == 0 ) return info;
- goto probeParameters;
- }
- info.inputChannels = value;
- snd_pcm_close( phandle );
-
- // If device opens for both playback and capture, we determine the channels.
- if ( info.outputChannels > 0 && info.inputChannels > 0 )
- info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
- // ALSA doesn't provide default devices so we'll use the first available one.
- if ( device == 0 && info.outputChannels > 0 )
- info.isDefaultOutput = true;
- if ( device == 0 && info.inputChannels > 0 )
- info.isDefaultInput = true;
-
- probeParameters:
- // At this point, we just need to figure out the supported data
- // formats and sample rates. We'll proceed by opening the device in
- // the direction with the maximum number of channels, or playback if
- // they are equal. This might limit our sample rate options, but so
- // be it.
-
- if ( info.outputChannels >= info.inputChannels )
- stream = SND_PCM_STREAM_PLAYBACK;
- else
- stream = SND_PCM_STREAM_CAPTURE;
- snd_pcm_info_set_stream( pcminfo, stream );
-
- result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // The device is open ... fill the parameter structure.
- result = snd_pcm_hw_params_any( phandle, params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Test our discrete set of sample rate values.
- info.sampleRates.clear();
- for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
- if ( snd_pcm_hw_params_test_rate( phandle, params, SAMPLE_RATES[i], 0 ) == 0 ) {
- info.sampleRates.push_back( SAMPLE_RATES[i] );
-
- if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
- info.preferredSampleRate = SAMPLE_RATES[i];
- }
- }
- if ( info.sampleRates.size() == 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: no supported sample rates found for device (" << name << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Probe the supported data formats ... we don't care about endian-ness just yet
- snd_pcm_format_t format;
- info.nativeFormats = 0;
- format = SND_PCM_FORMAT_S8;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_SINT8;
- format = SND_PCM_FORMAT_S16;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_SINT16;
- format = SND_PCM_FORMAT_S24;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_SINT24;
- format = SND_PCM_FORMAT_S32;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_SINT32;
- format = SND_PCM_FORMAT_FLOAT;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_FLOAT32;
- format = SND_PCM_FORMAT_FLOAT64;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_FLOAT64;
-
- // Check that we have at least one supported format
- if ( info.nativeFormats == 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: pcm device (" << name << ") data format not supported by RtAudio.";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Get the device name
- char *cardname;
- result = snd_card_get_name( card, &cardname );
- if ( result >= 0 ) {
- sprintf( name, "hw:%s,%d", cardname, subdevice );
- free( cardname );
- }
- info.name = name;
-
- // That's all ... close the device and return
- snd_pcm_close( phandle );
- info.probed = true;
- return info;
-}
-
-void RtApiAlsa :: saveDeviceInfo( void )
-{
- devices_.clear();
-
- unsigned int nDevices = getDeviceCount();
- devices_.resize( nDevices );
- for ( unsigned int i=0; i<nDevices; i++ )
- devices_[i] = getDeviceInfo( i );
-}
-
-bool RtApiAlsa :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
- unsigned int firstChannel, unsigned int sampleRate,
- RtAudioFormat format, unsigned int *bufferSize,
- RtAudio::StreamOptions *options )
-
-{
-#if defined(__RTAUDIO_DEBUG__)
- snd_output_t *out;
- snd_output_stdio_attach(&out, stderr, 0);
-#endif
-
- // I'm not using the "plug" interface ... too much inconsistent behavior.
-
- unsigned nDevices = 0;
- int result, subdevice, card;
- char name[64];
- snd_ctl_t *chandle;
-
- if ( options && options->flags & RTAUDIO_ALSA_USE_DEFAULT )
- snprintf(name, sizeof(name), "%s", "default");
- else {
- // Count cards and devices
- card = -1;
- snd_card_next( &card );
- while ( card >= 0 ) {
- sprintf( name, "hw:%d", card );
- result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::probeDeviceOpen: control open, card = " << card << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- subdevice = -1;
- while( 1 ) {
- result = snd_ctl_pcm_next_device( chandle, &subdevice );
- if ( result < 0 ) break;
- if ( subdevice < 0 ) break;
- if ( nDevices == device ) {
- sprintf( name, "hw:%d,%d", card, subdevice );
- snd_ctl_close( chandle );
- goto foundDevice;
- }
- nDevices++;
- }
- snd_ctl_close( chandle );
- snd_card_next( &card );
- }
-
- result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK );
- if ( result == 0 ) {
- if ( nDevices == device ) {
- strcpy( name, "default" );
- goto foundDevice;
- }
- nDevices++;
- }
-
- if ( nDevices == 0 ) {
- // This should not happen because a check is made before this function is called.
- errorText_ = "RtApiAlsa::probeDeviceOpen: no devices found!";
- return FAILURE;
- }
-
- if ( device >= nDevices ) {
- // This should not happen because a check is made before this function is called.
- errorText_ = "RtApiAlsa::probeDeviceOpen: device ID is invalid!";
- return FAILURE;
- }
- }
-
- foundDevice:
-
- // The getDeviceInfo() function will not work for a device that is
- // already open. Thus, we'll probe the system before opening a
- // stream and save the results for use by getDeviceInfo().
- if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) // only do once
- this->saveDeviceInfo();
-
- snd_pcm_stream_t stream;
- if ( mode == OUTPUT )
- stream = SND_PCM_STREAM_PLAYBACK;
- else
- stream = SND_PCM_STREAM_CAPTURE;
-
- snd_pcm_t *phandle;
- int openMode = SND_PCM_ASYNC;
- result = snd_pcm_open( &phandle, name, stream, openMode );
- if ( result < 0 ) {
- if ( mode == OUTPUT )
- errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for output.";
- else
- errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for input.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Fill the parameter structure.
- snd_pcm_hw_params_t *hw_params;
- snd_pcm_hw_params_alloca( &hw_params );
- result = snd_pcm_hw_params_any( phandle, hw_params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") parameters, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
-#if defined(__RTAUDIO_DEBUG__)
- fprintf( stderr, "\nRtApiAlsa: dump hardware params just after device open:\n\n" );
- snd_pcm_hw_params_dump( hw_params, out );
-#endif
-
- // Set access ... check user preference.
- if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) {
- stream_.userInterleaved = false;
- result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
- if ( result < 0 ) {
- result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
- stream_.deviceInterleaved[mode] = true;
- }
- else
- stream_.deviceInterleaved[mode] = false;
- }
- else {
- stream_.userInterleaved = true;
- result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
- if ( result < 0 ) {
- result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
- stream_.deviceInterleaved[mode] = false;
- }
- else
- stream_.deviceInterleaved[mode] = true;
- }
-
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") access, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Determine how to set the device format.
- stream_.userFormat = format;
- snd_pcm_format_t deviceFormat = SND_PCM_FORMAT_UNKNOWN;
-
- if ( format == RTAUDIO_SINT8 )
- deviceFormat = SND_PCM_FORMAT_S8;
- else if ( format == RTAUDIO_SINT16 )
- deviceFormat = SND_PCM_FORMAT_S16;
- else if ( format == RTAUDIO_SINT24 )
- deviceFormat = SND_PCM_FORMAT_S24;
- else if ( format == RTAUDIO_SINT32 )
- deviceFormat = SND_PCM_FORMAT_S32;
- else if ( format == RTAUDIO_FLOAT32 )
- deviceFormat = SND_PCM_FORMAT_FLOAT;
- else if ( format == RTAUDIO_FLOAT64 )
- deviceFormat = SND_PCM_FORMAT_FLOAT64;
-
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat) == 0) {
- stream_.deviceFormat[mode] = format;
- goto setFormat;
- }
-
- // The user requested format is not natively supported by the device.
- deviceFormat = SND_PCM_FORMAT_FLOAT64;
- if ( snd_pcm_hw_params_test_format( phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_FLOAT;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_S32;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_S24;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_S16;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_S8;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT8;
- goto setFormat;
- }
-
- // If we get here, no supported format was found.
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device " << device << " data format not supported by RtAudio.";
- errorText_ = errorStream_.str();
- return FAILURE;
-
- setFormat:
- result = snd_pcm_hw_params_set_format( phandle, hw_params, deviceFormat );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") data format, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Determine whether byte-swaping is necessary.
- stream_.doByteSwap[mode] = false;
- if ( deviceFormat != SND_PCM_FORMAT_S8 ) {
- result = snd_pcm_format_cpu_endian( deviceFormat );
- if ( result == 0 )
- stream_.doByteSwap[mode] = true;
- else if (result < 0) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") endian-ness, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
-
- // Set the sample rate.
- result = snd_pcm_hw_params_set_rate_near( phandle, hw_params, (unsigned int*) &sampleRate, 0 );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting sample rate on device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Determine the number of channels for this device. We support a possible
- // minimum device channel number > than the value requested by the user.
- stream_.nUserChannels[mode] = channels;
- unsigned int value;
- result = snd_pcm_hw_params_get_channels_max( hw_params, &value );
- unsigned int deviceChannels = value;
- if ( result < 0 || deviceChannels < channels + firstChannel ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: requested channel parameters not supported by device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- result = snd_pcm_hw_params_get_channels_min( hw_params, &value );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting minimum channels for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- deviceChannels = value;
- if ( deviceChannels < channels + firstChannel ) deviceChannels = channels + firstChannel;
- stream_.nDeviceChannels[mode] = deviceChannels;
-
- // Set the device channels.
- result = snd_pcm_hw_params_set_channels( phandle, hw_params, deviceChannels );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting channels for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Set the buffer (or period) size.
- int dir = 0;
- snd_pcm_uframes_t periodSize = *bufferSize;
- result = snd_pcm_hw_params_set_period_size_near( phandle, hw_params, &periodSize, &dir );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting period size for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- *bufferSize = periodSize;
-
- // Set the buffer number, which in ALSA is referred to as the "period".
- unsigned int periods = 0;
- if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) periods = 2;
- if ( options && options->numberOfBuffers > 0 ) periods = options->numberOfBuffers;
- if ( periods < 2 ) periods = 4; // a fairly safe default value
- result = snd_pcm_hw_params_set_periods_near( phandle, hw_params, &periods, &dir );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting periods for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // If attempting to setup a duplex stream, the bufferSize parameter
- // MUST be the same in both directions!
- if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- stream_.bufferSize = *bufferSize;
-
- // Install the hardware configuration
- result = snd_pcm_hw_params( phandle, hw_params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing hardware configuration on device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
-#if defined(__RTAUDIO_DEBUG__)
- fprintf(stderr, "\nRtApiAlsa: dump hardware params after installation:\n\n");
- snd_pcm_hw_params_dump( hw_params, out );
-#endif
-
- // Set the software configuration to fill buffers with zeros and prevent device stopping on xruns.
- snd_pcm_sw_params_t *sw_params = NULL;
- snd_pcm_sw_params_alloca( &sw_params );
- snd_pcm_sw_params_current( phandle, sw_params );
- snd_pcm_sw_params_set_start_threshold( phandle, sw_params, *bufferSize );
- snd_pcm_sw_params_set_stop_threshold( phandle, sw_params, ULONG_MAX );
- snd_pcm_sw_params_set_silence_threshold( phandle, sw_params, 0 );
-
- // The following two settings were suggested by Theo Veenker
- //snd_pcm_sw_params_set_avail_min( phandle, sw_params, *bufferSize );
- //snd_pcm_sw_params_set_xfer_align( phandle, sw_params, 1 );
-
- // here are two options for a fix
- //snd_pcm_sw_params_set_silence_size( phandle, sw_params, ULONG_MAX );
- snd_pcm_uframes_t val;
- snd_pcm_sw_params_get_boundary( sw_params, &val );
- snd_pcm_sw_params_set_silence_size( phandle, sw_params, val );
-
- result = snd_pcm_sw_params( phandle, sw_params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing software configuration on device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
-#if defined(__RTAUDIO_DEBUG__)
- fprintf(stderr, "\nRtApiAlsa: dump software params after installation:\n\n");
- snd_pcm_sw_params_dump( sw_params, out );
-#endif
-
- // Set flags for buffer conversion
- stream_.doConvertBuffer[mode] = false;
- if ( stream_.userFormat != stream_.deviceFormat[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
- stream_.nUserChannels[mode] > 1 )
- stream_.doConvertBuffer[mode] = true;
-
- // Allocate the ApiHandle if necessary and then save.
- AlsaHandle *apiInfo = 0;
- if ( stream_.apiHandle == 0 ) {
- try {
- apiInfo = (AlsaHandle *) new AlsaHandle;
- }
- catch ( std::bad_alloc& ) {
- errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating AlsaHandle memory.";
- goto error;
- }
-
- if ( pthread_cond_init( &apiInfo->runnable_cv, NULL ) ) {
- errorText_ = "RtApiAlsa::probeDeviceOpen: error initializing pthread condition variable.";
- goto error;
- }
-
- stream_.apiHandle = (void *) apiInfo;
- apiInfo->handles[0] = 0;
- apiInfo->handles[1] = 0;
- }
- else {
- apiInfo = (AlsaHandle *) stream_.apiHandle;
- }
- apiInfo->handles[mode] = phandle;
- phandle = 0;
-
- // Allocate necessary internal buffers.
- unsigned long bufferBytes;
- bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
- stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
- if ( stream_.userBuffer[mode] == NULL ) {
- errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating user buffer memory.";
- goto error;
- }
-
- if ( stream_.doConvertBuffer[mode] ) {
-
- bool makeBuffer = true;
- bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
- if ( mode == INPUT ) {
- if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
- unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
- if ( bufferBytes <= bytesOut ) makeBuffer = false;
- }
- }
-
- if ( makeBuffer ) {
- bufferBytes *= *bufferSize;
- if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
- stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
- if ( stream_.deviceBuffer == NULL ) {
- errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating device buffer memory.";
- goto error;
- }
- }
- }
-
- stream_.sampleRate = sampleRate;
- stream_.nBuffers = periods;
- stream_.device[mode] = device;
- stream_.state = STREAM_STOPPED;
-
- // Setup the buffer conversion information structure.
- if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
- // Setup thread if necessary.
- if ( stream_.mode == OUTPUT && mode == INPUT ) {
- // We had already set up an output stream.
- stream_.mode = DUPLEX;
- // Link the streams if possible.
- apiInfo->synchronized = false;
- if ( snd_pcm_link( apiInfo->handles[0], apiInfo->handles[1] ) == 0 )
- apiInfo->synchronized = true;
- else {
- errorText_ = "RtApiAlsa::probeDeviceOpen: unable to synchronize input and output devices.";
- error( RtAudioError::WARNING );
- }
- }
- else {
- stream_.mode = mode;
-
- // Setup callback thread.
- stream_.callbackInfo.object = (void *) this;
-
- // Set the thread attributes for joinable and realtime scheduling
- // priority (optional). The higher priority will only take affect
- // if the program is run as root or suid. Note, under Linux
- // processes with CAP_SYS_NICE privilege, a user can change
- // scheduling policy and priority (thus need not be root). See
- // POSIX "capabilities".
- pthread_attr_t attr;
- pthread_attr_init( &attr );
- pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
-
-#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
- if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
- // We previously attempted to increase the audio callback priority
- // to SCHED_RR here via the attributes. However, while no errors
- // were reported in doing so, it did not work. So, now this is
- // done in the alsaCallbackHandler function.
- stream_.callbackInfo.doRealtime = true;
- int priority = options->priority;
- int min = sched_get_priority_min( SCHED_RR );
- int max = sched_get_priority_max( SCHED_RR );
- if ( priority < min ) priority = min;
- else if ( priority > max ) priority = max;
- stream_.callbackInfo.priority = priority;
- }
-#endif
-
- stream_.callbackInfo.isRunning = true;
- result = pthread_create( &stream_.callbackInfo.thread, &attr, alsaCallbackHandler, &stream_.callbackInfo );
- pthread_attr_destroy( &attr );
- if ( result ) {
- stream_.callbackInfo.isRunning = false;
- errorText_ = "RtApiAlsa::error creating callback thread!";
- goto error;
- }
- }
-
- return SUCCESS;
-
- error:
- if ( apiInfo ) {
- pthread_cond_destroy( &apiInfo->runnable_cv );
- if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
- if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
- delete apiInfo;
- stream_.apiHandle = 0;
- }
-
- if ( phandle) snd_pcm_close( phandle );
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- stream_.state = STREAM_CLOSED;
- return FAILURE;
-}
-
-void RtApiAlsa :: closeStream()
-{
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiAlsa::closeStream(): no open stream to close!";
- error( RtAudioError::WARNING );
- return;
- }
-
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- stream_.callbackInfo.isRunning = false;
- MUTEX_LOCK( &stream_.mutex );
- if ( stream_.state == STREAM_STOPPED ) {
- apiInfo->runnable = true;
- pthread_cond_signal( &apiInfo->runnable_cv );
- }
- MUTEX_UNLOCK( &stream_.mutex );
- pthread_join( stream_.callbackInfo.thread, NULL );
-
- if ( stream_.state == STREAM_RUNNING ) {
- stream_.state = STREAM_STOPPED;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
- snd_pcm_drop( apiInfo->handles[0] );
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX )
- snd_pcm_drop( apiInfo->handles[1] );
- }
-
- if ( apiInfo ) {
- pthread_cond_destroy( &apiInfo->runnable_cv );
- if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
- if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
- delete apiInfo;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- stream_.mode = UNINITIALIZED;
- stream_.state = STREAM_CLOSED;
-}
-
-void RtApiAlsa :: startStream()
-{
- // This method calls snd_pcm_prepare if the device isn't already in that state.
-
- verifyStream();
- if ( stream_.state == STREAM_RUNNING ) {
- errorText_ = "RtApiAlsa::startStream(): the stream is already running!";
- error( RtAudioError::WARNING );
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
-
- int result = 0;
- snd_pcm_state_t state;
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- state = snd_pcm_state( handle[0] );
- if ( state != SND_PCM_STATE_PREPARED ) {
- result = snd_pcm_prepare( handle[0] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::startStream: error preparing output pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
- }
-
- if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) {
- result = snd_pcm_drop(handle[1]); // fix to remove stale data received since device has been open
- state = snd_pcm_state( handle[1] );
- if ( state != SND_PCM_STATE_PREPARED ) {
- result = snd_pcm_prepare( handle[1] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::startStream: error preparing input pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
- }
-
- stream_.state = STREAM_RUNNING;
-
- unlock:
- apiInfo->runnable = true;
- pthread_cond_signal( &apiInfo->runnable_cv );
- MUTEX_UNLOCK( &stream_.mutex );
-
- if ( result >= 0 ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAlsa :: stopStream()
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiAlsa::stopStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- stream_.state = STREAM_STOPPED;
- MUTEX_LOCK( &stream_.mutex );
-
- int result = 0;
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- if ( apiInfo->synchronized )
- result = snd_pcm_drop( handle[0] );
- else
- result = snd_pcm_drain( handle[0] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::stopStream: error draining output pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) {
- result = snd_pcm_drop( handle[1] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::stopStream: error stopping input pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- unlock:
- apiInfo->runnable = false; // fixes high CPU usage when stopped
- MUTEX_UNLOCK( &stream_.mutex );
-
- if ( result >= 0 ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAlsa :: abortStream()
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiAlsa::abortStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- stream_.state = STREAM_STOPPED;
- MUTEX_LOCK( &stream_.mutex );
-
- int result = 0;
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- result = snd_pcm_drop( handle[0] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::abortStream: error aborting output pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) {
- result = snd_pcm_drop( handle[1] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::abortStream: error aborting input pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- unlock:
- apiInfo->runnable = false; // fixes high CPU usage when stopped
- MUTEX_UNLOCK( &stream_.mutex );
-
- if ( result >= 0 ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAlsa :: callbackEvent()
-{
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- if ( stream_.state == STREAM_STOPPED ) {
- MUTEX_LOCK( &stream_.mutex );
- while ( !apiInfo->runnable )
- pthread_cond_wait( &apiInfo->runnable_cv, &stream_.mutex );
-
- if ( stream_.state != STREAM_RUNNING ) {
- MUTEX_UNLOCK( &stream_.mutex );
- return;
- }
- MUTEX_UNLOCK( &stream_.mutex );
- }
-
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiAlsa::callbackEvent(): the stream is closed ... this shouldn't happen!";
- error( RtAudioError::WARNING );
- return;
- }
-
- int doStopStream = 0;
- RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- if ( stream_.mode != INPUT && apiInfo->xrun[0] == true ) {
- status |= RTAUDIO_OUTPUT_UNDERFLOW;
- apiInfo->xrun[0] = false;
- }
- if ( stream_.mode != OUTPUT && apiInfo->xrun[1] == true ) {
- status |= RTAUDIO_INPUT_OVERFLOW;
- apiInfo->xrun[1] = false;
- }
- doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1],
- stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
-
- if ( doStopStream == 2 ) {
- abortStream();
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
-
- // The state might change while waiting on a mutex.
- if ( stream_.state == STREAM_STOPPED ) goto unlock;
-
- int result;
- char *buffer;
- int channels;
- snd_pcm_t **handle;
- snd_pcm_sframes_t frames;
- RtAudioFormat format;
- handle = (snd_pcm_t **) apiInfo->handles;
-
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
- // Setup parameters.
- if ( stream_.doConvertBuffer[1] ) {
- buffer = stream_.deviceBuffer;
- channels = stream_.nDeviceChannels[1];
- format = stream_.deviceFormat[1];
- }
- else {
- buffer = stream_.userBuffer[1];
- channels = stream_.nUserChannels[1];
- format = stream_.userFormat;
- }
-
- // Read samples from device in interleaved/non-interleaved format.
- if ( stream_.deviceInterleaved[1] )
- result = snd_pcm_readi( handle[1], buffer, stream_.bufferSize );
- else {
- void *bufs[channels];
- size_t offset = stream_.bufferSize * formatBytes( format );
- for ( int i=0; i<channels; i++ )
- bufs[i] = (void *) (buffer + (i * offset));
- result = snd_pcm_readn( handle[1], bufs, stream_.bufferSize );
- }
-
- if ( result < (int) stream_.bufferSize ) {
- // Either an error or overrun occured.
- if ( result == -EPIPE ) {
- snd_pcm_state_t state = snd_pcm_state( handle[1] );
- if ( state == SND_PCM_STATE_XRUN ) {
- apiInfo->xrun[1] = true;
- result = snd_pcm_prepare( handle[1] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after overrun, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- }
- else {
- errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- }
- else {
- errorStream_ << "RtApiAlsa::callbackEvent: audio read error, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- error( RtAudioError::WARNING );
- goto tryOutput;
- }
-
- // Do byte swapping if necessary.
- if ( stream_.doByteSwap[1] )
- byteSwapBuffer( buffer, stream_.bufferSize * channels, format );
-
- // Do buffer conversion if necessary.
- if ( stream_.doConvertBuffer[1] )
- convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-
- // Check stream latency
- result = snd_pcm_delay( handle[1], &frames );
- if ( result == 0 && frames > 0 ) stream_.latency[1] = frames;
- }
-
- tryOutput:
-
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
- // Setup parameters and do buffer conversion if necessary.
- if ( stream_.doConvertBuffer[0] ) {
- buffer = stream_.deviceBuffer;
- convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
- channels = stream_.nDeviceChannels[0];
- format = stream_.deviceFormat[0];
- }
- else {
- buffer = stream_.userBuffer[0];
- channels = stream_.nUserChannels[0];
- format = stream_.userFormat;
- }
-
- // Do byte swapping if necessary.
- if ( stream_.doByteSwap[0] )
- byteSwapBuffer(buffer, stream_.bufferSize * channels, format);
-
- // Write samples to device in interleaved/non-interleaved format.
- if ( stream_.deviceInterleaved[0] )
- result = snd_pcm_writei( handle[0], buffer, stream_.bufferSize );
- else {
- void *bufs[channels];
- size_t offset = stream_.bufferSize * formatBytes( format );
- for ( int i=0; i<channels; i++ )
- bufs[i] = (void *) (buffer + (i * offset));
- result = snd_pcm_writen( handle[0], bufs, stream_.bufferSize );
- }
-
- if ( result < (int) stream_.bufferSize ) {
- // Either an error or underrun occured.
- if ( result == -EPIPE ) {
- snd_pcm_state_t state = snd_pcm_state( handle[0] );
- if ( state == SND_PCM_STATE_XRUN ) {
- apiInfo->xrun[0] = true;
- result = snd_pcm_prepare( handle[0] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after underrun, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- else
- errorText_ = "RtApiAlsa::callbackEvent: audio write error, underrun.";
- }
- else {
- errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- }
- else {
- errorStream_ << "RtApiAlsa::callbackEvent: audio write error, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- error( RtAudioError::WARNING );
- goto unlock;
- }
-
- // Check stream latency
- result = snd_pcm_delay( handle[0], &frames );
- if ( result == 0 && frames > 0 ) stream_.latency[0] = frames;
- }
-
- unlock:
- MUTEX_UNLOCK( &stream_.mutex );
-
- RtApi::tickStreamTime();
- if ( doStopStream == 1 ) this->stopStream();
-}
-
-// Entry point of the ALSA callback thread.
-//
-// ptr is the stream's CallbackInfo; its `object` member is cast to the
-// RtApiAlsa instance whose callbackEvent() is driven from here.  When
-// SCHED_RR is available and realtime scheduling was requested, the thread
-// first switches itself to SCHED_RR at info->priority.  It then calls
-// callbackEvent() repeatedly until info->isRunning is cleared.
-static void *alsaCallbackHandler( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiAlsa *object = (RtApiAlsa *) info->object;
-  bool *isRunning = &info->isRunning;
-
-#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
-  // Test the flag's value.  The previous condition, `&info->doRealtime`,
-  // tested the member's address, which is never null, so SCHED_RR was
-  // requested even when the caller had not asked for realtime scheduling.
-  if ( info->doRealtime ) {
-    pthread_t tID = pthread_self();        // ID of this thread
-    sched_param prio = { info->priority }; // scheduling priority of thread
-    pthread_setschedparam( tID, SCHED_RR, &prio );
-  }
-#endif
-
-  while ( *isRunning == true ) {
-    pthread_testcancel();
-    object->callbackEvent();
-  }
-
-  pthread_exit( NULL );
-}
-
-//******************** End of __LINUX_ALSA__ *********************//
-#endif
-
-#if defined(__LINUX_PULSE__)
-
-// Code written by Peter Meerwald, pmeerw@pmeerw.net
-// and Tristan Matthews.
-
-#include <pulse/error.h>
-#include <pulse/simple.h>
-#include <cstdio>
-
-// Sample rates reported for the PulseAudio device.  The trailing 0 is the
-// end-of-list marker that the loops over this table stop on.
-static const unsigned int SUPPORTED_SAMPLERATES[] = {
-  8000, 16000, 22050, 32000, 44100, 48000, 96000,
-  0
-};
-
-// Associates an RtAudio sample format with the PulseAudio sample format
-// used for it.
-struct rtaudio_pa_format_mapping_t {
-  RtAudioFormat rtaudio_format;
-  pa_sample_format_t pa_format;
-};
-
-// Formats that are handed to PulseAudio as-is.  The final
-// { 0, PA_SAMPLE_INVALID } entry terminates the table.
-static const rtaudio_pa_format_mapping_t supported_sampleformats[] = {
-  { RTAUDIO_SINT16,  PA_SAMPLE_S16LE },
-  { RTAUDIO_SINT32,  PA_SAMPLE_S32LE },
-  { RTAUDIO_FLOAT32, PA_SAMPLE_FLOAT32LE },
-  { 0,               PA_SAMPLE_INVALID }
-};
-
-// PulseAudio-specific stream state; an instance is stored in
-// stream_.apiHandle while a stream is open.
-struct PulseAudioHandle {
-  pa_simple *s_play;          // playback connection, 0 until opened
-  pa_simple *s_rec;           // record connection, 0 until opened
-  pthread_t thread;           // callback thread
-  pthread_cond_t runnable_cv; // signalled after `runnable` is set
-  bool runnable;              // the callback thread waits while this is false
-
-  PulseAudioHandle()
-    : s_play( 0 ),
-      s_rec( 0 ),
-      runnable( false )
-  {
-  }
-};
-
-// Closes the stream on destruction unless it is already closed.
-RtApiPulse::~RtApiPulse()
-{
-  const bool stillOpen = ( stream_.state != STREAM_CLOSED );
-  if ( stillOpen ) closeStream();
-}
-
-// This backend always reports exactly one device.
-unsigned int RtApiPulse::getDeviceCount( void )
-{
-  const unsigned int deviceCount = 1;
-  return deviceCount;
-}
-
-// Returns a fixed description of the single "PulseAudio" device.  The
-// device index is ignored; nothing is queried from a server here.
-RtAudio::DeviceInfo RtApiPulse::getDeviceInfo( unsigned int /*device*/ )
-{
-  RtAudio::DeviceInfo description;
-
-  description.probed = true;
-  description.name = "PulseAudio";
-
-  // Stereo in every direction, and the default for both input and output.
-  description.outputChannels = 2;
-  description.inputChannels = 2;
-  description.duplexChannels = 2;
-  description.isDefaultOutput = true;
-  description.isDefaultInput = true;
-
-  // Copy the zero-terminated rate table.
-  const unsigned int *rate = SUPPORTED_SAMPLERATES;
-  while ( *rate != 0 ) {
-    description.sampleRates.push_back( *rate );
-    ++rate;
-  }
-
-  description.preferredSampleRate = 48000;
-  description.nativeFormats = RTAUDIO_SINT16 | RTAUDIO_SINT32 | RTAUDIO_FLOAT32;
-
-  return description;
-}
-
-// Entry point of the PulseAudio callback thread.  `user` is the stream's
-// CallbackInfo; callbackEvent() is invoked on its `object` until the
-// isRunning flag is cleared.
-static void *pulseaudio_callback( void * user )
-{
-  CallbackInfo *info = static_cast<CallbackInfo *>( user );
-  RtApiPulse *api = static_cast<RtApiPulse *>( info->object );
-  volatile bool *keepGoing = &info->isRunning;
-
-  for ( ;; ) {
-    if ( !*keepGoing ) break;
-    pthread_testcancel();
-    api->callbackEvent();
-  }
-
-  pthread_exit( NULL );
-}
-
-// Shuts the stream down: stops and joins the callback thread, releases
-// the PulseAudio connections and the API handle, frees the stream's
-// buffers and marks the stream closed.
-void RtApiPulse::closeStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  // Tell the callback thread to leave its loop.
-  stream_.callbackInfo.isRunning = false;
-  if ( pah ) {
-    MUTEX_LOCK( &stream_.mutex );
-    if ( stream_.state == STREAM_STOPPED ) {
-      // The thread may be blocked on the condition variable; wake it so it
-      // can observe isRunning == false and exit.
-      pah->runnable = true;
-      pthread_cond_signal( &pah->runnable_cv );
-    }
-    MUTEX_UNLOCK( &stream_.mutex );
-
-    pthread_join( pah->thread, 0 );
-    if ( pah->s_play ) {
-      pa_simple_flush( pah->s_play, NULL );
-      pa_simple_free( pah->s_play );
-    }
-    if ( pah->s_rec )
-      pa_simple_free( pah->s_rec );
-
-    pthread_cond_destroy( &pah->runnable_cv );
-    delete pah;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  // The conversion buffer is allocated by probeDeviceOpen() and was
-  // previously never released here, so it leaked on close.
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.state = STREAM_CLOSED;
-  stream_.mode = UNINITIALIZED;
-}
-
-// One cycle of the callback thread: wait while the stream is stopped,
-// invoke the user callback, then write and/or read one buffer through
-// the PulseAudio simple API.
-void RtApiPulse::callbackEvent( void )
-{
-  PulseAudioHandle *handle = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  // While stopped, block on the condition variable until flagged runnable.
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_LOCK( &stream_.mutex );
-    while ( !handle->runnable )
-      pthread_cond_wait( &handle->runnable_cv, &stream_.mutex );
-
-    const bool nowRunning = ( stream_.state == STREAM_RUNNING );
-    MUTEX_UNLOCK( &stream_.mutex );
-    if ( !nowRunning ) return;
-  }
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::callbackEvent(): the stream is closed ... "
-      "this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Hand the user buffers to the application callback.
-  RtAudioCallback userCallback = (RtAudioCallback) stream_.callbackInfo.callback;
-  double now = getStreamTime();
-  RtAudioStreamStatus flags = 0;
-  int stopRequest = userCallback( stream_.userBuffer[OUTPUT], stream_.userBuffer[INPUT],
-                                  stream_.bufferSize, now, flags,
-                                  stream_.callbackInfo.userData );
-
-  // A return value of 2 aborts the stream immediately.
-  if ( stopRequest == 2 ) {
-    abortStream();
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  // Device I/O happens only while the stream is still running.
-  if ( stream_.state == STREAM_RUNNING ) {
-    void *captureTarget = stream_.doConvertBuffer[INPUT] ? stream_.deviceBuffer : stream_.userBuffer[INPUT];
-    void *playbackSource = stream_.doConvertBuffer[OUTPUT] ? stream_.deviceBuffer : stream_.userBuffer[OUTPUT];
-    int paErr;
-    size_t nBytes;
-
-    if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-      if ( stream_.doConvertBuffer[OUTPUT] ) {
-        // Convert user data into the device buffer before writing it.
-        convertBuffer( stream_.deviceBuffer,
-                       stream_.userBuffer[OUTPUT],
-                       stream_.convertInfo[OUTPUT] );
-        nBytes = stream_.nDeviceChannels[OUTPUT] * stream_.bufferSize *
-                 formatBytes( stream_.deviceFormat[OUTPUT] );
-      }
-      else {
-        nBytes = stream_.nUserChannels[OUTPUT] * stream_.bufferSize *
-                 formatBytes( stream_.userFormat );
-      }
-
-      const int written = pa_simple_write( handle->s_play, playbackSource, nBytes, &paErr );
-      if ( written < 0 ) {
-        errorStream_ << "RtApiPulse::callbackEvent: audio write error, " <<
-          pa_strerror( paErr ) << ".";
-        errorText_ = errorStream_.str();
-        error( RtAudioError::WARNING );
-      }
-    }
-
-    if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-      const bool convertInput = stream_.doConvertBuffer[INPUT];
-      if ( convertInput )
-        nBytes = stream_.nDeviceChannels[INPUT] * stream_.bufferSize *
-                 formatBytes( stream_.deviceFormat[INPUT] );
-      else
-        nBytes = stream_.nUserChannels[INPUT] * stream_.bufferSize *
-                 formatBytes( stream_.userFormat );
-
-      const int got = pa_simple_read( handle->s_rec, captureTarget, nBytes, &paErr );
-      if ( got < 0 ) {
-        errorStream_ << "RtApiPulse::callbackEvent: audio read error, " <<
-          pa_strerror( paErr ) << ".";
-        errorText_ = errorStream_.str();
-        error( RtAudioError::WARNING );
-      }
-      if ( stream_.doConvertBuffer[INPUT] ) {
-        // Convert what was read into the user's input buffer.
-        convertBuffer( stream_.userBuffer[INPUT],
-                       stream_.deviceBuffer,
-                       stream_.convertInfo[INPUT] );
-      }
-    }
-  }
-
-  MUTEX_UNLOCK( &stream_.mutex );
-  RtApi::tickStreamTime();
-
-  // A return value of 1 from the user callback requests a normal stop.
-  if ( stopRequest == 1 )
-    stopStream();
-}
-
-// Marks the stream as running and wakes the callback thread.  Reports
-// INVALID_USE if the stream is closed and a WARNING if it already runs.
-void RtApiPulse::startStream( void )
-{
-  PulseAudioHandle *handle = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::startStream(): the stream is not open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-  else if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiPulse::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Change the state and open the gate under the stream mutex, then
-  // signal the thread waiting on the condition variable.
-  MUTEX_LOCK( &stream_.mutex );
-  stream_.state = STREAM_RUNNING;
-  handle->runnable = true;
-  pthread_cond_signal( &handle->runnable_cv );
-  MUTEX_UNLOCK( &stream_.mutex );
-}
-
-// Stops the stream after draining any queued playback data.
-//
-// Reports INVALID_USE if the stream is closed, a WARNING if it is already
-// stopped, and SYSTEM_ERROR if draining the playback connection fails.
-void RtApiPulse::stopStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::stopStream(): the stream is not open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiPulse::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_LOCK( &stream_.mutex );
-
-  if ( pah ) {
-    // Close the gate again.  callbackEvent() only blocks while `runnable`
-    // is false; leaving it true after a stop made the callback thread
-    // skip pthread_cond_wait() and spin until the next start or close.
-    pah->runnable = false;
-
-    if ( pah->s_play ) {
-      int pa_error;
-      if ( pa_simple_drain( pah->s_play, &pa_error ) < 0 ) {
-        errorStream_ << "RtApiPulse::stopStream: error draining output device, " <<
-          pa_strerror( pa_error ) << ".";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-}
-
-// Stops the stream immediately, discarding any queued playback data.
-//
-// Reports INVALID_USE if the stream is closed, a WARNING if it is already
-// stopped, and SYSTEM_ERROR if flushing the playback connection fails.
-void RtApiPulse::abortStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle*>( stream_.apiHandle );
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::abortStream(): the stream is not open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiPulse::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_LOCK( &stream_.mutex );
-
-  if ( pah ) {
-    // Close the gate again.  callbackEvent() only blocks while `runnable`
-    // is false; leaving it true after an abort made the callback thread
-    // skip pthread_cond_wait() and spin until the next start or close.
-    pah->runnable = false;
-
-    if ( pah->s_play ) {
-      int pa_error;
-      if ( pa_simple_flush( pah->s_play, &pa_error ) < 0 ) {
-        errorStream_ << "RtApiPulse::abortStream: error flushing output device, " <<
-          pa_strerror( pa_error ) << ".";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-}
-
-// Opens one direction of the single PulseAudio device.
-//
-// device        must be 0.
-// mode          INPUT or OUTPUT; opening the second direction on a stream
-//               that already has the other one turns it into DUPLEX.
-// channels      1 or 2.
-// firstChannel  must be 0.
-// sampleRate    must be one of SUPPORTED_SAMPLERATES.
-// format        user sample format; formats not listed in
-//               supported_sampleformats are converted to/from FLOAT32.
-// bufferSize    buffer size in sample frames (read, not modified).
-// options       optional; RTAUDIO_NONINTERLEAVED and streamName are honoured.
-//
-// Returns true on success.  On failure returns false/FAILURE; failures that
-// reach the cleanup path also free the user and device buffers.
-bool RtApiPulse::probeDeviceOpen( unsigned int device, StreamMode mode,
-                                  unsigned int channels, unsigned int firstChannel,
-                                  unsigned int sampleRate, RtAudioFormat format,
-                                  unsigned int *bufferSize, RtAudio::StreamOptions *options )
-{
-  PulseAudioHandle *pah = 0;
-  unsigned long bufferBytes = 0;
-  pa_sample_spec ss;
-
-  if ( device != 0 ) return false;
-  if ( mode != INPUT && mode != OUTPUT ) return false;
-  if ( channels != 1 && channels != 2 ) {
-    errorText_ = "RtApiPulse::probeDeviceOpen: unsupported number of channels.";
-    return false;
-  }
-  ss.channels = channels;
-
-  if ( firstChannel != 0 ) return false;
-
-  bool sr_found = false;
-  for ( const unsigned int *sr = SUPPORTED_SAMPLERATES; *sr; ++sr ) {
-    if ( sampleRate == *sr ) {
-      sr_found = true;
-      stream_.sampleRate = sampleRate;
-      ss.rate = sampleRate;
-      break;
-    }
-  }
-  if ( !sr_found ) {
-    errorText_ = "RtApiPulse::probeDeviceOpen: unsupported sample rate.";
-    return false;
-  }
-
-  bool sf_found = false;
-  for ( const rtaudio_pa_format_mapping_t *sf = supported_sampleformats;
-        sf->rtaudio_format && sf->pa_format != PA_SAMPLE_INVALID; ++sf ) {
-    if ( format == sf->rtaudio_format ) {
-      sf_found = true;
-      stream_.userFormat = sf->rtaudio_format;
-      stream_.deviceFormat[mode] = stream_.userFormat;
-      ss.format = sf->pa_format;
-      break;
-    }
-  }
-  if ( !sf_found ) { // Use internal data format conversion.
-    stream_.userFormat = format;
-    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-    ss.format = PA_SAMPLE_FLOAT32LE;
-  }
-
-  // Set other stream parameters.
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-  stream_.deviceInterleaved[mode] = true;
-  stream_.nBuffers = 1;
-  stream_.doByteSwap[mode] = false;
-  stream_.nUserChannels[mode] = channels;
-  stream_.nDeviceChannels[mode] = channels + firstChannel;
-  stream_.channelOffset[mode] = 0;
-  std::string streamName = "RtAudio";
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate necessary internal buffers.
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiPulse::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-  stream_.bufferSize = *bufferSize;
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( mode == INPUT ) {
-      // Reuse the output side's device buffer when it is large enough.
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-        if ( bufferBytes <= bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiPulse::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  stream_.device[mode] = device;
-
-  // Setup the buffer conversion information structure.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
-  if ( !stream_.apiHandle ) {
-    // Assign to the function-level `pah`; the previous code declared a
-    // second, shadowing variable here.  A null check after `new` is
-    // omitted because a failed `new` throws rather than returning null.
-    pah = new PulseAudioHandle;
-
-    // Publish the handle only once its condition variable is usable, and
-    // release it if initialisation fails, so that no half-constructed
-    // handle is left behind in stream_.apiHandle.
-    if ( pthread_cond_init( &pah->runnable_cv, NULL ) != 0 ) {
-      delete pah;
-      pah = 0;
-      errorText_ = "RtApiPulse::probeDeviceOpen: error creating condition variable.";
-      goto error;
-    }
-    stream_.apiHandle = pah;
-  }
-  pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  int error;
-  if ( options && !options->streamName.empty() ) streamName = options->streamName;
-  switch ( mode ) {
-  case INPUT:
-    {
-      pa_buffer_attr buffer_attr;
-      buffer_attr.fragsize = bufferBytes;
-      buffer_attr.maxlength = -1;
-
-      pah->s_rec = pa_simple_new( NULL, streamName.c_str(), PA_STREAM_RECORD, NULL, "Record", &ss, NULL, &buffer_attr, &error );
-      if ( !pah->s_rec ) {
-        errorText_ = "RtApiPulse::probeDeviceOpen: error connecting input to PulseAudio server.";
-        goto error;
-      }
-    }
-    break;
-  case OUTPUT:
-    // Use the caller-supplied stream name here as well; the playback
-    // connection previously always identified itself as "RtAudio".
-    pah->s_play = pa_simple_new( NULL, streamName.c_str(), PA_STREAM_PLAYBACK, NULL, "Playback", &ss, NULL, NULL, &error );
-    if ( !pah->s_play ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error connecting output to PulseAudio server.";
-      goto error;
-    }
-    break;
-  default:
-    goto error;
-  }
-
-  if ( stream_.mode == UNINITIALIZED )
-    stream_.mode = mode;
-  else if ( stream_.mode == mode )
-    goto error;
-  else
-    stream_.mode = DUPLEX;
-
-  // Start the callback thread the first time a direction is opened.
-  if ( !stream_.callbackInfo.isRunning ) {
-    stream_.callbackInfo.object = this;
-    stream_.callbackInfo.isRunning = true;
-    if ( pthread_create( &pah->thread, NULL, pulseaudio_callback, (void *)&stream_.callbackInfo) != 0 ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error creating thread.";
-      goto error;
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-  return true;
-
- error:
-  if ( pah && stream_.callbackInfo.isRunning ) {
-    pthread_cond_destroy( &pah->runnable_cv );
-    delete pah;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  return FAILURE;
-}
-
-//******************** End of __LINUX_PULSE__ *********************//
-#endif
-
-#if defined(__LINUX_OSS__)
-
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/soundcard.h>
-#include <errno.h>
-#include <math.h>
-
-static void *ossCallbackHandler(void * ptr);
-
-// A structure to hold various information related to the OSS API
-// implementation.
-// OSS-specific stream state stored in stream_.apiHandle.
-struct OssHandle {
-  int id[2];               // device ids
-  bool xrun[2];
-  bool triggered;
-  pthread_cond_t runnable;
-
-  // Starts with zeroed ids and all flags cleared; `runnable` is left for
-  // pthread_cond_init().
-  OssHandle()
-    : triggered( false )
-  {
-    for ( int i=0; i<2; i++ ) {
-      id[i] = 0;
-      xrun[i] = false;
-    }
-  }
-};
-
-// Construction needs no OSS-specific setup.
-RtApiOss :: RtApiOss()
-{
-}
-
-// Closes the stream on destruction unless it is already closed.
-RtApiOss :: ~RtApiOss()
-{
-  const bool stillOpen = ( stream_.state != STREAM_CLOSED );
-  if ( stillOpen ) closeStream();
-}
-
-// Returns the number of audio devices reported by SNDCTL_SYSINFO on
-// /dev/mixer, or 0 (after issuing a warning) if the mixer cannot be
-// opened or queried.
-unsigned int RtApiOss :: getDeviceCount( void )
-{
-  const int mixer = open( "/dev/mixer", O_RDWR, 0 );
-  if ( mixer == -1 ) {
-    errorText_ = "RtApiOss::getDeviceCount: error opening '/dev/mixer'.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  // The descriptor is only needed for this one query, so close it right
-  // after the ioctl regardless of the outcome.
-  oss_sysinfo systemInfo;
-  const int status = ioctl( mixer, SNDCTL_SYSINFO, &systemInfo );
-  close( mixer );
-
-  if ( status == -1 ) {
-    errorText_ = "RtApiOss::getDeviceCount: error getting sysinfo, OSS version >= 4.0 is required.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  return systemInfo.numaudios;
-}
-
-// Probes one OSS device and returns its capabilities.
-//
-// device is the index passed to SNDCTL_AUDIOINFO; it must be below the
-// device count reported by SNDCTL_SYSINFO.  The returned info has
-// `probed` set to true only if the device could be queried, reports at
-// least one supported data format and at least one supported sample rate.
-// Every failure is reported through error() and yields an info whose
-// `probed` member is false.
-RtAudio::DeviceInfo RtApiOss :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
-  if ( mixerfd == -1 ) {
-    errorText_ = "RtApiOss::getDeviceInfo: error opening '/dev/mixer'.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  oss_sysinfo sysinfo;
-  int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
-  if ( result == -1 ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceInfo: error getting sysinfo, OSS version >= 4.0 is required.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  unsigned nDevices = sysinfo.numaudios;
-  if ( nDevices == 0 ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceInfo: no devices found!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  if ( device >= nDevices ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  // Zero the structure before the query so that no field holds an
-  // indeterminate value if the ioctl fails.
-  oss_audioinfo ainfo = oss_audioinfo();
-  ainfo.dev = device;
-  result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
-  close( mixerfd );
-  if ( result == -1 ) {
-    // Identify the device by index: ainfo.name is not filled in when the
-    // query fails (it was previously streamed while uninitialised).
-    errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << device << ") info.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Probe channels
-  if ( ainfo.caps & PCM_CAP_OUTPUT ) info.outputChannels = ainfo.max_channels;
-  if ( ainfo.caps & PCM_CAP_INPUT ) info.inputChannels = ainfo.max_channels;
-  if ( ( ainfo.caps & PCM_CAP_DUPLEX ) && info.outputChannels > 0 && info.inputChannels > 0 ) {
-    // Duplex is limited to the smaller of the two channel counts.
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-  }
-
-  // Probe data formats ... do for input
-  unsigned long mask = ainfo.iformats;
-  if ( mask & AFMT_S16_LE || mask & AFMT_S16_BE )
-    info.nativeFormats |= RTAUDIO_SINT16;
-  if ( mask & AFMT_S8 )
-    info.nativeFormats |= RTAUDIO_SINT8;
-  if ( mask & AFMT_S32_LE || mask & AFMT_S32_BE )
-    info.nativeFormats |= RTAUDIO_SINT32;
-  if ( mask & AFMT_FLOAT )
-    info.nativeFormats |= RTAUDIO_FLOAT32;
-  if ( mask & AFMT_S24_LE || mask & AFMT_S24_BE )
-    info.nativeFormats |= RTAUDIO_SINT24;
-
-  // Check that we have at least one supported format
-  if ( info.nativeFormats == 0 ) {
-    errorStream_ << "RtApiOss::getDeviceInfo: device (" << ainfo.name << ") data format not supported by RtAudio.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Probe the supported sample rates.  The preferred rate ends up as the
-  // highest supported rate not above 48000, or the first supported rate
-  // if none qualifies.
-  info.sampleRates.clear();
-  if ( ainfo.nrates ) {
-    // The device lists discrete rates: keep those RtAudio knows about.
-    for ( unsigned int i=0; i<ainfo.nrates; i++ ) {
-      for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-        if ( ainfo.rates[i] == SAMPLE_RATES[k] ) {
-          info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-          if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-            info.preferredSampleRate = SAMPLE_RATES[k];
-
-          break;
-        }
-      }
-    }
-  }
-  else {
-    // Check min and max rate values;
-    for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-      if ( ainfo.min_rate <= (int) SAMPLE_RATES[k] && ainfo.max_rate >= (int) SAMPLE_RATES[k] ) {
-        info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-        if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-          info.preferredSampleRate = SAMPLE_RATES[k];
-      }
-    }
-  }
-
-  if ( info.sampleRates.size() == 0 ) {
-    errorStream_ << "RtApiOss::getDeviceInfo: no supported sample rates found for device (" << ainfo.name << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-  }
-  else {
-    info.probed = true;
-    info.name = ainfo.name;
-  }
-
-  return info;
-}
-
-
-bool RtApiOss :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
- unsigned int firstChannel, unsigned int sampleRate,
- RtAudioFormat format, unsigned int *bufferSize,
- RtAudio::StreamOptions *options )
-{
- int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
- if ( mixerfd == -1 ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error opening '/dev/mixer'.";
- return FAILURE;
- }
-
- oss_sysinfo sysinfo;
- int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
- if ( result == -1 ) {
- close( mixerfd );
- errorText_ = "RtApiOss::probeDeviceOpen: error getting sysinfo, OSS version >= 4.0 is required.";
- return FAILURE;
- }
-
- unsigned nDevices = sysinfo.numaudios;
- if ( nDevices == 0 ) {
- // This should not happen because a check is made before this function is called.
- close( mixerfd );
- errorText_ = "RtApiOss::probeDeviceOpen: no devices found!";
- return FAILURE;
- }
-
- if ( device >= nDevices ) {
- // This should not happen because a check is made before this function is called.
- close( mixerfd );
- errorText_ = "RtApiOss::probeDeviceOpen: device ID is invalid!";
- return FAILURE;
- }
-
- oss_audioinfo ainfo;
- ainfo.dev = device;
- result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
- close( mixerfd );
- if ( result == -1 ) {
- errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << ainfo.name << ") info.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Check if device supports input or output
- if ( ( mode == OUTPUT && !( ainfo.caps & PCM_CAP_OUTPUT ) ) ||
- ( mode == INPUT && !( ainfo.caps & PCM_CAP_INPUT ) ) ) {
- if ( mode == OUTPUT )
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support output.";
- else
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support input.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- int flags = 0;
- OssHandle *handle = (OssHandle *) stream_.apiHandle;
- if ( mode == OUTPUT )
- flags |= O_WRONLY;
- else { // mode == INPUT
- if (stream_.mode == OUTPUT && stream_.device[0] == device) {
- // We just set the same device for playback ... close and reopen for duplex (OSS only).
- close( handle->id[0] );
- handle->id[0] = 0;
- if ( !( ainfo.caps & PCM_CAP_DUPLEX ) ) {
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support duplex mode.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- // Check that the number previously set channels is the same.
- if ( stream_.nUserChannels[0] != channels ) {
- errorStream_ << "RtApiOss::probeDeviceOpen: input/output channels must be equal for OSS duplex device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- flags |= O_RDWR;
- }
- else
- flags |= O_RDONLY;
- }
-
- // Set exclusive access if specified.
- if ( options && options->flags & RTAUDIO_HOG_DEVICE ) flags |= O_EXCL;
-
- // Try to open the device.
- int fd;
- fd = open( ainfo.devnode, flags, 0 );
- if ( fd == -1 ) {
- if ( errno == EBUSY )
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") is busy.";
- else
- errorStream_ << "RtApiOss::probeDeviceOpen: error opening device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // For duplex operation, specifically set this mode (this doesn't seem to work).
- /*
- if ( flags | O_RDWR ) {
- result = ioctl( fd, SNDCTL_DSP_SETDUPLEX, NULL );
- if ( result == -1) {
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting duplex mode for device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
- */
-
- // Check the device channel support.
- stream_.nUserChannels[mode] = channels;
- if ( ainfo.max_channels < (int)(channels + firstChannel) ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: the device (" << ainfo.name << ") does not support requested channel parameters.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Set the number of channels.
- int deviceChannels = channels + firstChannel;
- result = ioctl( fd, SNDCTL_DSP_CHANNELS, &deviceChannels );
- if ( result == -1 || deviceChannels < (int)(channels + firstChannel) ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting channel parameters on device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- stream_.nDeviceChannels[mode] = deviceChannels;
-
- // Get the data format mask
- int mask;
- result = ioctl( fd, SNDCTL_DSP_GETFMTS, &mask );
- if ( result == -1 ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error getting device (" << ainfo.name << ") data formats.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Determine how to set the device format.
- stream_.userFormat = format;
- int deviceFormat = -1;
- stream_.doByteSwap[mode] = false;
- if ( format == RTAUDIO_SINT8 ) {
- if ( mask & AFMT_S8 ) {
- deviceFormat = AFMT_S8;
- stream_.deviceFormat[mode] = RTAUDIO_SINT8;
- }
- }
- else if ( format == RTAUDIO_SINT16 ) {
- if ( mask & AFMT_S16_NE ) {
- deviceFormat = AFMT_S16_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- }
- else if ( mask & AFMT_S16_OE ) {
- deviceFormat = AFMT_S16_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- stream_.doByteSwap[mode] = true;
- }
- }
- else if ( format == RTAUDIO_SINT24 ) {
- if ( mask & AFMT_S24_NE ) {
- deviceFormat = AFMT_S24_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- }
- else if ( mask & AFMT_S24_OE ) {
- deviceFormat = AFMT_S24_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- stream_.doByteSwap[mode] = true;
- }
- }
- else if ( format == RTAUDIO_SINT32 ) {
- if ( mask & AFMT_S32_NE ) {
- deviceFormat = AFMT_S32_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- }
- else if ( mask & AFMT_S32_OE ) {
- deviceFormat = AFMT_S32_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- stream_.doByteSwap[mode] = true;
- }
- }
-
- if ( deviceFormat == -1 ) {
- // The user requested format is not natively supported by the device.
- if ( mask & AFMT_S16_NE ) {
- deviceFormat = AFMT_S16_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- }
- else if ( mask & AFMT_S32_NE ) {
- deviceFormat = AFMT_S32_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- }
- else if ( mask & AFMT_S24_NE ) {
- deviceFormat = AFMT_S24_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- }
- else if ( mask & AFMT_S16_OE ) {
- deviceFormat = AFMT_S16_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- stream_.doByteSwap[mode] = true;
- }
- else if ( mask & AFMT_S32_OE ) {
- deviceFormat = AFMT_S32_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- stream_.doByteSwap[mode] = true;
- }
- else if ( mask & AFMT_S24_OE ) {
- deviceFormat = AFMT_S24_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- stream_.doByteSwap[mode] = true;
- }
- else if ( mask & AFMT_S8) {
- deviceFormat = AFMT_S8;
- stream_.deviceFormat[mode] = RTAUDIO_SINT8;
- }
- }
-
- if ( stream_.deviceFormat[mode] == 0 ) {
- // This really shouldn't happen ...
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") data format not supported by RtAudio.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Set the data format.
- int temp = deviceFormat;
- result = ioctl( fd, SNDCTL_DSP_SETFMT, &deviceFormat );
- if ( result == -1 || deviceFormat != temp ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting data format on device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Attempt to set the buffer size. According to OSS, the minimum
- // number of buffers is two. The supposed minimum buffer size is 16
- // bytes, so that will be our lower bound. The argument to this
- // call is in the form 0xMMMMSSSS (hex), where the buffer size (in
- // bytes) is given as 2^SSSS and the number of buffers as 2^MMMM.
- // We'll check the actual value used near the end of the setup
- // procedure.
- int ossBufferBytes = *bufferSize * formatBytes( stream_.deviceFormat[mode] ) * deviceChannels;
- if ( ossBufferBytes < 16 ) ossBufferBytes = 16;
- int buffers = 0;
- if ( options ) buffers = options->numberOfBuffers;
- if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) buffers = 2;
- if ( buffers < 2 ) buffers = 3;
- temp = ((int) buffers << 16) + (int)( log10( (double)ossBufferBytes ) / log10( 2.0 ) );
- result = ioctl( fd, SNDCTL_DSP_SETFRAGMENT, &temp );
- if ( result == -1 ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting buffer size on device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- stream_.nBuffers = buffers;
-
- // Save buffer size (in sample frames).
- *bufferSize = ossBufferBytes / ( formatBytes(stream_.deviceFormat[mode]) * deviceChannels );
- stream_.bufferSize = *bufferSize;
-
- // Set the sample rate.
- int srate = sampleRate;
- result = ioctl( fd, SNDCTL_DSP_SPEED, &srate );
- if ( result == -1 ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting sample rate (" << sampleRate << ") on device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Verify the sample rate setup worked.
- if ( abs( srate - sampleRate ) > 100 ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support sample rate (" << sampleRate << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- stream_.sampleRate = sampleRate;
-
- if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device) {
- // We're doing duplex setup here.
- stream_.deviceFormat[0] = stream_.deviceFormat[1];
- stream_.nDeviceChannels[0] = deviceChannels;
- }
-
- // Set interleaving parameters.
- stream_.userInterleaved = true;
- stream_.deviceInterleaved[mode] = true;
- if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
- stream_.userInterleaved = false;
-
- // Set flags for buffer conversion
- stream_.doConvertBuffer[mode] = false;
- if ( stream_.userFormat != stream_.deviceFormat[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
- stream_.nUserChannels[mode] > 1 )
- stream_.doConvertBuffer[mode] = true;
-
- // Allocate the stream handles if necessary and then save.
- if ( stream_.apiHandle == 0 ) {
- try {
- handle = new OssHandle;
- }
- catch ( std::bad_alloc& ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error allocating OssHandle memory.";
- goto error;
- }
-
- if ( pthread_cond_init( &handle->runnable, NULL ) ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error initializing pthread condition variable.";
- goto error;
- }
-
- stream_.apiHandle = (void *) handle;
- }
- else {
- handle = (OssHandle *) stream_.apiHandle;
- }
- handle->id[mode] = fd;
-
- // Allocate necessary internal buffers.
- unsigned long bufferBytes;
- bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
- stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
- if ( stream_.userBuffer[mode] == NULL ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error allocating user buffer memory.";
- goto error;
- }
-
- if ( stream_.doConvertBuffer[mode] ) {
-
- bool makeBuffer = true;
- bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
- if ( mode == INPUT ) {
- if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
- unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
- if ( bufferBytes <= bytesOut ) makeBuffer = false;
- }
- }
-
- if ( makeBuffer ) {
- bufferBytes *= *bufferSize;
- if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
- stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
- if ( stream_.deviceBuffer == NULL ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error allocating device buffer memory.";
- goto error;
- }
- }
- }
-
- stream_.device[mode] = device;
- stream_.state = STREAM_STOPPED;
-
- // Setup the buffer conversion information structure.
- if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
- // Setup thread if necessary.
- if ( stream_.mode == OUTPUT && mode == INPUT ) {
- // We had already set up an output stream.
- stream_.mode = DUPLEX;
- if ( stream_.device[0] == device ) handle->id[0] = fd;
- }
- else {
- stream_.mode = mode;
-
- // Setup callback thread.
- stream_.callbackInfo.object = (void *) this;
-
- // Set the thread attributes for joinable and realtime scheduling
- // priority. The higher priority will only take effect if the
- // program is run as root or suid.
- pthread_attr_t attr;
- pthread_attr_init( &attr );
- pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
-#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
- if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
- struct sched_param param;
- int priority = options->priority;
- int min = sched_get_priority_min( SCHED_RR );
- int max = sched_get_priority_max( SCHED_RR );
- if ( priority < min ) priority = min;
- else if ( priority > max ) priority = max;
- param.sched_priority = priority;
- pthread_attr_setschedparam( &attr, &param );
- pthread_attr_setschedpolicy( &attr, SCHED_RR );
- }
- else
- pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
-#else
- pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
-#endif
-
- stream_.callbackInfo.isRunning = true;
- result = pthread_create( &stream_.callbackInfo.thread, &attr, ossCallbackHandler, &stream_.callbackInfo );
- pthread_attr_destroy( &attr );
- if ( result ) {
- stream_.callbackInfo.isRunning = false;
- errorText_ = "RtApiOss::error creating callback thread!";
- goto error;
- }
- }
-
- return SUCCESS;
-
- error:
- if ( handle ) {
- pthread_cond_destroy( &handle->runnable );
- if ( handle->id[0] ) close( handle->id[0] );
- if ( handle->id[1] ) close( handle->id[1] );
- delete handle;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- return FAILURE;
-}
-
-// Shuts the OSS stream down completely: stops the callback thread, halts
-// the device if it was still running, and releases the handle, both user
-// buffers and the device buffer.  Leaves the stream UNINITIALIZED/CLOSED.
-void RtApiOss :: closeStream()
-{
-  // Nothing to tear down when no stream is open; just warn.
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiOss::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  OssHandle *ossHandle = (OssHandle *) stream_.apiHandle;
-
-  // Ask the callback thread to leave its loop, wake it if the stream is
-  // stopped (it may be waiting on the condition variable), then join it.
-  stream_.callbackInfo.isRunning = false;
-  MUTEX_LOCK( &stream_.mutex );
-  if ( stream_.state == STREAM_STOPPED ) pthread_cond_signal( &ossHandle->runnable );
-  MUTEX_UNLOCK( &stream_.mutex );
-  pthread_join( stream_.callbackInfo.thread, NULL );
-
-  // A stream that was still running gets halted on the output descriptor
-  // for OUTPUT/DUPLEX, otherwise on the input descriptor.
-  if ( stream_.state == STREAM_RUNNING ) {
-    const bool hasOutput = ( stream_.mode == OUTPUT || stream_.mode == DUPLEX );
-    if ( hasOutput )
-      ioctl( ossHandle->id[0], SNDCTL_DSP_HALT, 0 );
-    else
-      ioctl( ossHandle->id[1], SNDCTL_DSP_HALT, 0 );
-    stream_.state = STREAM_STOPPED;
-  }
-
-  // Release the API handle together with its descriptors.
-  if ( ossHandle ) {
-    pthread_cond_destroy( &ossHandle->runnable );
-    if ( ossHandle->id[0] ) close( ossHandle->id[0] );
-    if ( ossHandle->id[1] ) close( ossHandle->id[1] );
-    delete ossHandle;
-    stream_.apiHandle = 0;
-  }
-
-  // Release the per-direction user buffers.
-  for ( int dir=0; dir<2; dir++ ) {
-    if ( !stream_.userBuffer[dir] ) continue;
-    free( stream_.userBuffer[dir] );
-    stream_.userBuffer[dir] = 0;
-  }
-
-  // Release the shared device buffer.
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-// Marks the stream as running and wakes the callback thread.  Emits a
-// warning and does nothing if the stream is already running.
-void RtApiOss :: startStream()
-{
-  verifyStream();
-
-  const bool alreadyRunning = ( stream_.state == STREAM_RUNNING );
-  if ( alreadyRunning ) {
-    errorText_ = "RtApiOss::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Only the state flag changes here: OSS automatically starts when fed
-  // samples, so no device call is needed.
-  MUTEX_LOCK( &stream_.mutex );
-  stream_.state = STREAM_RUNNING;
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  // Signal the condition the callback thread waits on while stopped.
-  OssHandle *ossHandle = (OssHandle *) stream_.apiHandle;
-  pthread_cond_signal( &ossHandle->runnable );
-}
-
-// Stops the stream after flushing the output: the output descriptor is fed
-// nBuffers+1 buffers of zeros and then halted, after which the input
-// descriptor is halted when it is used on its own (INPUT mode, or DUPLEX
-// with a descriptor different from the output one).
-//
-// The stream always ends up in STREAM_STOPPED.  A failed write only raises
-// a WARNING; a failed halt ioctl raises a SYSTEM_ERROR once the mutex has
-// been released.
-void RtApiOss :: stopStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiOss::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  // The state might change while waiting on a mutex.
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_UNLOCK( &stream_.mutex );
-    return;
-  }
-
-  int result = 0;
-  OssHandle *handle = (OssHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    // Flush the output with zeros a few times.
-    char *buffer;
-    int samples;
-    RtAudioFormat format;
-
-    // Use whichever buffer is actually handed to the device.
-    if ( stream_.doConvertBuffer[0] ) {
-      buffer = stream_.deviceBuffer;
-      samples = stream_.bufferSize * stream_.nDeviceChannels[0];
-      format = stream_.deviceFormat[0];
-    }
-    else {
-      buffer = stream_.userBuffer[0];
-      samples = stream_.bufferSize * stream_.nUserChannels[0];
-      format = stream_.userFormat;
-    }
-
-    memset( buffer, 0, samples * formatBytes(format) );
-    for ( unsigned int i=0; i<stream_.nBuffers+1; i++ ) {
-      result = write( handle->id[0], buffer, samples * formatBytes(format) );
-      if ( result == -1 ) {
-        errorText_ = "RtApiOss::stopStream: audio write error.";
-        error( RtAudioError::WARNING );
-      }
-    }
-
-    result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
-    if ( result == -1 ) {
-      errorStream_ << "RtApiOss::stopStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-    handle->triggered = false;
-  }
-
-  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
-    result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
-    if ( result == -1 ) {
-      // Report the input device (index 1): that is the descriptor that failed.
-      errorStream_ << "RtApiOss::stopStream: system error stopping input callback procedure on device (" << stream_.device[1] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
- unlock:
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  // The fatal error is raised only after the mutex has been released.
-  if ( result != -1 ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-// Stops the stream immediately, without flushing pending output: the output
-// descriptor is halted for OUTPUT/DUPLEX streams, then the input descriptor
-// is halted when it is used on its own (INPUT mode, or DUPLEX with a
-// descriptor different from the output one).
-//
-// The stream always ends up in STREAM_STOPPED.  A failed halt ioctl raises
-// a SYSTEM_ERROR once the mutex has been released.
-void RtApiOss :: abortStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiOss::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  // The state might change while waiting on a mutex.
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_UNLOCK( &stream_.mutex );
-    return;
-  }
-
-  int result = 0;
-  OssHandle *handle = (OssHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
-    if ( result == -1 ) {
-      errorStream_ << "RtApiOss::abortStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-    handle->triggered = false;
-  }
-
-  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
-    result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
-    if ( result == -1 ) {
-      // Report the input device (index 1): that is the descriptor that failed.
-      errorStream_ << "RtApiOss::abortStream: system error stopping input callback procedure on device (" << stream_.device[1] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
- unlock:
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  // The fatal error is raised only after the mutex has been released.
-  if ( result != -1 ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-// Runs one cycle of the stream: waits while the stream is stopped, invokes
-// the user callback, writes the output buffer to the device and reads the
-// input buffer from it.  Called repeatedly by ossCallbackHandler().
-//
-// A user callback return value of 2 aborts the stream right away (no device
-// I/O for this cycle); a return value of 1 stops the stream after this
-// cycle's I/O has been done.
-void RtApiOss :: callbackEvent()
-{
- OssHandle *handle = (OssHandle *) stream_.apiHandle;
- // While stopped, wait on the "runnable" condition; give up on this cycle
- // if the stream is not running once the wait returns.
- // NOTE(review): the state is tested before the mutex is taken.
- if ( stream_.state == STREAM_STOPPED ) {
- MUTEX_LOCK( &stream_.mutex );
- pthread_cond_wait( &handle->runnable, &stream_.mutex );
- if ( stream_.state != STREAM_RUNNING ) {
- MUTEX_UNLOCK( &stream_.mutex );
- return;
- }
- MUTEX_UNLOCK( &stream_.mutex );
- }
-
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiOss::callbackEvent(): the stream is closed ... this shouldn't happen!";
- error( RtAudioError::WARNING );
- return;
- }
-
- // Invoke user callback to get fresh output data.
- int doStopStream = 0;
- RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- // Report (and clear) xrun flags recorded by a previous cycle.
- if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
- status |= RTAUDIO_OUTPUT_UNDERFLOW;
- handle->xrun[0] = false;
- }
- if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
- status |= RTAUDIO_INPUT_OVERFLOW;
- handle->xrun[1] = false;
- }
- doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1],
- stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
- if ( doStopStream == 2 ) {
- this->abortStream();
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
-
- // The state might change while waiting on a mutex.
- if ( stream_.state == STREAM_STOPPED ) goto unlock;
-
- int result;
- char *buffer;
- int samples;
- RtAudioFormat format;
-
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
- // Setup parameters and do buffer conversion if necessary.
- if ( stream_.doConvertBuffer[0] ) {
- buffer = stream_.deviceBuffer;
- convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
- samples = stream_.bufferSize * stream_.nDeviceChannels[0];
- format = stream_.deviceFormat[0];
- }
- else {
- buffer = stream_.userBuffer[0];
- samples = stream_.bufferSize * stream_.nUserChannels[0];
- format = stream_.userFormat;
- }
-
- // Do byte swapping if necessary.
- if ( stream_.doByteSwap[0] )
- byteSwapBuffer( buffer, samples, format );
-
- // First duplex cycle: the trigger is set to 0, the first buffer is
- // written, then the trigger is set to enable input and output together.
- if ( stream_.mode == DUPLEX && handle->triggered == false ) {
- int trig = 0;
- ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
- result = write( handle->id[0], buffer, samples * formatBytes(format) );
- trig = PCM_ENABLE_INPUT|PCM_ENABLE_OUTPUT;
- ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
- handle->triggered = true;
- }
- else
- // Write samples to device.
- result = write( handle->id[0], buffer, samples * formatBytes(format) );
-
- if ( result == -1 ) {
- // We'll assume this is an underrun, though there isn't a
- // specific means for determining that.
- handle->xrun[0] = true;
- errorText_ = "RtApiOss::callbackEvent: audio write error.";
- error( RtAudioError::WARNING );
- // Continue on to input section.
- }
- }
-
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
- // Setup parameters.
- if ( stream_.doConvertBuffer[1] ) {
- buffer = stream_.deviceBuffer;
- samples = stream_.bufferSize * stream_.nDeviceChannels[1];
- format = stream_.deviceFormat[1];
- }
- else {
- buffer = stream_.userBuffer[1];
- samples = stream_.bufferSize * stream_.nUserChannels[1];
- format = stream_.userFormat;
- }
-
- // Read samples from device.
- result = read( handle->id[1], buffer, samples * formatBytes(format) );
-
- if ( result == -1 ) {
- // We'll assume this is an overrun, though there isn't a
- // specific means for determining that.
- handle->xrun[1] = true;
- errorText_ = "RtApiOss::callbackEvent: audio read error.";
- error( RtAudioError::WARNING );
- goto unlock;
- }
-
- // Do byte swapping if necessary.
- if ( stream_.doByteSwap[1] )
- byteSwapBuffer( buffer, samples, format );
-
- // Do buffer conversion if necessary.
- if ( stream_.doConvertBuffer[1] )
- convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
- }
-
- unlock:
- MUTEX_UNLOCK( &stream_.mutex );
-
- // The stream time is advanced on every path that reaches this point,
- // including the early "goto unlock" exits above.
- RtApi::tickStreamTime();
- if ( doStopStream == 1 ) this->stopStream();
-}
-
-// Entry point of the OSS callback thread.  `ptr` is the stream's
-// CallbackInfo; the thread keeps calling RtApiOss::callbackEvent() on the
-// object stored there until the isRunning flag is cleared.
-static void *ossCallbackHandler( void *ptr )
-{
-  CallbackInfo *callbackInfo = (CallbackInfo *) ptr;
-  RtApiOss *api = (RtApiOss *) callbackInfo->object;
-  bool *keepGoing = &callbackInfo->isRunning;
-
-  for ( ;; ) {
-    if ( *keepGoing != true ) break;
-    // Offer a cancellation point before each cycle.
-    pthread_testcancel();
-    api->callbackEvent();
-  }
-
-  pthread_exit( NULL );
-}
-
-//******************** End of __LINUX_OSS__ *********************//
-#endif
-
-
-// *************************************************** //
-//
-// Protected common (OS-independent) RtAudio methods.
-//
-// *************************************************** //
-
-// This method can be modified to control the behavior of error
-// message printing.
-// Reports the message currently stored in errorText_.
-//
-// With a user error callback installed, the callback receives the type and
-// message; for anything other than a WARNING the stream is aborted first
-// (unless it is already stopped).  Errors raised while a callback report
-// is in progress are dropped.
-// Without a callback, warnings go to std::cerr when showWarnings_ is set
-// and every other type is thrown as an RtAudioError.
-void RtApi :: error( RtAudioError::Type type )
-{
-  errorStream_.str(""); // clear the ostringstream
-
-  const bool isWarning = ( type == RtAudioError::WARNING );
-  RtAudioErrorCallback userHandler = (RtAudioErrorCallback) stream_.callbackInfo.errorCallback;
-
-  if ( !userHandler ) {
-    if ( !isWarning )
-      throw( RtAudioError( errorText_, type ) );
-    if ( showWarnings_ == true )
-      std::cerr << '\n' << errorText_ << "\n\n";
-    return;
-  }
-
-  // abortStream() can generate new error messages. Ignore them. Just keep original one.
-  if ( firstErrorOccurred_ )
-    return;
-  firstErrorOccurred_ = true;
-
-  // Copy the text now: abortStream() below may overwrite errorText_.
-  const std::string savedMessage = errorText_;
-
-  if ( !isWarning && stream_.state != STREAM_STOPPED ) {
-    stream_.callbackInfo.isRunning = false; // exit from the thread
-    abortStream();
-  }
-
-  userHandler( type, savedMessage );
-  firstErrorOccurred_ = false;
-}
-
-// Raises an INVALID_USE error when no stream is currently open; otherwise
-// does nothing.
-void RtApi :: verifyStream()
-{
-  if ( stream_.state != STREAM_CLOSED ) return;
-
-  errorText_ = "RtApi:: a stream is not open!";
-  error( RtAudioError::INVALID_USE );
-}
-
-// Resets every field of stream_ to its "no stream open" value.  Pointers
-// are only zeroed here, not freed.
-void RtApi :: clearStreamInfo()
-{
-  // Stream-wide state.
-  stream_.state = STREAM_CLOSED;
-  stream_.mode = UNINITIALIZED;
-  stream_.streamTime = 0.0;
-  stream_.sampleRate = 0;
-  stream_.bufferSize = 0;
-  stream_.nBuffers = 0;
-  stream_.userFormat = 0;
-  stream_.userInterleaved = true;
-  stream_.apiHandle = 0;
-  stream_.deviceBuffer = 0;
-
-  // Callback bookkeeping.
-  stream_.callbackInfo.isRunning = false;
-  stream_.callbackInfo.callback = 0;
-  stream_.callbackInfo.userData = 0;
-  stream_.callbackInfo.errorCallback = 0;
-
-  // Per-direction settings (index 0 and index 1).
-  for ( int dir=0; dir<2; dir++ ) {
-    stream_.device[dir] = 11111;
-    stream_.userBuffer[dir] = 0;
-    stream_.latency[dir] = 0;
-    stream_.channelOffset[dir] = 0;
-    stream_.nUserChannels[dir] = 0;
-    stream_.nDeviceChannels[dir] = 0;
-    stream_.deviceFormat[dir] = 0;
-    stream_.deviceInterleaved[dir] = true;
-    stream_.doByteSwap[dir] = false;
-    stream_.doConvertBuffer[dir] = false;
-
-    ConvertInfo &cvt = stream_.convertInfo[dir];
-    cvt.channels = 0;
-    cvt.inJump = 0;
-    cvt.outJump = 0;
-    cvt.inFormat = 0;
-    cvt.outFormat = 0;
-    cvt.inOffset.clear();
-    cvt.outOffset.clear();
-  }
-}
-
-// Returns the size in bytes of one sample of `format`.  For a format that
-// is not recognised a WARNING is raised and 0 is returned.
-unsigned int RtApi :: formatBytes( RtAudioFormat format )
-{
-  if ( format == RTAUDIO_SINT16 ) return 2;
-
-  const bool fourByteFormat = ( format == RTAUDIO_SINT32 || format == RTAUDIO_FLOAT32 );
-  if ( fourByteFormat ) return 4;
-
-  if ( format == RTAUDIO_FLOAT64 ) return 8;
-  if ( format == RTAUDIO_SINT24 ) return 3;
-  if ( format == RTAUDIO_SINT8 ) return 1;
-
-  // No known format matched.
-  errorText_ = "RtApi::formatBytes: undefined format.";
-  error( RtAudioError::WARNING );
-
-  return 0;
-}
-
-// Fills stream_.convertInfo[mode] with what convertBuffer() needs for the
-// given direction: sample formats, per-frame jumps, the number of channels
-// to copy and the per-channel offsets in the source and destination buffers.
-// `firstChannel` shifts the offsets on the device side of the conversion.
-// Offsets are appended to the existing offset vectors.
-void RtApi :: setConvertInfo( StreamMode mode, unsigned int firstChannel )
-{
-  ConvertInfo &cvt = stream_.convertInfo[mode];
-
-  if ( mode == INPUT ) {
-    // Device buffer is the source, user buffer the destination.
-    cvt.inFormat = stream_.deviceFormat[1];
-    cvt.outFormat = stream_.userFormat;
-    cvt.inJump = stream_.nDeviceChannels[1];
-    cvt.outJump = stream_.nUserChannels[1];
-  }
-  else {
-    // User buffer is the source, device buffer the destination.
-    cvt.inFormat = stream_.userFormat;
-    cvt.outFormat = stream_.deviceFormat[0];
-    cvt.inJump = stream_.nUserChannels[0];
-    cvt.outJump = stream_.nDeviceChannels[0];
-  }
-
-  // Copy only as many channels as both sides have.
-  if ( cvt.inJump < cvt.outJump )
-    cvt.channels = cvt.inJump;
-  else
-    cvt.channels = cvt.outJump;
-
-  // Set up the interleave/deinterleave offsets.
-  if ( stream_.deviceInterleaved[mode] != stream_.userInterleaved ) {
-    // True when the source side is the non-interleaved one.
-    const bool sourceNonInterleaved =
-      ( mode == OUTPUT && stream_.deviceInterleaved[mode] ) ||
-      ( mode == INPUT && stream_.userInterleaved );
-
-    for ( int ch=0; ch<cvt.channels; ch++ ) {
-      if ( sourceNonInterleaved ) {
-        cvt.inOffset.push_back( ch * stream_.bufferSize );
-        cvt.outOffset.push_back( ch );
-        cvt.inJump = 1;
-      }
-      else {
-        cvt.inOffset.push_back( ch );
-        cvt.outOffset.push_back( ch * stream_.bufferSize );
-        cvt.outJump = 1;
-      }
-    }
-  }
-  else { // no (de)interleaving
-    for ( int ch=0; ch<cvt.channels; ch++ ) {
-      if ( stream_.userInterleaved ) {
-        cvt.inOffset.push_back( ch );
-        cvt.outOffset.push_back( ch );
-      }
-      else {
-        cvt.inOffset.push_back( ch * stream_.bufferSize );
-        cvt.outOffset.push_back( ch * stream_.bufferSize );
-        cvt.inJump = 1;
-        cvt.outJump = 1;
-      }
-    }
-  }
-
-  // Add channel offset.
-  if ( firstChannel == 0 ) return;
-
-  for ( int ch=0; ch<cvt.channels; ch++ ) {
-    if ( stream_.deviceInterleaved[mode] ) {
-      // Interleaved device data: shift by whole channels.
-      if ( mode == OUTPUT )
-        cvt.outOffset[ch] += firstChannel;
-      else
-        cvt.inOffset[ch] += firstChannel;
-    }
-    else {
-      // Non-interleaved device data: shift by whole channel blocks.
-      if ( mode == OUTPUT )
-        cvt.outOffset[ch] += ( firstChannel * stream_.bufferSize );
-      else
-        cvt.inOffset[ch] += ( firstChannel * stream_.bufferSize );
-    }
-  }
-}
-
-// Converts stream_.bufferSize frames from inBuffer into outBuffer as
-// described by `info`: for each frame, info.channels samples are read at
-// in[info.inOffset[j]] and written at out[info.outOffset[j]], then the two
-// pointers advance by info.inJump / info.outJump samples.
-//
-// The conversion is selected by info.outFormat first and info.inFormat
-// second.  When no branch matches the pair, nothing is written.
-//   integer -> float   : (value + 0.5) * scale, scale = 1 / (max + 0.5)
-//   float   -> integer : value * (max + 0.5) - 0.5, then a truncating cast
-//   integer -> integer : left shift to widen, right shift to narrow
-//   same format        : plain copy (channel compensation/(de)interleaving)
-void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info )
-{
- // This function does format conversion, input/output channel compensation, and
- // data interleaving/deinterleaving. 24-bit integers are assumed to occupy
- // the lower three bytes of a 32-bit integer.
-
- // Clear our device buffer when in/out duplex device channels are different
- if ( outBuffer == stream_.deviceBuffer && stream_.mode == DUPLEX &&
- ( stream_.nDeviceChannels[0] < stream_.nDeviceChannels[1] ) )
- memset( outBuffer, 0, stream_.bufferSize * info.outJump * formatBytes( info.outFormat ) );
-
- int j;
- // ---- Output format: 64-bit float ----
- if (info.outFormat == RTAUDIO_FLOAT64) {
- Float64 scale;
- Float64 *out = (Float64 *)outBuffer;
-
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- scale = 1.0 / 127.5;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- scale = 1.0 / 32767.5;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- scale = 1.0 / 8388607.5;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) (in[info.inOffset[j]].asInt());
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- scale = 1.0 / 2147483647.5;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- // Channel compensation and/or (de)interleaving only.
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- // ---- Output format: 32-bit float ----
- else if (info.outFormat == RTAUDIO_FLOAT32) {
- Float32 scale;
- Float32 *out = (Float32 *)outBuffer;
-
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- scale = (Float32) ( 1.0 / 127.5 );
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- scale = (Float32) ( 1.0 / 32767.5 );
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- scale = (Float32) ( 1.0 / 8388607.5 );
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) (in[info.inOffset[j]].asInt());
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- scale = (Float32) ( 1.0 / 2147483647.5 );
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- // Channel compensation and/or (de)interleaving only.
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- // ---- Output format: 32-bit signed integer ----
- else if (info.outFormat == RTAUDIO_SINT32) {
- Int32 *out = (Int32 *)outBuffer;
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
- out[info.outOffset[j]] <<= 24;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
- out[info.outOffset[j]] <<= 16;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) in[info.inOffset[j]].asInt();
- out[info.outOffset[j]] <<= 8;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- // Channel compensation and/or (de)interleaving only.
- Int32 *in = (Int32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- // ---- Output format: 24-bit signed integer ----
- else if (info.outFormat == RTAUDIO_SINT24) {
- Int24 *out = (Int24 *)outBuffer;
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 16);
- //out[info.outOffset[j]] <<= 16;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 8);
- //out[info.outOffset[j]] <<= 8;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- // Channel compensation and/or (de)interleaving only.
- Int24 *in = (Int24 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] >> 8);
- //out[info.outOffset[j]] >>= 8;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- // ---- Output format: 16-bit signed integer ----
- else if (info.outFormat == RTAUDIO_SINT16) {
- Int16 *out = (Int16 *)outBuffer;
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) in[info.inOffset[j]];
- out[info.outOffset[j]] <<= 8;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- // Channel compensation and/or (de)interleaving only.
- Int16 *in = (Int16 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]].asInt() >> 8);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) ((in[info.inOffset[j]] >> 16) & 0x0000ffff);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- // ---- Output format: 8-bit signed integer ----
- else if (info.outFormat == RTAUDIO_SINT8) {
- signed char *out = (signed char *)outBuffer;
- if (info.inFormat == RTAUDIO_SINT8) {
- // Channel compensation and/or (de)interleaving only.
- signed char *in = (signed char *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- // NOTE(review): this is a plain `if`, unlike the `else if` used in every
- // other chain of this function; each branch tests inFormat against a
- // different format constant, so it only costs a redundant comparison.
- if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 8) & 0x00ff);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]].asInt() >> 16);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 24) & 0x000000ff);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
-}
-
-//static inline uint16_t bswap_16(uint16_t x) { return (x>>8) | (x<<8); }
-//static inline uint32_t bswap_32(uint32_t x) { return (bswap_16(x&0xffff)<<16) | (bswap_16(x>>16)); }
-//static inline uint64_t bswap_64(uint64_t x) { return (((unsigned long long)bswap_32(x&0xffffffffull))<<32) | (bswap_32(x>>32)); }
-
-// Reverses the byte order of every sample in `buffer`, in place.
-//
-//   buffer  - start of the sample data
-//   samples - number of samples (not frames and not bytes) to swap
-//   format  - sample format, which determines the sample width in bytes
-//
-// Formats without a multi-byte width known here (e.g. RTAUDIO_SINT8) leave
-// the buffer untouched.
-void RtApi :: byteSwapBuffer( char *buffer, unsigned int samples, RtAudioFormat format )
-{
-  // Width in bytes of one sample of the given format.
-  unsigned int width;
-  if ( format == RTAUDIO_SINT16 )
-    width = 2;
-  else if ( format == RTAUDIO_SINT32 || format == RTAUDIO_FLOAT32 )
-    width = 4;
-  else if ( format == RTAUDIO_SINT24 )
-    width = 3; // packed 24-bit samples: stride must be 3 bytes, not 2
-  else if ( format == RTAUDIO_FLOAT64 )
-    width = 8;
-  else
-    return; // nothing to swap
-
-  char *sample = buffer;
-  for ( unsigned int i=0; i<samples; i++ ) {
-    // Mirror the bytes of this sample around its centre.
-    for ( unsigned int lo=0, hi=width-1; lo<hi; lo++, hi-- ) {
-      char val = sample[lo];
-      sample[lo] = sample[hi];
-      sample[hi] = val;
-    }
-
-    // Advance by exactly one whole sample.
-    sample += width;
-  }
-}
-
- // Indentation settings for Vim and Emacs
- //
- // Local Variables:
- // c-basic-offset: 2
- // indent-tabs-mode: nil
- // End:
- //
- // vim: et sts=2 sw=2
-
-#endif
+#ifdef RTAUDIO_ENABLED
+/************************************************************************/
+/*! \class RtAudio
+ \brief Realtime audio i/o C++ classes.
+
+ RtAudio provides a common API (Application Programming Interface)
+ for realtime audio input/output across Linux (native ALSA, Jack,
+ and OSS), Macintosh OS X (CoreAudio and Jack), and Windows
+ (DirectSound, ASIO and WASAPI) operating systems.
+
+ RtAudio WWW site: http://www.music.mcgill.ca/~gary/rtaudio/
+
+ RtAudio: realtime audio i/o C++ classes
+ Copyright (c) 2001-2014 Gary P. Scavone
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation files
+ (the "Software"), to deal in the Software without restriction,
+ including without limitation the rights to use, copy, modify, merge,
+ publish, distribute, sublicense, and/or sell copies of the Software,
+ and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ Any person wishing to distribute modifications to the Software is
+ asked to send the modifications to the original developer so that
+ they can be incorporated into the canonical version. This is,
+ however, not a binding provision of this license.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+ ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+/************************************************************************/
+
+// RtAudio: Version 4.1.1
+
+#include "RtAudio.h"
+#include <algorithm>
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <vector>
+
+// Static variable definitions: the table of candidate sample rates and its length.
+const unsigned int RtApi::MAX_SAMPLE_RATES = 14; // must equal the number of entries in SAMPLE_RATES
+const unsigned int RtApi::SAMPLE_RATES[] = { // rates in ascending order
+ 4000, 5512, 8000, 9600, 11025, 16000, 22050,
+ 32000, 44100, 48000, 88200, 96000, 176400, 192000
+};
+
+#if defined(__WINDOWS_DS__) || defined(__WINDOWS_ASIO__) || defined(__WINDOWS_WASAPI__) // any Windows backend: map the MUTEX_* macros onto critical sections
+#ifdef WINRT_ENABLED
+ #define MUTEX_INITIALIZE(A) InitializeCriticalSectionEx(A, 0, 0) // WinRT builds use the Ex variant
+#else
+ #define MUTEX_INITIALIZE(A) InitializeCriticalSection(A)
+#endif
+ #define MUTEX_DESTROY(A) DeleteCriticalSection(A)
+ #define MUTEX_LOCK(A) EnterCriticalSection(A)
+ #define MUTEX_UNLOCK(A) LeaveCriticalSection(A)
+
+ #include "tchar.h"
+
+ // Narrow-string overload: copy the NUL-terminated text into a
+ // std::string without any conversion.
+ static std::string convertCharPointerToStdString(const char *text)
+ {
+   std::string converted( text );
+   return converted;
+ }
+
+ // Wide-string overload: convert the NUL-terminated text to UTF-8.
+ // Returns an empty string when the input is empty or the conversion
+ // fails.
+ static std::string convertCharPointerToStdString(const wchar_t *text)
+ {
+   // With a source length of -1 the reported size includes the
+   // terminating NUL; a result of 0 signals failure.  Bail out early so
+   // a failed call can never turn into a (size_t)-1 string length.
+   int length = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL);
+   if ( length <= 1 ) return std::string();
+
+   // Reserve room for the terminator as well, then trim it afterwards.
+   std::string s( length, '\0' );
+   int written = WideCharToMultiByte(CP_UTF8, 0, text, -1, &s[0], length, NULL, NULL);
+   if ( written <= 0 ) return std::string();
+   s.resize( written - 1 );
+   return s;
+ }
+
+#elif defined(__LINUX_ALSA__) || defined(__LINUX_PULSE__) || defined(__UNIX_JACK__) || defined(__LINUX_OSS__) || defined(__MACOSX_CORE__)
+ // pthread API: the MUTEX_* macros map directly onto pthread mutex calls
+ #define MUTEX_INITIALIZE(A) pthread_mutex_init(A, NULL)
+ #define MUTEX_DESTROY(A) pthread_mutex_destroy(A)
+ #define MUTEX_LOCK(A) pthread_mutex_lock(A)
+ #define MUTEX_UNLOCK(A) pthread_mutex_unlock(A)
+#else
+ #define MUTEX_INITIALIZE(A) abs(*A) // dummy definition; note no MUTEX_LOCK / MUTEX_UNLOCK is defined in this branch
+ #define MUTEX_DESTROY(A) abs(*A) // dummy definition
+#endif
+
+// *************************************************** //
+//
+// RtAudio definitions.
+//
+// *************************************************** //
+
+// Return the value of RTAUDIO_VERSION as a std::string.
+std::string RtAudio :: getVersion( void ) throw()
+{
+  std::string version( RTAUDIO_VERSION );
+  return version;
+}
+
+void RtAudio :: getCompiledApi( std::vector<RtAudio::Api> &apis ) throw() // replaces the contents of `apis` with one entry per API enabled at compile time
+{
+ apis.clear(); // start from an empty list regardless of what the caller passed in
+
+ // The order here will control the order of RtAudio's API search in
+ // the constructor.
+#if defined(__UNIX_JACK__)
+ apis.push_back( UNIX_JACK );
+#endif
+#if defined(__LINUX_ALSA__)
+ apis.push_back( LINUX_ALSA );
+#endif
+#if defined(__LINUX_PULSE__)
+ apis.push_back( LINUX_PULSE );
+#endif
+#if defined(__LINUX_OSS__)
+ apis.push_back( LINUX_OSS );
+#endif
+#if defined(__WINDOWS_ASIO__)
+ apis.push_back( WINDOWS_ASIO );
+#endif
+#if defined(__WINDOWS_WASAPI__)
+ apis.push_back( WINDOWS_WASAPI );
+#endif
+#if defined(__WINDOWS_DS__)
+ apis.push_back( WINDOWS_DS );
+#endif
+#if defined(__MACOSX_CORE__)
+ apis.push_back( MACOSX_CORE );
+#endif
+#if defined(__RTAUDIO_DUMMY__)
+ apis.push_back( RTAUDIO_DUMMY );
+#endif
+}
+
+// Replace the current backend object with a new one for `api`.  If the
+// requested API was not compiled in, rtapi_ is left null.
+void RtAudio :: openRtApi( RtAudio::Api api )
+{
+  // Deleting a null pointer is a no-op, so no guard is required.
+  delete rtapi_;
+  rtapi_ = 0;
+
+#if defined(__UNIX_JACK__)
+  if ( api == UNIX_JACK ) { rtapi_ = new RtApiJack(); }
+#endif
+#if defined(__LINUX_ALSA__)
+  if ( api == LINUX_ALSA ) { rtapi_ = new RtApiAlsa(); }
+#endif
+#if defined(__LINUX_PULSE__)
+  if ( api == LINUX_PULSE ) { rtapi_ = new RtApiPulse(); }
+#endif
+#if defined(__LINUX_OSS__)
+  if ( api == LINUX_OSS ) { rtapi_ = new RtApiOss(); }
+#endif
+#if defined(__WINDOWS_ASIO__)
+  if ( api == WINDOWS_ASIO ) { rtapi_ = new RtApiAsio(); }
+#endif
+#if defined(__WINDOWS_WASAPI__)
+  if ( api == WINDOWS_WASAPI ) { rtapi_ = new RtApiWasapi(); }
+#endif
+#if defined(__WINDOWS_DS__)
+  if ( api == WINDOWS_DS ) { rtapi_ = new RtApiDs(); }
+#endif
+#if defined(__MACOSX_CORE__)
+  if ( api == MACOSX_CORE ) { rtapi_ = new RtApiCore(); }
+#endif
+#if defined(__RTAUDIO_DUMMY__)
+  if ( api == RTAUDIO_DUMMY ) { rtapi_ = new RtApiDummy(); }
+#endif
+}
+
+// Construct the front end, selecting a backend.  A specific `api` is
+// tried first; if it is UNSPECIFIED or not compiled in, the compiled
+// APIs are tried in getCompiledApi() order.  Throws RtAudioError if no
+// backend object could be created at all.
+RtAudio :: RtAudio( RtAudio::Api api )
+{
+  rtapi_ = 0;
+
+  if ( api != UNSPECIFIED ) {
+    // Try the API the caller asked for.
+    openRtApi( api );
+    if ( rtapi_ ) return;
+
+    // The requested API is not part of this build.  Emit a debug
+    // warning and fall through to the automatic search.
+    std::cerr << "\nRtAudio: no compiled support for specified API argument!\n" << std::endl;
+  }
+
+  // Walk the compiled APIs, stopping at the first one that reports at
+  // least one device.  If none does, the last one opened stays selected.
+  std::vector< RtAudio::Api > candidates;
+  getCompiledApi( candidates );
+  for ( unsigned int n=0; n<candidates.size(); n++ ) {
+    openRtApi( candidates[n] );
+    if ( rtapi_ && rtapi_->getDeviceCount() ) break;
+  }
+
+  if ( !rtapi_ ) {
+    // This should be unreachable because the preprocessor definition
+    // __RTAUDIO_DUMMY__ is automatically defined when no API-specific
+    // definitions are passed to the compiler.  But just in case
+    // something weird happens, we'll throw an error.
+    std::string errorText = "\nRtAudio: no compiled API support found ... critical error!!\n\n";
+    throw( RtAudioError( errorText, RtAudioError::UNSPECIFIED ) );
+  }
+}
+
+// Release the backend object, if one was created.
+RtAudio :: ~RtAudio() throw()
+{
+  // Deleting a null pointer is a no-op, so no guard is required.
+  delete rtapi_;
+}
+
+// Forward an open request, with all arguments unchanged, to the
+// selected backend object.
+void RtAudio :: openStream( RtAudio::StreamParameters *outputParameters,
+                            RtAudio::StreamParameters *inputParameters,
+                            RtAudioFormat format, unsigned int sampleRate,
+                            unsigned int *bufferFrames,
+                            RtAudioCallback callback, void *userData,
+                            RtAudio::StreamOptions *options,
+                            RtAudioErrorCallback errorCallback )
+{
+  rtapi_->openStream( outputParameters, inputParameters, format, sampleRate,
+                      bufferFrames, callback, userData, options, errorCallback );
+}
+
+// *************************************************** //
+//
+// Public RtApi definitions (see end of file for
+// private or protected utility functions).
+//
+// *************************************************** //
+
+// Put the stream bookkeeping into its "no stream open" state and
+// create the stream mutex.
+RtApi :: RtApi()
+{
+  showWarnings_ = true;
+  firstErrorOccurred_ = false;
+
+  stream_.apiHandle = 0;
+  for ( int side=0; side<2; side++ )
+    stream_.userBuffer[side] = 0;
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+
+  MUTEX_INITIALIZE( &stream_.mutex );
+}
+
+RtApi :: ~RtApi() // tears down the mutex created in the constructor
+{
+ MUTEX_DESTROY( &stream_.mutex );
+}
+
+// Validate an open request, open the output and/or input side through
+// probeDeviceOpen(), and record the callback details.  Every failure is
+// reported through error() and ends the function early; on success the
+// stream is left in the STREAM_STOPPED state.
+void RtApi :: openStream( RtAudio::StreamParameters *oParams,
+                          RtAudio::StreamParameters *iParams,
+                          RtAudioFormat format, unsigned int sampleRate,
+                          unsigned int *bufferFrames,
+                          RtAudioCallback callback, void *userData,
+                          RtAudio::StreamOptions *options,
+                          RtAudioErrorCallback errorCallback )
+{
+  if ( stream_.state != STREAM_CLOSED ) {
+    errorText_ = "RtApi::openStream: a stream is already open!";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  // Drop whatever a previously opened stream may have left behind.
+  clearStreamInfo();
+
+  const bool wantOutput = ( oParams != NULL );
+  const bool wantInput = ( iParams != NULL );
+
+  // Argument sanity checks, in the order callers see them reported.
+  if ( wantOutput && oParams->nChannels < 1 ) {
+    errorText_ = "RtApi::openStream: a non-NULL output StreamParameters structure cannot have an nChannels value less than one.";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  if ( wantInput && iParams->nChannels < 1 ) {
+    errorText_ = "RtApi::openStream: a non-NULL input StreamParameters structure cannot have an nChannels value less than one.";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  if ( !wantOutput && !wantInput ) {
+    errorText_ = "RtApi::openStream: input and output StreamParameters structures are both NULL!";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  if ( formatBytes(format) == 0 ) {
+    errorText_ = "RtApi::openStream: 'format' parameter value is undefined.";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  // Device indices must refer to an existing device.
+  const unsigned int deviceCount = getDeviceCount();
+
+  const unsigned int outChannels = wantOutput ? oParams->nChannels : 0;
+  if ( wantOutput && oParams->deviceId >= deviceCount ) {
+    errorText_ = "RtApi::openStream: output device parameter value is invalid.";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  const unsigned int inChannels = wantInput ? iParams->nChannels : 0;
+  if ( wantInput && iParams->deviceId >= deviceCount ) {
+    errorText_ = "RtApi::openStream: input device parameter value is invalid.";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  // Open the output side first.
+  if ( outChannels > 0 &&
+       !probeDeviceOpen( oParams->deviceId, OUTPUT, outChannels, oParams->firstChannel,
+                         sampleRate, format, bufferFrames, options ) ) {
+    error( RtAudioError::SYSTEM_ERROR );
+    return;
+  }
+
+  // Then the input side; if it fails, undo an already-opened output.
+  if ( inChannels > 0 &&
+       !probeDeviceOpen( iParams->deviceId, INPUT, inChannels, iParams->firstChannel,
+                         sampleRate, format, bufferFrames, options ) ) {
+    if ( outChannels > 0 ) closeStream();
+    error( RtAudioError::SYSTEM_ERROR );
+    return;
+  }
+
+  stream_.callbackInfo.callback = (void *) callback;
+  stream_.callbackInfo.userData = userData;
+  stream_.callbackInfo.errorCallback = (void *) errorCallback;
+
+  // Report the buffer count actually in use back to the caller.
+  if ( options ) options->numberOfBuffers = stream_.nBuffers;
+  stream_.state = STREAM_STOPPED;
+}
+
+unsigned int RtApi :: getDefaultInputDevice( void )
+{
+ // Base-class fallback: always reports device index 0. Should be implemented in subclasses if possible.
+ return 0;
+}
+
+unsigned int RtApi :: getDefaultOutputDevice( void )
+{
+ // Base-class fallback: always reports device index 0. Should be implemented in subclasses if possible.
+ return 0;
+}
+
+// Base-class placeholder that does nothing.
+void RtApi :: closeStream( void )
+{
+  // MUST be implemented in subclasses!
+}
+
+bool RtApi :: probeDeviceOpen( unsigned int /*device*/, StreamMode /*mode*/, unsigned int /*channels*/,
+ unsigned int /*firstChannel*/, unsigned int /*sampleRate*/,
+ RtAudioFormat /*format*/, unsigned int * /*bufferSize*/,
+ RtAudio::StreamOptions * /*options*/ )
+{
+ // Base-class placeholder: ignores every argument and reports failure. MUST be implemented in subclasses!
+ return FAILURE;
+}
+
+// Advance the stream clock by the duration of one buffer.  Subclasses
+// that do not provide their own getStreamTime should call this once per
+// buffer I/O to get basic stream time support.
+void RtApi :: tickStreamTime( void )
+{
+  // Multiplying by 1.0 forces the division to be done in floating point.
+  const double bufferSeconds = stream_.bufferSize * 1.0 / stream_.sampleRate;
+  stream_.streamTime += bufferSeconds;
+
+#if defined( HAVE_GETTIMEOFDAY )
+  // Remember when this tick happened so getStreamTime can interpolate.
+  gettimeofday( &stream_.lastTickTimestamp, NULL );
+#endif
+}
+
+// Return the sum of the stored latencies for the directions the stream
+// is using: latency[0] for output, latency[1] for input.
+long RtApi :: getStreamLatency( void )
+{
+  verifyStream();
+
+  const bool usesOutput = ( stream_.mode == OUTPUT || stream_.mode == DUPLEX );
+  const bool usesInput = ( stream_.mode == INPUT || stream_.mode == DUPLEX );
+
+  long totalLatency = 0;
+  if ( usesOutput ) totalLatency += stream_.latency[0];
+  if ( usesInput ) totalLatency += stream_.latency[1];
+  return totalLatency;
+}
+
+// Return the stream time.  When gettimeofday is available and the
+// stream is running, the time elapsed since the last tick is added in
+// for a more accurate estimate.
+double RtApi :: getStreamTime( void )
+{
+  verifyStream();
+
+#if defined( HAVE_GETTIMEOFDAY )
+  // Nothing to interpolate while stopped or before the first tick.
+  if ( stream_.state != STREAM_RUNNING || stream_.streamTime == 0.0 )
+    return stream_.streamTime;
+
+  struct timeval now;
+  gettimeofday( &now, NULL );
+  const struct timeval then = stream_.lastTickTimestamp;
+
+  const double nowSeconds = now.tv_sec + 0.000001 * now.tv_usec;
+  const double thenSeconds = then.tv_sec + 0.000001 * then.tv_usec;
+  return stream_.streamTime + ( nowSeconds - thenSeconds );
+#else
+  return stream_.streamTime;
+#endif
+}
+
+// Overwrite the stream clock.  Values that do not compare >= 0.0 are
+// ignored.
+void RtApi :: setStreamTime( double time )
+{
+  verifyStream();
+
+  // Written as a negated ">=" so a NaN is rejected too.
+  if ( !( time >= 0.0 ) ) return;
+  stream_.streamTime = time;
+}
+
+// Return the sample rate stored for the stream, after verifyStream()
+// has checked it.
+unsigned int RtApi :: getStreamSampleRate( void )
+{
+  verifyStream();
+
+  const unsigned int rate = stream_.sampleRate;
+  return rate;
+}
+
+
+// *************************************************** //
+//
+// OS/API-specific methods.
+//
+// *************************************************** //
+
+#if defined(__MACOSX_CORE__)
+
+// The OS X CoreAudio API is designed to use a separate callback
+// procedure for each of its audio devices. A single RtAudio duplex
+// stream using two different devices is supported here, though it
+// cannot be guaranteed to always behave correctly because we cannot
+// synchronize these two callbacks.
+//
+// A property listener is installed for over/underrun information.
+// However, no functionality is currently provided to allow property
+// listeners to trigger user handlers because it is unclear what could
+// be done if a critical stream parameter (buffer size, sample rate,
+// device disconnect) notification arrived. The listeners entail
+// quite a bit of extra code and most likely, a user program wouldn't
+// be prepared for the result anyway. However, we do provide a flag
+// to the client callback function to inform of an over/underrun.
+
+// Per-stream state for the CoreAudio backend.  The two-element arrays
+// hold one entry per stream direction.
+struct CoreHandle {
+  AudioDeviceID id[2];        // device ids
+#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
+  AudioDeviceIOProcID procId[2];
+#endif
+  UInt32 iStream[2];          // device stream index (or first if using multiple)
+  UInt32 nStreams[2];         // number of streams to use
+  bool xrun[2];
+  char *deviceBuffer;
+  pthread_cond_t condition;
+  int drainCounter;           // Tracks callback counts when draining
+  bool internalDrain;         // Indicates if stop is initiated from callback or not.
+
+  // Start with no devices, one stream per direction and no xruns
+  // recorded.  iStream, procId and condition are not initialised here.
+  CoreHandle()
+    : deviceBuffer(0), drainCounter(0), internalDrain(false)
+  {
+    for ( int side=0; side<2; side++ ) {
+      id[side] = 0;
+      nStreams[side] = 1;
+      xrun[side] = false;
+    }
+  }
+};
+
+// Construct the CoreAudio backend.  On OS X 10.6 and later, set the
+// audio hardware run-loop property to NULL.
+RtApiCore:: RtApiCore()
+{
+#if defined( AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER )
+  // Largely undocumented but absolutely necessary from OS-X 10.6 on:
+  // without this call, queries and updates to various audio device
+  // properties are not handled correctly.
+  AudioObjectPropertyAddress runLoopProperty = { kAudioHardwarePropertyRunLoop,
+                                                 kAudioObjectPropertyScopeGlobal,
+                                                 kAudioObjectPropertyElementMaster };
+  CFRunLoopRef nullRunLoop = NULL;
+  const OSStatus status = AudioObjectSetPropertyData( kAudioObjectSystemObject, &runLoopProperty, 0, NULL, sizeof(CFRunLoopRef), &nullRunLoop);
+  if ( status != noErr ) {
+    errorText_ = "RtApiCore::RtApiCore: error setting run loop property!";
+    error( RtAudioError::WARNING );
+  }
+#endif
+}
+
+// Close any stream that is still open.  This runs before the base
+// class destructor, so the stream is shut down while the base-class
+// state is still intact.
+RtApiCore :: ~RtApiCore()
+{
+  if ( stream_.state == STREAM_CLOSED ) return;
+  closeStream();
+}
+
+// Return the number of audio devices the system reports, or 0 (with a
+// warning) if the query fails.
+unsigned int RtApiCore :: getDeviceCount( void )
+{
+  AudioObjectPropertyAddress devicesProperty = { kAudioHardwarePropertyDevices, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+
+  // The size of the device-ID array tells us how many devices exist.
+  UInt32 byteCount;
+  if ( AudioObjectGetPropertyDataSize( kAudioObjectSystemObject, &devicesProperty, 0, NULL, &byteCount ) != noErr ) {
+    errorText_ = "RtApiCore::getDeviceCount: OS-X error getting device info!";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  return byteCount / sizeof( AudioDeviceID );
+}
+
+// Return the index, within the system device list, of the default
+// input device.  Returns 0 when there is at most one device, when a
+// query fails, or when the default device is not in the list (the
+// latter two also raise a warning).
+unsigned int RtApiCore :: getDefaultInputDevice( void )
+{
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices <= 1 ) return 0;
+
+  // Ask for the ID of the default input device.
+  AudioDeviceID id;
+  UInt32 dataSize = sizeof( AudioDeviceID );
+  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  // dataSize is an in/out argument, so compute the list size from
+  // scratch rather than scaling whatever the previous call left in it.
+  // The vector is zero-filled, so entries the call does not write can
+  // never hold stack garbage.
+  dataSize = sizeof( AudioDeviceID ) * nDevices;
+  std::vector<AudioDeviceID> deviceList( nDevices );
+  property.mSelector = kAudioHardwarePropertyDevices;
+  result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList[0] );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device IDs.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  // Only search the entries the call actually filled in.
+  const unsigned int nReturned = dataSize / sizeof( AudioDeviceID );
+  for ( unsigned int i=0; i<nDevices && i<nReturned; i++ )
+    if ( id == deviceList[i] ) return i;
+
+  errorText_ = "RtApiCore::getDefaultInputDevice: No default device found!";
+  error( RtAudioError::WARNING );
+  return 0;
+}
+
+// Return the index, within the system device list, of the default
+// output device.  Returns 0 when there is at most one device, when a
+// query fails, or when the default device is not in the list (the
+// latter two also raise a warning).
+unsigned int RtApiCore :: getDefaultOutputDevice( void )
+{
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices <= 1 ) return 0;
+
+  // Ask for the ID of the default output device.
+  AudioDeviceID id;
+  UInt32 dataSize = sizeof( AudioDeviceID );
+  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultOutputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  // Fetch the full device list.  A zero-filled vector replaces the
+  // variable-length array (a compiler extension in C++), so entries
+  // the call does not write can never hold stack garbage.
+  dataSize = sizeof( AudioDeviceID ) * nDevices;
+  std::vector<AudioDeviceID> deviceList( nDevices );
+  property.mSelector = kAudioHardwarePropertyDevices;
+  result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList[0] );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device IDs.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  // Only search the entries the call actually filled in.
+  const unsigned int nReturned = dataSize / sizeof( AudioDeviceID );
+  for ( unsigned int i=0; i<nDevices && i<nReturned; i++ )
+    if ( id == deviceList[i] ) return i;
+
+  errorText_ = "RtApiCore::getDefaultOutputDevice: No default device found!";
+  error( RtAudioError::WARNING );
+  return 0;
+}
+
+// Append the contents of `text` to `dest`, converted to UTF-8 in
+// UNICODE builds and to the system encoding otherwise.  Returns false
+// and leaves `dest` untouched if `text` is NULL, the scratch buffer
+// cannot be allocated, or the conversion fails.
+static bool rtCoreAppendCFString( std::string &dest, CFStringRef text )
+{
+  if ( text == NULL ) return false;
+
+  // Three bytes per string unit plus a terminator.
+  const CFIndex capacity = CFStringGetLength( text ) * 3 + 1;
+  char *converted = (char *) malloc( capacity );
+  if ( converted == NULL ) return false;
+
+#if defined( UNICODE ) || defined( _UNICODE )
+  const CFStringEncoding encoding = kCFStringEncodingUTF8;
+#else
+  const CFStringEncoding encoding = CFStringGetSystemEncoding();
+#endif
+
+  // Only read the buffer back if the conversion succeeded; otherwise
+  // its contents are undefined.
+  const bool ok = ( CFStringGetCString( text, converted, capacity, encoding ) != 0 );
+  if ( ok ) dest.append( (const char *) converted, strlen( converted ) );
+  free( converted );
+  return ok;
+}
+
+// Probe one CoreAudio device and describe it.
+//
+// `device` is an index into the system device list.  The returned
+// structure has `probed` set to true only if every query succeeded; on
+// any failure an error or warning is raised through error() and the
+// partially filled structure is returned with `probed` left false.
+RtAudio::DeviceInfo RtApiCore :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  // Get device ID
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices == 0 ) {
+    errorText_ = "RtApiCore::getDeviceInfo: no devices found!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  if ( device >= nDevices ) {
+    errorText_ = "RtApiCore::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  // A zero-filled vector replaces the variable-length array (a
+  // compiler extension in C++).
+  std::vector<AudioDeviceID> deviceList( nDevices );
+  UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
+  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
+                                          kAudioObjectPropertyScopeGlobal,
+                                          kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
+                                                0, NULL, &dataSize, (void *) &deviceList[0] );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDeviceInfo: OS-X system error getting device IDs.";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  AudioDeviceID id = deviceList[ device ];
+
+  // Get the device name, built as "<manufacturer>: <name>".
+  info.name.erase();
+  CFStringRef cfname = NULL;
+  dataSize = sizeof( CFStringRef );
+  property.mSelector = kAudioObjectPropertyManufacturer;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device manufacturer.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // A failed conversion simply leaves this part of the name empty.
+  rtCoreAppendCFString( info.name, cfname );
+  info.name.append( ": " );
+  if ( cfname != NULL ) CFRelease( cfname );
+
+  cfname = NULL;
+  dataSize = sizeof( CFStringRef );
+  property.mSelector = kAudioObjectPropertyName;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device name.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  rtCoreAppendCFString( info.name, cfname );
+  if ( cfname != NULL ) CFRelease( cfname );
+
+  // Get the output stream "configuration".
+  AudioBufferList *bufferList = NULL;
+  property.mSelector = kAudioDevicePropertyStreamConfiguration;
+  property.mScope = kAudioDevicePropertyScopeOutput;
+  // property.mElement = kAudioObjectPropertyElementWildcard;
+  dataSize = 0;
+  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
+  if ( result != noErr || dataSize == 0 ) {
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration info for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Allocate the AudioBufferList.
+  bufferList = (AudioBufferList *) malloc( dataSize );
+  if ( bufferList == NULL ) {
+    errorText_ = "RtApiCore::getDeviceInfo: memory error allocating output AudioBufferList.";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
+  if ( result != noErr || dataSize == 0 ) {
+    free( bufferList );
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Get output channel information: sum the channels of every stream.
+  unsigned int i, nStreams = bufferList->mNumberBuffers;
+  for ( i=0; i<nStreams; i++ )
+    info.outputChannels += bufferList->mBuffers[i].mNumberChannels;
+  free( bufferList );
+
+  // Get the input stream "configuration".
+  property.mScope = kAudioDevicePropertyScopeInput;
+  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
+  if ( result != noErr || dataSize == 0 ) {
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration info for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Allocate the AudioBufferList.
+  bufferList = (AudioBufferList *) malloc( dataSize );
+  if ( bufferList == NULL ) {
+    errorText_ = "RtApiCore::getDeviceInfo: memory error allocating input AudioBufferList.";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
+  if (result != noErr || dataSize == 0) {
+    free( bufferList );
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Get input channel information.
+  nStreams = bufferList->mNumberBuffers;
+  for ( i=0; i<nStreams; i++ )
+    info.inputChannels += bufferList->mBuffers[i].mNumberChannels;
+  free( bufferList );
+
+  // If device opens for both playback and capture, the duplex channel
+  // count is the smaller of the two.
+  if ( info.outputChannels > 0 && info.inputChannels > 0 )
+    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+  // Probe the device sample rates.  The scope is still "input" from the
+  // query above; switch back to "output" unless the device is input-only.
+  bool isInput = false;
+  if ( info.outputChannels == 0 ) isInput = true;
+
+  // Determine the supported sample rates.
+  property.mSelector = kAudioDevicePropertyAvailableNominalSampleRates;
+  if ( isInput == false ) property.mScope = kAudioDevicePropertyScopeOutput;
+  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
+  // A size smaller than one range (which includes 0) cannot hold any
+  // usable data, so it is treated like the empty case.
+  if ( result != kAudioHardwareNoError || dataSize < sizeof( AudioValueRange ) ) {
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rate info.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  UInt32 nRanges = dataSize / sizeof( AudioValueRange );
+  std::vector<AudioValueRange> rangeList( nRanges );
+  dataSize = nRanges * sizeof( AudioValueRange );
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, (void *) &rangeList[0] );
+  if ( result != kAudioHardwareNoError ) {
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rates.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Only the entries the call actually filled in are meaningful.
+  if ( dataSize / sizeof( AudioValueRange ) < nRanges )
+    nRanges = dataSize / sizeof( AudioValueRange );
+
+  // The sample rate reporting mechanism is a bit of a mystery.  It
+  // seems that it can either return individual rates or a range of
+  // rates.  I assume that if the min / max range values are the same,
+  // then that represents a single supported rate and if the min / max
+  // range values are different, the device supports an arbitrary
+  // range of values (though there might be multiple ranges, so we'll
+  // use the most conservative range).
+  Float64 minimumRate = 1.0, maximumRate = 10000000000.0;
+  bool haveValueRange = false;
+  info.sampleRates.clear();
+  for ( UInt32 r=0; r<nRanges; r++ ) {
+    if ( rangeList[r].mMinimum == rangeList[r].mMaximum ) {
+      unsigned int tmpSr = (unsigned int) rangeList[r].mMinimum;
+      info.sampleRates.push_back( tmpSr );
+
+      // Prefer the highest rate that does not exceed 48 kHz.
+      if ( !info.preferredSampleRate || ( tmpSr <= 48000 && tmpSr > info.preferredSampleRate ) )
+        info.preferredSampleRate = tmpSr;
+
+    } else {
+      haveValueRange = true;
+      if ( rangeList[r].mMinimum > minimumRate ) minimumRate = rangeList[r].mMinimum;
+      if ( rangeList[r].mMaximum < maximumRate ) maximumRate = rangeList[r].mMaximum;
+    }
+  }
+
+  if ( haveValueRange ) {
+    // Offer every standard rate that falls inside the combined range.
+    for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
+      if ( SAMPLE_RATES[k] >= (unsigned int) minimumRate && SAMPLE_RATES[k] <= (unsigned int) maximumRate ) {
+        info.sampleRates.push_back( SAMPLE_RATES[k] );
+
+        if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
+          info.preferredSampleRate = SAMPLE_RATES[k];
+      }
+    }
+  }
+
+  // Sort and remove any redundant values
+  std::sort( info.sampleRates.begin(), info.sampleRates.end() );
+  info.sampleRates.erase( std::unique( info.sampleRates.begin(), info.sampleRates.end() ), info.sampleRates.end() );
+
+  if ( info.sampleRates.size() == 0 ) {
+    errorStream_ << "RtApiCore::probeDeviceInfo: No supported sample rates found for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // CoreAudio always uses 32-bit floating point data for PCM streams.
+  // Thus, any other "physical" formats supported by the device are of
+  // no interest to the client.
+  info.nativeFormats = RTAUDIO_FLOAT32;
+
+  if ( info.outputChannels > 0 )
+    if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
+  if ( info.inputChannels > 0 )
+    if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
+
+  info.probed = true;
+  return info;
+}
+
+// Device I/O callback: forwards the device and its buffers to
+// RtApiCore::callbackEvent on the object stored in the CallbackInfo
+// passed as `infoPointer`, and maps its boolean result onto an OSStatus.
+static OSStatus callbackHandler( AudioDeviceID inDevice,
+                                 const AudioTimeStamp* /*inNow*/,
+                                 const AudioBufferList* inInputData,
+                                 const AudioTimeStamp* /*inInputTime*/,
+                                 AudioBufferList* outOutputData,
+                                 const AudioTimeStamp* /*inOutputTime*/,
+                                 void* infoPointer )
+{
+  CallbackInfo *callbackInfo = (CallbackInfo *) infoPointer;
+  RtApiCore *api = (RtApiCore *) callbackInfo->object;
+
+  if ( api->callbackEvent( inDevice, inInputData, outOutputData ) )
+    return kAudioHardwareNoError;
+
+  return kAudioHardwareUnspecifiedError;
+}
+
+// Property listener for processor-overload notifications.  Each one
+// sets the matching xrun flag in the CoreHandle passed as
+// `handlePointer`: index 1 for the input scope, index 0 for anything else.
+static OSStatus xrunListener( AudioObjectID /*inDevice*/,
+                              UInt32 nAddresses,
+                              const AudioObjectPropertyAddress properties[],
+                              void* handlePointer )
+{
+  CoreHandle *coreHandle = (CoreHandle *) handlePointer;
+
+  for ( UInt32 n=0; n<nAddresses; n++ ) {
+    if ( properties[n].mSelector != kAudioDeviceProcessorOverload ) continue;
+
+    const int side = ( properties[n].mScope == kAudioDevicePropertyScopeInput ) ? 1 : 0;
+    coreHandle->xrun[side] = true;
+  }
+
+  return kAudioHardwareNoError;
+}
+
+// Property listener that reads the device's nominal sample rate into
+// the Float64 passed as `ratePointer`.  The result of the query is not
+// checked; the listener always reports success.
+static OSStatus rateListener( AudioObjectID inDevice,
+                              UInt32 /*nAddresses*/,
+                              const AudioObjectPropertyAddress /*properties*/[],
+                              void* ratePointer )
+{
+  AudioObjectPropertyAddress nominalRateProperty = { kAudioDevicePropertyNominalSampleRate,
+                                                     kAudioObjectPropertyScopeGlobal,
+                                                     kAudioObjectPropertyElementMaster };
+  UInt32 byteCount = sizeof( Float64 );
+  Float64 *destination = (Float64 *) ratePointer;
+
+  AudioObjectGetPropertyData( inDevice, &nominalRateProperty, 0, NULL, &byteCount, destination );
+  return kAudioHardwareNoError;
+}
+
+// Open one direction (INPUT or OUTPUT) of a CoreAudio device for the
+// current stream and fill in the corresponding stream_ fields.
+//
+//   device       - index into the system device list (must be < getDeviceCount()).
+//   mode         - INPUT or OUTPUT; selects the property scope and the
+//                  per-direction slot written in stream_ and CoreHandle.
+//   channels     - number of channels requested by the user.
+//   firstChannel - channel offset on the device at which to start.
+//   sampleRate   - requested sample rate; the device nominal rate is
+//                  changed if it differs by more than 1 Hz.
+//   format       - user-side sample format (the device side is always
+//                  treated as RTAUDIO_FLOAT32).
+//   bufferSize   - in/out: requested frames per buffer; clamped to the
+//                  device range and overwritten with the value actually set.
+//   options      - optional flags (minimize latency, hog device,
+//                  non-interleaved user data); may be NULL.
+//
+// Returns SUCCESS or FAILURE. On FAILURE errorText_ describes the
+// problem. Failures after the CoreHandle has been created also release
+// the handle and the internal buffers and set the stream state to
+// STREAM_CLOSED.
+bool RtApiCore :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                   unsigned int firstChannel, unsigned int sampleRate,
+                                   RtAudioFormat format, unsigned int *bufferSize,
+                                   RtAudio::StreamOptions *options )
+{
+  // Get device ID
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices == 0 ) {
+    // This should not happen because a check is made before this function is called.
+    errorText_ = "RtApiCore::probeDeviceOpen: no devices found!";
+    return FAILURE;
+  }
+
+  if ( device >= nDevices ) {
+    // This should not happen because a check is made before this function is called.
+    errorText_ = "RtApiCore::probeDeviceOpen: device ID is invalid!";
+    return FAILURE;
+  }
+
+  AudioDeviceID deviceList[ nDevices ];
+  UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
+  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
+                                          kAudioObjectPropertyScopeGlobal,
+                                          kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
+                                                0, NULL, &dataSize, (void *) &deviceList );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::probeDeviceOpen: OS-X system error getting device IDs.";
+    return FAILURE;
+  }
+
+  AudioDeviceID id = deviceList[ device ];
+
+  // Setup for stream mode.
+  bool isInput = false;
+  if ( mode == INPUT ) {
+    isInput = true;
+    property.mScope = kAudioDevicePropertyScopeInput;
+  }
+  else
+    property.mScope = kAudioDevicePropertyScopeOutput;
+
+  // Get the stream "configuration".
+  AudioBufferList *bufferList = nil;
+  dataSize = 0;
+  property.mSelector = kAudioDevicePropertyStreamConfiguration;
+  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
+  if ( result != noErr || dataSize == 0 ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration info for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Allocate the AudioBufferList.
+  bufferList = (AudioBufferList *) malloc( dataSize );
+  if ( bufferList == NULL ) {
+    errorText_ = "RtApiCore::probeDeviceOpen: memory error allocating AudioBufferList.";
+    return FAILURE;
+  }
+
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
+  if (result != noErr || dataSize == 0) {
+    free( bufferList );
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Search for one or more streams that contain the desired number of
+  // channels. CoreAudio devices can have an arbitrary number of
+  // streams and each stream can have an arbitrary number of channels.
+  // For each stream, a single buffer of interleaved samples is
+  // provided.  RtAudio prefers the use of one stream of interleaved
+  // data or multiple consecutive single-channel streams.  However, we
+  // now support multiple consecutive multi-channel streams of
+  // interleaved data as well.
+  UInt32 iStream, offsetCounter = firstChannel;
+  UInt32 nStreams = bufferList->mNumberBuffers;
+  bool monoMode = false;
+  bool foundStream = false;
+
+  // First check that the device supports the requested number of
+  // channels.
+  UInt32 deviceChannels = 0;
+  for ( iStream=0; iStream<nStreams; iStream++ )
+    deviceChannels += bufferList->mBuffers[iStream].mNumberChannels;
+
+  if ( deviceChannels < ( channels + firstChannel ) ) {
+    free( bufferList );
+    errorStream_ << "RtApiCore::probeDeviceOpen: the device (" << device << ") does not support the requested channel count.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Look for a single stream meeting our needs.
+  UInt32 firstStream, streamCount = 1, streamChannels = 0, channelOffset = 0;
+  for ( iStream=0; iStream<nStreams; iStream++ ) {
+    streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
+    if ( streamChannels >= channels + offsetCounter ) {
+      firstStream = iStream;
+      channelOffset = offsetCounter;
+      foundStream = true;
+      break;
+    }
+    // The first requested channel lies in this stream but the stream is
+    // too small to hold all of them: a single stream cannot work.
+    if ( streamChannels > offsetCounter ) break;
+    offsetCounter -= streamChannels;
+  }
+
+  // If we didn't find a single stream above, then we should be able
+  // to meet the channel specification with multiple streams.
+  if ( foundStream == false ) {
+    monoMode = true;
+    offsetCounter = firstChannel;
+    for ( iStream=0; iStream<nStreams; iStream++ ) {
+      streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
+      if ( streamChannels > offsetCounter ) break;
+      offsetCounter -= streamChannels;
+    }
+
+    firstStream = iStream;
+    channelOffset = offsetCounter;
+    Int32 channelCounter = channels + offsetCounter - streamChannels;
+
+    // monoMode survives only if every stream used has exactly one channel.
+    if ( streamChannels > 1 ) monoMode = false;
+    while ( channelCounter > 0 ) {
+      streamChannels = bufferList->mBuffers[++iStream].mNumberChannels;
+      if ( streamChannels > 1 ) monoMode = false;
+      channelCounter -= streamChannels;
+      streamCount++;
+    }
+  }
+
+  free( bufferList );
+
+  // Determine the buffer size.
+  AudioValueRange bufferRange;
+  dataSize = sizeof( AudioValueRange );
+  property.mSelector = kAudioDevicePropertyBufferFrameSizeRange;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &bufferRange );
+
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting buffer size range for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Clamp the request to the device range; the latency flag forces the minimum.
+  if ( bufferRange.mMinimum > *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMinimum;
+  else if ( bufferRange.mMaximum < *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMaximum;
+  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) *bufferSize = (unsigned long) bufferRange.mMinimum;
+
+  // Set the buffer size.  For multiple streams, I'm assuming we only
+  // need to make this setting for the master channel.
+  UInt32 theSize = (UInt32) *bufferSize;
+  dataSize = sizeof( UInt32 );
+  property.mSelector = kAudioDevicePropertyBufferFrameSize;
+  result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &theSize );
+
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting the buffer size for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // If attempting to setup a duplex stream, the bufferSize parameter
+  // MUST be the same in both directions!
+  *bufferSize = theSize;
+  if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  stream_.bufferSize = *bufferSize;
+  stream_.nBuffers = 1;
+
+  // Try to set "hog" mode ... it's not clear to me this is working.
+  if ( options && options->flags & RTAUDIO_HOG_DEVICE ) {
+    pid_t hog_pid;
+    dataSize = sizeof( hog_pid );
+    property.mSelector = kAudioDevicePropertyHogMode;
+    result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &hog_pid );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting 'hog' state!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    if ( hog_pid != getpid() ) {
+      hog_pid = getpid();
+      result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &hog_pid );
+      if ( result != noErr ) {
+        errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting 'hog' state!";
+        errorText_ = errorStream_.str();
+        return FAILURE;
+      }
+    }
+  }
+
+  // Check and if necessary, change the sample rate for the device.
+  Float64 nominalRate;
+  dataSize = sizeof( Float64 );
+  property.mSelector = kAudioDevicePropertyNominalSampleRate;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &nominalRate );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting current sample rate.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Only change the sample rate if off by more than 1 Hz.
+  if ( fabs( nominalRate - (double)sampleRate ) > 1.0 ) {
+
+    // Set a property listener for the sample rate change
+    Float64 reportedRate = 0.0;
+    AudioObjectPropertyAddress tmp = { kAudioDevicePropertyNominalSampleRate, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+    result = AudioObjectAddPropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate property listener for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    nominalRate = (Float64) sampleRate;
+    result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &nominalRate );
+    if ( result != noErr ) {
+      AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Now wait until the reported nominal rate is what we just set.
+    // Polls every 5 ms and gives up after roughly 5 seconds.
+    // NOTE(review): reportedRate is written by rateListener through a
+    // pointer and read here without any synchronization - confirm this
+    // is acceptable for the listener's calling context.
+    UInt32 microCounter = 0;
+    while ( reportedRate != nominalRate ) {
+      microCounter += 5000;
+      if ( microCounter > 5000000 ) break;
+      usleep( 5000 );
+    }
+
+    // Remove the property listener.
+    AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
+
+    if ( microCounter > 5000000 ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: timeout waiting for sample rate update for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  }
+
+  // Now set the stream format for all streams.  Also, check the
+  // physical format of the device and change that if necessary.
+  AudioStreamBasicDescription description;
+  dataSize = sizeof( AudioStreamBasicDescription );
+  property.mSelector = kAudioStreamPropertyVirtualFormat;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream format for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Set the sample rate and data format id.  However, only make the
+  // change if the sample rate is not within 1.0 of the desired
+  // rate and the format is not linear pcm.
+  bool updateFormat = false;
+  if ( fabs( description.mSampleRate - (Float64)sampleRate ) > 1.0 ) {
+    description.mSampleRate = (Float64) sampleRate;
+    updateFormat = true;
+  }
+
+  if ( description.mFormatID != kAudioFormatLinearPCM ) {
+    description.mFormatID = kAudioFormatLinearPCM;
+    updateFormat = true;
+  }
+
+  if ( updateFormat ) {
+    result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &description );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate or data format for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  }
+
+  // Now check the physical format.
+  property.mSelector = kAudioStreamPropertyPhysicalFormat;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream physical format for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  //std::cout << "Current physical stream format:" << std::endl;
+  //std::cout << " mBitsPerChan = " << description.mBitsPerChannel << std::endl;
+  //std::cout << " aligned high = " << (description.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (description.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl;
+  //std::cout << " bytesPerFrame = " << description.mBytesPerFrame << std::endl;
+  //std::cout << " sample rate = " << description.mSampleRate << std::endl;
+
+  if ( description.mFormatID != kAudioFormatLinearPCM || description.mBitsPerChannel < 16 ) {
+    description.mFormatID = kAudioFormatLinearPCM;
+    //description.mSampleRate = (Float64) sampleRate;
+    AudioStreamBasicDescription testDescription = description;
+    UInt32 formatFlags;
+
+    // We'll try higher bit rates first and then work our way down.
+    // Each entry is (bits per channel, format flags).  The vector stores
+    // the first member as UInt32, so the 24.2 and 24.4 values below are
+    // truncated to 24; those entries differ only in their flags.
+    std::vector< std::pair<UInt32, UInt32>  > physicalFormats;
+    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsFloat) & ~kLinearPCMFormatFlagIsSignedInteger;
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) );
+    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) );
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 24, formatFlags ) );   // 24-bit packed
+    formatFlags &= ~( kAudioFormatFlagIsPacked | kAudioFormatFlagIsAlignedHigh );
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 24.2, formatFlags ) ); // 24-bit in 4 bytes, aligned low
+    formatFlags |= kAudioFormatFlagIsAlignedHigh;
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 24.4, formatFlags ) ); // 24-bit in 4 bytes, aligned high
+    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 16, formatFlags ) );
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 8, formatFlags ) );
+
+    bool setPhysicalFormat = false;
+    for( unsigned int i=0; i<physicalFormats.size(); i++ ) {
+      testDescription = description;
+      testDescription.mBitsPerChannel = (UInt32) physicalFormats[i].first;
+      testDescription.mFormatFlags = physicalFormats[i].second;
+      // Unpacked 24-bit samples occupy 4 bytes each; every other format
+      // (including packed 24-bit) uses bits/8 bytes per sample.  The
+      // packed test must be a logical NOT: a bitwise complement of the
+      // masked flag is never zero and would treat packed data as unpacked.
+      if ( (24 == (UInt32)physicalFormats[i].first) && !( physicalFormats[i].second & kAudioFormatFlagIsPacked ) )
+        testDescription.mBytesPerFrame = 4 * testDescription.mChannelsPerFrame;
+      else
+        testDescription.mBytesPerFrame = testDescription.mBitsPerChannel/8 * testDescription.mChannelsPerFrame;
+      testDescription.mBytesPerPacket = testDescription.mBytesPerFrame * testDescription.mFramesPerPacket;
+      result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &testDescription );
+      if ( result == noErr ) {
+        setPhysicalFormat = true;
+        //std::cout << "Updated physical stream format:" << std::endl;
+        //std::cout << " mBitsPerChan = " << testDescription.mBitsPerChannel << std::endl;
+        //std::cout << " aligned high = " << (testDescription.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (testDescription.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl;
+        //std::cout << " bytesPerFrame = " << testDescription.mBytesPerFrame << std::endl;
+        //std::cout << " sample rate = " << testDescription.mSampleRate << std::endl;
+        break;
+      }
+    }
+
+    if ( !setPhysicalFormat ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting physical data format for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  } // done setting virtual/physical formats.
+
+  // Get the stream / device latency.  A failure here is only a warning.
+  UInt32 latency;
+  dataSize = sizeof( UInt32 );
+  property.mSelector = kAudioDevicePropertyLatency;
+  if ( AudioObjectHasProperty( id, &property ) == true ) {
+    result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &latency );
+    if ( result == kAudioHardwareNoError ) stream_.latency[ mode ] = latency;
+    else {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting device latency for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::WARNING );
+    }
+  }
+
+  // Byte-swapping: According to AudioHardware.h, the stream data will
+  // always be presented in native-endian format, so we should never
+  // need to byte swap.
+  stream_.doByteSwap[mode] = false;
+
+  // From the CoreAudio documentation, PCM data must be supplied as
+  // 32-bit floats.
+  stream_.userFormat = format;
+  stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+
+  if ( streamCount == 1 )
+    stream_.nDeviceChannels[mode] = description.mChannelsPerFrame;
+  else // multiple streams
+    stream_.nDeviceChannels[mode] = channels;
+  stream_.nUserChannels[mode] = channels;
+  stream_.channelOffset[mode] = channelOffset;  // offset within a CoreAudio stream
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+  else stream_.userInterleaved = true;
+  stream_.deviceInterleaved[mode] = true;
+  if ( monoMode == true ) stream_.deviceInterleaved[mode] = false;
+
+  // Set flags for buffer conversion.
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( streamCount == 1 ) {
+    if ( stream_.nUserChannels[mode] > 1 &&
+         stream_.userInterleaved != stream_.deviceInterleaved[mode] )
+      stream_.doConvertBuffer[mode] = true;
+  }
+  else if ( monoMode && stream_.userInterleaved )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate our CoreHandle structure for the stream.
+  CoreHandle *handle = 0;
+  if ( stream_.apiHandle == 0 ) {
+    try {
+      handle = new CoreHandle;
+    }
+    catch ( std::bad_alloc& ) {
+      errorText_ = "RtApiCore::probeDeviceOpen: error allocating CoreHandle memory.";
+      goto error;
+    }
+
+    if ( pthread_cond_init( &handle->condition, NULL ) ) {
+      errorText_ = "RtApiCore::probeDeviceOpen: error initializing pthread condition variable.";
+      // The condition variable was never initialized, so release the
+      // handle here rather than letting the error path destroy it.
+      delete handle;
+      handle = 0;
+      goto error;
+    }
+    stream_.apiHandle = (void *) handle;
+  }
+  else
+    handle = (CoreHandle *) stream_.apiHandle;
+  handle->iStream[mode] = firstStream;
+  handle->nStreams[mode] = streamCount;
+  handle->id[mode] = id;
+
+  // Allocate necessary internal buffers.  calloc() provides the zeroed
+  // buffer, and the result is checked before it is ever touched.
+  unsigned long bufferBytes;
+  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiCore::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+
+  // If possible, we will make use of the CoreAudio stream buffers as
+  // "device buffers".  However, we can't do this if using multiple
+  // streams.
+  if ( stream_.doConvertBuffer[mode] && handle->nStreams[mode] > 1 ) {
+
+    bool makeBuffer = true;
+    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+    if ( mode == INPUT ) {
+      // Reuse the output-side device buffer when it is already large enough.
+      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+        if ( bufferBytes <= bytesOut ) makeBuffer = false;
+      }
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiCore::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  stream_.sampleRate = sampleRate;
+  stream_.device[mode] = device;
+  stream_.state = STREAM_STOPPED;
+  stream_.callbackInfo.object = (void *) this;
+
+  // Setup the buffer conversion information structure.
+  if ( stream_.doConvertBuffer[mode] ) {
+    if ( streamCount > 1 ) setConvertInfo( mode, 0 );
+    else setConvertInfo( mode, channelOffset );
+  }
+
+  if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device )
+    // Only one callback procedure per device.
+    stream_.mode = DUPLEX;
+  else {
+#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
+    result = AudioDeviceCreateIOProcID( id, callbackHandler, (void *) &stream_.callbackInfo, &handle->procId[mode] );
+#else
+    // deprecated in favor of AudioDeviceCreateIOProcID()
+    result = AudioDeviceAddIOProc( id, callbackHandler, (void *) &stream_.callbackInfo );
+#endif
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error setting callback for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      goto error;
+    }
+    if ( stream_.mode == OUTPUT && mode == INPUT )
+      stream_.mode = DUPLEX;
+    else
+      stream_.mode = mode;
+  }
+
+  // Setup the device property listener for over/underload.  The status
+  // of this registration is stored but not acted upon.
+  property.mSelector = kAudioDeviceProcessorOverload;
+  property.mScope = kAudioObjectPropertyScopeGlobal;
+  result = AudioObjectAddPropertyListener( id, &property, xrunListener, (void *) handle );
+
+  return SUCCESS;
+
+ error:
+  if ( handle ) {
+    pthread_cond_destroy( &handle->condition );
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.state = STREAM_CLOSED;
+  return FAILURE;
+}
+
+// Close the currently open stream: unregister the overload listener and
+// the IO procedure on each device in use (stopping the device first if
+// the stream is running), free the user and device buffers, destroy the
+// CoreHandle and mark the stream as closed.  Issues a warning and
+// returns if no stream is open.
+void RtApiCore :: closeStream( void )
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiCore::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // All device teardown below goes through the handle, so every use is
+  // guarded by the same null check.
+  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
+
+  // Output side (slot 0).
+  if ( handle && ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) ) {
+    AudioObjectPropertyAddress property = { kAudioDeviceProcessorOverload,
+                                            kAudioObjectPropertyScopeGlobal,
+                                            kAudioObjectPropertyElementMaster };
+    if (AudioObjectRemovePropertyListener( handle->id[0], &property, xrunListener, (void *) handle ) != noErr) {
+      errorText_ = "RtApiCore::closeStream(): error removing property listener!";
+      error( RtAudioError::WARNING );
+    }
+
+    if ( stream_.state == STREAM_RUNNING )
+      AudioDeviceStop( handle->id[0], callbackHandler );
+#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
+    AudioDeviceDestroyIOProcID( handle->id[0], handle->procId[0] );
+#else
+    // deprecated in favor of AudioDeviceDestroyIOProcID()
+    AudioDeviceRemoveIOProc( handle->id[0], callbackHandler );
+#endif
+  }
+
+  // Input side (slot 1).  In duplex mode this is only needed when the
+  // input device differs from the output device.
+  if ( handle && ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) ) {
+    AudioObjectPropertyAddress property = { kAudioDeviceProcessorOverload,
+                                            kAudioObjectPropertyScopeGlobal,
+                                            kAudioObjectPropertyElementMaster };
+    if (AudioObjectRemovePropertyListener( handle->id[1], &property, xrunListener, (void *) handle ) != noErr) {
+      errorText_ = "RtApiCore::closeStream(): error removing property listener!";
+      error( RtAudioError::WARNING );
+    }
+
+    if ( stream_.state == STREAM_RUNNING )
+      AudioDeviceStop( handle->id[1], callbackHandler );
+#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
+    AudioDeviceDestroyIOProcID( handle->id[1], handle->procId[1] );
+#else
+    // deprecated in favor of AudioDeviceDestroyIOProcID()
+    AudioDeviceRemoveIOProc( handle->id[1], callbackHandler );
+#endif
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  // Destroy pthread condition variable.
+  if ( handle ) {
+    pthread_cond_destroy( &handle->condition );
+    delete handle;
+  }
+  stream_.apiHandle = 0;
+
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+}
+
+// Start the IO procedure on the output device and/or the input device of
+// the open stream, reset the drain bookkeeping and mark the stream as
+// running.  Issues a warning if the stream is already running and a
+// SYSTEM_ERROR (leaving the state unchanged) if a device fails to start.
+void RtApiCore :: startStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiCore::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  OSStatus result = noErr;
+  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    result = AudioDeviceStart( handle->id[0], callbackHandler );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::startStream: system error (" << getErrorCode( result ) << ") starting callback procedure on device (" << stream_.device[0] << ").";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+  }
+
+  // In duplex mode the input device only needs its own start call when
+  // it is a different device from the output.
+  if ( stream_.mode == INPUT ||
+       ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
+
+    result = AudioDeviceStart( handle->id[1], callbackHandler );
+    if ( result != noErr ) {
+      // Report the system error code here as well, matching the output
+      // branch above and the messages produced by stopStream().
+      errorStream_ << "RtApiCore::startStream: system error (" << getErrorCode( result ) << ") starting input callback procedure on device (" << stream_.device[1] << ").";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+  }
+
+  handle->drainCounter = 0;
+  handle->internalDrain = false;
+  stream_.state = STREAM_RUNNING;
+}
+
+// Stop the open stream.  For streams with an output side, a drain is
+// requested (unless one is already in progress) and this call blocks on
+// the handle's condition variable before the output device is stopped.
+// The input device is then stopped when it is separate from the output
+// device.  A device that fails to stop raises a SYSTEM_ERROR and leaves
+// the stream state unchanged.
+void RtApiCore :: stopStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiCore::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  CoreHandle *coreHandle = (CoreHandle *) stream_.apiHandle;
+  OSStatus status = noErr;
+
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    // No drain in progress yet: request one and wait to be signaled.
+    // NOTE(review): stream_.mutex is not locked anywhere in this
+    // function - confirm the caller holds it as pthread_cond_wait requires.
+    if ( coreHandle->drainCounter == 0 ) {
+      coreHandle->drainCounter = 2;
+      pthread_cond_wait( &coreHandle->condition, &stream_.mutex ); // block until signaled
+    }
+
+    status = AudioDeviceStop( coreHandle->id[0], callbackHandler );
+    if ( status != noErr ) {
+      errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( status ) << ") stopping callback procedure on device (" << stream_.device[0] << ").";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+  }
+
+  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
+
+    status = AudioDeviceStop( coreHandle->id[1], callbackHandler );
+    if ( status != noErr ) {
+      errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( status ) << ") stopping input callback procedure on device (" << stream_.device[1] << ").";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+  }
+
+  stream_.state = STREAM_STOPPED;
+}
+
+// Stop the stream without waiting for a drain.  Issues a warning if the
+// stream is already stopped.
+void RtApiCore :: abortStream( void )
+{
+  verifyStream();
+  if ( stream_.state != STREAM_STOPPED ) {
+    // A non-zero drainCounter makes stopStream() skip its blocking wait.
+    CoreHandle *coreHandle = (CoreHandle *) stream_.apiHandle;
+    coreHandle->drainCounter = 2;
+
+    stopStream();
+    return;
+  }
+
+  errorText_ = "RtApiCore::abortStream(): the stream is already stopped!";
+  error( RtAudioError::WARNING );
+}
+
+// Entry point for a thread spawned when the user callback asks for the
+// stream to be stopped or aborted.  Doing the stop from a separate
+// thread lets callbackEvent() return before AudioDeviceStop() is
+// invoked.  ptr is the stream's CallbackInfo, whose object member
+// refers to the owning RtApiCore instance.
+static void *coreStopStream( void *ptr )
+{
+  CallbackInfo *callbackInfo = (CallbackInfo *) ptr;
+  RtApiCore *api = (RtApiCore *) callbackInfo->object;
+  api->stopStream();
+
+  pthread_exit( NULL );
+}
+
+bool RtApiCore :: callbackEvent( AudioDeviceID deviceId,
+ const AudioBufferList *inBufferList,
+ const AudioBufferList *outBufferList )
+{
+ if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
+ if ( stream_.state == STREAM_CLOSED ) {
+ errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... this shouldn't happen!";
+ error( RtAudioError::WARNING );
+ return FAILURE;
+ }
+
+ CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
+ CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
+
+ // Check if we were draining the stream and signal is finished.
+ if ( handle->drainCounter > 3 ) {
+ ThreadHandle threadId;
+
+ stream_.state = STREAM_STOPPING;
+ if ( handle->internalDrain == true )
+ pthread_create( &threadId, NULL, coreStopStream, info );
+ else // external call to stopStream()
+ pthread_cond_signal( &handle->condition );
+ return SUCCESS;
+ }
+
+ AudioDeviceID outputDevice = handle->id[0];
+
+ // Invoke user callback to get fresh output data UNLESS we are
+ // draining stream or duplex mode AND the input/output devices are
+ // different AND this function is called for the input device.
+ if ( handle->drainCounter == 0 && ( stream_.mode != DUPLEX || deviceId == outputDevice ) ) {
+ RtAudioCallback callback = (RtAudioCallback) info->callback;
+ double streamTime = getStreamTime();
+ RtAudioStreamStatus status = 0;
+ if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
+ status |= RTAUDIO_OUTPUT_UNDERFLOW;
+ handle->xrun[0] = false;
+ }
+ if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
+ status |= RTAUDIO_INPUT_OVERFLOW;
+ handle->xrun[1] = false;
+ }
+
+ int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+ stream_.bufferSize, streamTime, status, info->userData );
+ if ( cbReturnValue == 2 ) {
+ stream_.state = STREAM_STOPPING;
+ handle->drainCounter = 2;
+ abortStream();
+ return SUCCESS;
+ }
+ else if ( cbReturnValue == 1 ) {
+ handle->drainCounter = 1;
+ handle->internalDrain = true;
+ }
+ }
+
+ if ( stream_.mode == OUTPUT || ( stream_.mode == DUPLEX && deviceId == outputDevice ) ) {
+
+ if ( handle->drainCounter > 1 ) { // write zeros to the output stream
+
+ if ( handle->nStreams[0] == 1 ) {
+ memset( outBufferList->mBuffers[handle->iStream[0]].mData,
+ 0,
+ outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
+ }
+ else { // fill multiple streams with zeros
+ for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
+ memset( outBufferList->mBuffers[handle->iStream[0]+i].mData,
+ 0,
+ outBufferList->mBuffers[handle->iStream[0]+i].mDataByteSize );
+ }
+ }
+ }
+ else if ( handle->nStreams[0] == 1 ) {
+ if ( stream_.doConvertBuffer[0] ) { // convert directly to CoreAudio stream buffer
+ convertBuffer( (char *) outBufferList->mBuffers[handle->iStream[0]].mData,
+ stream_.userBuffer[0], stream_.convertInfo[0] );
+ }
+ else { // copy from user buffer
+ memcpy( outBufferList->mBuffers[handle->iStream[0]].mData,
+ stream_.userBuffer[0],
+ outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
+ }
+ }
+ else { // fill multiple streams
+ Float32 *inBuffer = (Float32 *) stream_.userBuffer[0];
+ if ( stream_.doConvertBuffer[0] ) {
+ convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+ inBuffer = (Float32 *) stream_.deviceBuffer;
+ }
+
+ if ( stream_.deviceInterleaved[0] == false ) { // mono mode
+ UInt32 bufferBytes = outBufferList->mBuffers[handle->iStream[0]].mDataByteSize;
+ for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
+ memcpy( outBufferList->mBuffers[handle->iStream[0]+i].mData,
+ (void *)&inBuffer[i*stream_.bufferSize], bufferBytes );
+ }
+ }
+ else { // fill multiple multi-channel streams with interleaved data
+ UInt32 streamChannels, channelsLeft, inJump, outJump, inOffset;
+ Float32 *out, *in;
+
+ bool inInterleaved = ( stream_.userInterleaved ) ? true : false;
+ UInt32 inChannels = stream_.nUserChannels[0];
+ if ( stream_.doConvertBuffer[0] ) {
+ inInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
+ inChannels = stream_.nDeviceChannels[0];
+ }
+
+ if ( inInterleaved ) inOffset = 1;
+ else inOffset = stream_.bufferSize;
+
+ channelsLeft = inChannels;
+ for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
+ in = inBuffer;
+ out = (Float32 *) outBufferList->mBuffers[handle->iStream[0]+i].mData;
+ streamChannels = outBufferList->mBuffers[handle->iStream[0]+i].mNumberChannels;
+
+ outJump = 0;
+ // Account for possible channel offset in first stream
+ if ( i == 0 && stream_.channelOffset[0] > 0 ) {
+ streamChannels -= stream_.channelOffset[0];
+ outJump = stream_.channelOffset[0];
+ out += outJump;
+ }
+
+ // Account for possible unfilled channels at end of the last stream
+ if ( streamChannels > channelsLeft ) {
+ outJump = streamChannels - channelsLeft;
+ streamChannels = channelsLeft;
+ }
+
+ // Determine input buffer offsets and skips
+ if ( inInterleaved ) {
+ inJump = inChannels;
+ in += inChannels - channelsLeft;
+ }
+ else {
+ inJump = 1;
+ in += (inChannels - channelsLeft) * inOffset;
+ }
+
+ for ( unsigned int i=0; i<stream_.bufferSize; i++ ) {
+ for ( unsigned int j=0; j<streamChannels; j++ ) {
+ *out++ = in[j*inOffset];
+ }
+ out += outJump;
+ in += inJump;
+ }
+ channelsLeft -= streamChannels;
+ }
+ }
+ }
+ }
+
+ // Don't bother draining input
+ if ( handle->drainCounter ) {
+ handle->drainCounter++;
+ goto unlock;
+ }
+
+ AudioDeviceID inputDevice;
+ inputDevice = handle->id[1];
+ if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && deviceId == inputDevice ) ) {
+
+ if ( handle->nStreams[1] == 1 ) {
+ if ( stream_.doConvertBuffer[1] ) { // convert directly from CoreAudio stream buffer
+ convertBuffer( stream_.userBuffer[1],
+ (char *) inBufferList->mBuffers[handle->iStream[1]].mData,
+ stream_.convertInfo[1] );
+ }
+ else { // copy to user buffer
+ memcpy( stream_.userBuffer[1],
+ inBufferList->mBuffers[handle->iStream[1]].mData,
+ inBufferList->mBuffers[handle->iStream[1]].mDataByteSize );
+ }
+ }
+ else { // read from multiple streams
+ Float32 *outBuffer = (Float32 *) stream_.userBuffer[1];
+ if ( stream_.doConvertBuffer[1] ) outBuffer = (Float32 *) stream_.deviceBuffer;
+
+ if ( stream_.deviceInterleaved[1] == false ) { // mono mode
+ UInt32 bufferBytes = inBufferList->mBuffers[handle->iStream[1]].mDataByteSize;
+ for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
+ memcpy( (void *)&outBuffer[i*stream_.bufferSize],
+ inBufferList->mBuffers[handle->iStream[1]+i].mData, bufferBytes );
+ }
+ }
+ else { // read from multiple multi-channel streams
+ UInt32 streamChannels, channelsLeft, inJump, outJump, outOffset;
+ Float32 *out, *in;
+
+ bool outInterleaved = ( stream_.userInterleaved ) ? true : false;
+ UInt32 outChannels = stream_.nUserChannels[1];
+ if ( stream_.doConvertBuffer[1] ) {
+ outInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
+ outChannels = stream_.nDeviceChannels[1];
+ }
+
+ if ( outInterleaved ) outOffset = 1;
+ else outOffset = stream_.bufferSize;
+
+ channelsLeft = outChannels;
+ for ( unsigned int i=0; i<handle->nStreams[1]; i++ ) {
+ out = outBuffer;
+ in = (Float32 *) inBufferList->mBuffers[handle->iStream[1]+i].mData;
+ streamChannels = inBufferList->mBuffers[handle->iStream[1]+i].mNumberChannels;
+
+ inJump = 0;
+ // Account for possible channel offset in first stream
+ if ( i == 0 && stream_.channelOffset[1] > 0 ) {
+ streamChannels -= stream_.channelOffset[1];
+ inJump = stream_.channelOffset[1];
+ in += inJump;
+ }
+
+ // Account for possible unread channels at end of the last stream
+ if ( streamChannels > channelsLeft ) {
+ inJump = streamChannels - channelsLeft;
+ streamChannels = channelsLeft;
+ }
+
+ // Determine output buffer offsets and skips
+ if ( outInterleaved ) {
+ outJump = outChannels;
+ out += outChannels - channelsLeft;
+ }
+ else {
+ outJump = 1;
+ out += (outChannels - channelsLeft) * outOffset;
+ }
+
+ for ( unsigned int i=0; i<stream_.bufferSize; i++ ) {
+ for ( unsigned int j=0; j<streamChannels; j++ ) {
+ out[j*outOffset] = *in++;
+ }
+ out += outJump;
+ in += inJump;
+ }
+ channelsLeft -= streamChannels;
+ }
+ }
+
+ if ( stream_.doConvertBuffer[1] ) { // convert from our internal "device" buffer
+ convertBuffer( stream_.userBuffer[1],
+ stream_.deviceBuffer,
+ stream_.convertInfo[1] );
+ }
+ }
+ }
+
+ unlock:
+ //MUTEX_UNLOCK( &stream_.mutex );
+
+ RtApi::tickStreamTime();
+ return SUCCESS;
+}
+
+const char* RtApiCore :: getErrorCode( OSStatus code )
+{
+ switch( code ) {
+
+ case kAudioHardwareNotRunningError:
+ return "kAudioHardwareNotRunningError";
+
+ case kAudioHardwareUnspecifiedError:
+ return "kAudioHardwareUnspecifiedError";
+
+ case kAudioHardwareUnknownPropertyError:
+ return "kAudioHardwareUnknownPropertyError";
+
+ case kAudioHardwareBadPropertySizeError:
+ return "kAudioHardwareBadPropertySizeError";
+
+ case kAudioHardwareIllegalOperationError:
+ return "kAudioHardwareIllegalOperationError";
+
+ case kAudioHardwareBadObjectError:
+ return "kAudioHardwareBadObjectError";
+
+ case kAudioHardwareBadDeviceError:
+ return "kAudioHardwareBadDeviceError";
+
+ case kAudioHardwareBadStreamError:
+ return "kAudioHardwareBadStreamError";
+
+ case kAudioHardwareUnsupportedOperationError:
+ return "kAudioHardwareUnsupportedOperationError";
+
+ case kAudioDeviceUnsupportedFormatError:
+ return "kAudioDeviceUnsupportedFormatError";
+
+ case kAudioDevicePermissionsError:
+ return "kAudioDevicePermissionsError";
+
+ default:
+ return "CoreAudio unknown error";
+ }
+}
+
+ //******************** End of __MACOSX_CORE__ *********************//
+#endif
+
+#if defined(__UNIX_JACK__)
+
+// JACK is a low-latency audio server, originally written for the
+// GNU/Linux operating system and now also ported to OS-X. It can
+// connect a number of different applications to an audio device, as
+// well as allowing them to share audio between themselves.
+//
+// When using JACK with RtAudio, "devices" refer to JACK clients that
+// have ports connected to the server. The JACK server is typically
+// started in a terminal as follows:
+//
+// .jackd -d alsa -d hw:0
+//
+// or through an interface program such as qjackctl. Many of the
+// parameters normally set for a stream are fixed by the JACK server
+// and can be specified when the JACK server is started. In
+// particular,
+//
+// .jackd -d alsa -d hw:0 -r 44100 -p 512 -n 4
+//
+// specifies a sample rate of 44100 Hz, a buffer size of 512 sample
+// frames, and number of buffers = 4. Once the server is running, it
+// is not possible to override these values. If the values are not
+// specified in the command-line, the JACK server uses default values.
+//
+// The JACK server does not have to be running when an instance of
+// RtApiJack is created, though the function getDeviceCount() will
+// report 0 devices found until JACK has been started. When no
+// devices are available (i.e., the JACK server is not running), a
+// stream cannot be opened.
+
+#include <jack/jack.h>
+#include <unistd.h>
+#include <cstdio>
+
+// Per-stream bookkeeping for the Jack API implementation: the client
+// connection, the registered ports and device name for each direction
+// (index 0 is used for output, index 1 for input), the xrun flags and
+// the state used while a stream is being drained.
+struct JackHandle {
+  jack_client_t *client;
+  jack_port_t **ports[2];
+  std::string deviceName[2];
+  bool xrun[2];
+  pthread_cond_t condition;
+  int drainCounter;   // Tracks callback counts when draining
+  bool internalDrain; // Indicates if stop is initiated from callback or not.
+
+  JackHandle()
+    : client(0), drainCounter(0), internalDrain(false)
+  {
+    for ( int dir=0; dir<2; dir++ ) {
+      ports[dir] = 0;
+      xrun[dir] = false;
+    }
+  }
+};
+
+static void jackSilentError( const char * ) {};
+
+// Construct the Jack API object.  Outside of debug builds, Jack's own
+// error reporting is routed to a handler that ignores the messages.
+RtApiJack :: RtApiJack()
+{
+#ifndef __RTAUDIO_DEBUG__
+  // Turn off Jack's internal error reporting.
+  jack_set_error_function( jackSilentError );
+#endif
+}
+
+// Close the stream on destruction if it has not been closed already.
+RtApiJack :: ~RtApiJack()
+{
+  if ( stream_.state == STREAM_CLOSED ) return;
+  closeStream();
+}
+
+// Count the Jack "devices" (clients with ports) currently visible on
+// the server.  A temporary client is opened for the query; when that
+// is not possible the count is reported as 0.
+unsigned int RtApiJack :: getDeviceCount( void )
+{
+  // See if we can become a jack client (never start a server ourselves).
+  jack_status_t *noStatus = NULL;
+  jack_client_t *probe = jack_client_open( "RtApiJackCount", (jack_options_t) ( JackNoStartServer ), noStatus );
+  if ( probe == 0 ) return 0;
+
+  unsigned int deviceTotal = 0;
+  const char **portList = jack_get_ports( probe, NULL, NULL, 0 );
+  if ( portList ) {
+    // The device a port belongs to is the part of its name up to the
+    // first colon (:).  Each change of that prefix between consecutive
+    // ports is counted as one more device.
+    std::string current, lastPrefix;
+    unsigned int index = 0;
+    do {
+      current = (char *) portList[ index ];
+      const size_t colon = current.find(":");
+      if ( colon != std::string::npos ) {
+        current = current.substr( 0, colon + 1 );
+        if ( current != lastPrefix ) {
+          deviceTotal++;
+          lastPrefix = current;
+        }
+      }
+    } while ( portList[++index] );
+    free( portList );
+  }
+
+  jack_client_close( probe );
+  return deviceTotal;
+}
+
+// Probe one Jack "device" (client) and describe it.  The returned
+// structure keeps probed == false when the server cannot be reached,
+// the device index is out of range, or no ports are found for the
+// device.
+RtAudio::DeviceInfo RtApiJack :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  jack_status_t *noStatus = NULL;
+  jack_client_t *probe = jack_client_open( "RtApiJackInfo", (jack_options_t) ( JackNoStartServer ), noStatus );
+  if ( probe == 0 ) {
+    errorText_ = "RtApiJack::getDeviceInfo: Jack server not found or connection error!";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Enumerate devices by the port-name prefix before the first colon (:)
+  // and remember the name of the one with the requested index.
+  unsigned int deviceTotal = 0;
+  const char **portList = jack_get_ports( probe, NULL, NULL, 0 );
+  if ( portList ) {
+    std::string current, lastPrefix;
+    unsigned int index = 0;
+    do {
+      current = (char *) portList[ index ];
+      const size_t colon = current.find(":");
+      if ( colon != std::string::npos ) {
+        current = current.substr( 0, colon );
+        if ( current != lastPrefix ) {
+          if ( deviceTotal == device ) info.name = current;
+          deviceTotal++;
+          lastPrefix = current;
+        }
+      }
+    } while ( portList[++index] );
+    free( portList );
+  }
+
+  if ( device >= deviceTotal ) {
+    jack_client_close( probe );
+    errorText_ = "RtApiJack::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  // The only sample rate reported is the current jack server rate.
+  info.sampleRates.clear();
+  info.preferredSampleRate = jack_get_sample_rate( probe );
+  info.sampleRates.push_back( info.preferredSampleRate );
+
+  // Ports whose names contain the client name are the device channels.
+  // Jack "input ports" equal RtAudio output channels.
+  const char **sinkPorts = jack_get_ports( probe, info.name.c_str(), NULL, JackPortIsInput );
+  if ( sinkPorts ) {
+    unsigned int count = 0;
+    while ( sinkPorts[ count ] ) count++;
+    free( sinkPorts );
+    info.outputChannels = count;
+  }
+
+  // Jack "output ports" equal RtAudio input channels.
+  const char **sourcePorts = jack_get_ports( probe, info.name.c_str(), NULL, JackPortIsOutput );
+  if ( sourcePorts ) {
+    unsigned int count = 0;
+    while ( sourcePorts[ count ] ) count++;
+    free( sourcePorts );
+    info.inputChannels = count;
+  }
+
+  if ( info.outputChannels == 0 && info.inputChannels == 0 ) {
+    jack_client_close(probe);
+    errorText_ = "RtApiJack::getDeviceInfo: error determining Jack input/output channels!";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // A device usable in both directions offers the smaller of the two
+  // channel counts for duplex operation.
+  if ( info.outputChannels > 0 && info.inputChannels > 0 ) {
+    if ( info.outputChannels > info.inputChannels )
+      info.duplexChannels = info.inputChannels;
+    else
+      info.duplexChannels = info.outputChannels;
+  }
+
+  // Jack always uses 32-bit floats.
+  info.nativeFormats = RTAUDIO_FLOAT32;
+
+  // Jack doesn't provide default devices so we'll use the first available one.
+  if ( device == 0 ) {
+    if ( info.outputChannels > 0 ) info.isDefaultOutput = true;
+    if ( info.inputChannels > 0 ) info.isDefaultInput = true;
+  }
+
+  jack_client_close(probe);
+  info.probed = true;
+  return info;
+}
+
+// Process callback registered with Jack.  Forwards the frame count to
+// the RtApiJack object stored in the CallbackInfo and returns 1 when
+// callbackEvent() reports failure, 0 otherwise.
+static int jackCallbackHandler( jack_nframes_t nframes, void *infoPointer )
+{
+  CallbackInfo *callbackInfo = (CallbackInfo *) infoPointer;
+  RtApiJack *api = (RtApiJack *) callbackInfo->object;
+
+  const bool ok = api->callbackEvent( (unsigned long) nframes );
+  return ( ok == false ) ? 1 : 0;
+}
+
+// Thread entry point used when the Jack server signals that it is
+// shutting down.  The stream is closed from this separate thread
+// because jackShutdown() must return before the jack_deactivate()
+// call made in closeStream() will return.
+static void *jackCloseStream( void *ptr )
+{
+  CallbackInfo *callbackInfo = (CallbackInfo *) ptr;
+  RtApiJack *api = (RtApiJack *) callbackInfo->object;
+  api->closeStream();
+
+  pthread_exit( NULL );
+}
+// Shutdown callback registered with Jack.
+static void jackShutdown( void *infoPointer )
+{
+  CallbackInfo *callbackInfo = (CallbackInfo *) infoPointer;
+  RtApiJack *api = (RtApiJack *) callbackInfo->object;
+
+  // A stream that is not running means this call is assumed to be the
+  // result of RtApiJack::stopStream (deactivating a client handle
+  // triggers this function), so there is nothing to do.  Otherwise we
+  // assume the Jack server is shutting down or some other problem
+  // occurred, and the stream is closed from a separate thread.
+  if ( api->isStreamRunning() ) {
+    ThreadHandle closer;
+    pthread_create( &closer, NULL, jackCloseStream, callbackInfo );
+    std::cerr << "\nRtApiJack: the Jack server is shutting down this client ... stream stopped and closed!!\n" << std::endl;
+  }
+}
+
+// Xrun callback: raise the xrun flag of every direction that has
+// ports allocated in the JackHandle passed as infoPointer.
+static int jackXrun( void *infoPointer )
+{
+  JackHandle *jack = (JackHandle *) infoPointer;
+
+  for ( int dir=0; dir<2; dir++ ) {
+    if ( jack->ports[dir] ) jack->xrun[dir] = true;
+  }
+
+  return 0;
+}
+
+// Open the Jack "device" (client) with index <device> for one direction
+// of the stream and fill in the matching parts of stream_.
+//
+//   device       - device index, enumerated by port-name prefix
+//   mode         - OUTPUT or INPUT
+//   channels     - number of channels requested
+//   firstChannel - offset of the first device port to use
+//   sampleRate   - must equal the Jack server rate, otherwise the call fails
+//   format       - sample format of the user buffers
+//   bufferSize   - overwritten with the Jack server buffer size
+//   options      - may be NULL; streamName and RTAUDIO_NONINTERLEAVED are honoured
+//
+// Returns SUCCESS, or FAILURE with errorText_ describing the problem.
+bool RtApiJack :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                   unsigned int firstChannel, unsigned int sampleRate,
+                                   RtAudioFormat format, unsigned int *bufferSize,
+                                   RtAudio::StreamOptions *options )
+{
+  JackHandle *handle = (JackHandle *) stream_.apiHandle;
+
+  // Look for jack server and try to become a client (only do once per stream).
+  jack_client_t *client = 0;
+  // True when this call opened the client itself.  Until a JackHandle
+  // owns it, every failure path has to close it; a client taken from an
+  // existing handle must NOT be closed here because closeStream() will
+  // close it through the handle.
+  bool openedClient = false;
+  if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) {
+    jack_options_t jackoptions = (jack_options_t) ( JackNoStartServer ); //JackNullOption;
+    jack_status_t *status = NULL;
+    if ( options && !options->streamName.empty() )
+      client = jack_client_open( options->streamName.c_str(), jackoptions, status );
+    else
+      client = jack_client_open( "RtApiJack", jackoptions, status );
+    if ( client == 0 ) {
+      errorText_ = "RtApiJack::probeDeviceOpen: Jack server not found or connection error!";
+      error( RtAudioError::WARNING );
+      return FAILURE;
+    }
+    openedClient = true;
+  }
+  else {
+    // The handle must have been created on an earlier pass.
+    client = handle->client;
+  }
+
+  const char **ports;
+  std::string port, previousPort, deviceName;
+  unsigned int nPorts = 0, nDevices = 0;
+  ports = jack_get_ports( client, NULL, NULL, 0 );
+  if ( ports ) {
+    // Parse the port names up to the first colon (:).
+    size_t iColon = 0;
+    do {
+      port = (char *) ports[ nPorts ];
+      iColon = port.find(":");
+      if ( iColon != std::string::npos ) {
+        port = port.substr( 0, iColon );
+        if ( port != previousPort ) {
+          if ( nDevices == device ) deviceName = port;
+          nDevices++;
+          previousPort = port;
+        }
+      }
+    } while ( ports[++nPorts] );
+    free( ports );
+  }
+
+  if ( device >= nDevices ) {
+    if ( openedClient ) jack_client_close( client );
+    errorText_ = "RtApiJack::probeDeviceOpen: device ID is invalid!";
+    return FAILURE;
+  }
+
+  // Count the available ports containing the client name as device
+  // channels.  Jack "input ports" equal RtAudio output channels.
+  unsigned int nChannels = 0;
+  unsigned long flag = JackPortIsInput;
+  if ( mode == INPUT ) flag = JackPortIsOutput;
+  ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
+  if ( ports ) {
+    while ( ports[ nChannels ] ) nChannels++;
+    free( ports );
+  }
+
+  // Compare the jack ports for specified client to the requested number of channels.
+  if ( nChannels < (channels + firstChannel) ) {
+    if ( openedClient ) jack_client_close( client );
+    errorStream_ << "RtApiJack::probeDeviceOpen: requested number of channels (" << channels << ") + offset (" << firstChannel << ") not found for specified device (" << device << ":" << deviceName << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Check the jack server sample rate.
+  unsigned int jackRate = jack_get_sample_rate( client );
+  if ( sampleRate != jackRate ) {
+    if ( openedClient ) jack_client_close( client );
+    errorStream_ << "RtApiJack::probeDeviceOpen: the requested sample rate (" << sampleRate << ") is different than the JACK server rate (" << jackRate << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+  stream_.sampleRate = jackRate;
+
+  // Get the latency of the JACK port.  The port list may be NULL if
+  // the ports went away since they were counted above.
+  ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
+  if ( ports ) {
+    if ( ports[ firstChannel ] ) {
+      // Added by Ge Wang
+      jack_latency_callback_mode_t cbmode = (mode == INPUT ? JackCaptureLatency : JackPlaybackLatency);
+      // the range (usually the min and max are equal)
+      jack_latency_range_t latrange; latrange.min = latrange.max = 0;
+      // get the latency range
+      jack_port_get_latency_range( jack_port_by_name( client, ports[firstChannel] ), cbmode, &latrange );
+      // be optimistic, use the min!
+      stream_.latency[mode] = latrange.min;
+      //stream_.latency[mode] = jack_port_get_latency( jack_port_by_name( client, ports[ firstChannel ] ) );
+    }
+    free( ports );
+  }
+
+  // The jack server always uses 32-bit floating-point data.
+  stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+  stream_.userFormat = format;
+
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+  else stream_.userInterleaved = true;
+
+  // Jack always uses non-interleaved buffers.
+  stream_.deviceInterleaved[mode] = false;
+
+  // Jack always provides host byte-ordered data.
+  stream_.doByteSwap[mode] = false;
+
+  // Get the buffer size.  The buffer size and number of buffers
+  // (periods) is set when the jack server is started.
+  stream_.bufferSize = (int) jack_get_buffer_size( client );
+  *bufferSize = stream_.bufferSize;
+
+  stream_.nDeviceChannels[mode] = channels;
+  stream_.nUserChannels[mode] = channels;
+
+  // Set flags for buffer conversion.
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+       stream_.nUserChannels[mode] > 1 )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate our JackHandle structure for the stream.
+  if ( handle == 0 ) {
+    try {
+      handle = new JackHandle;
+    }
+    catch ( std::bad_alloc& ) {
+      errorText_ = "RtApiJack::probeDeviceOpen: error allocating JackHandle memory.";
+      goto error;
+    }
+
+    // Hand the client to the handle before anything else can fail, so
+    // the error path below closes the real client instead of a null one.
+    handle->client = client;
+
+    if ( pthread_cond_init(&handle->condition, NULL) ) {
+      errorText_ = "RtApiJack::probeDeviceOpen: error initializing pthread condition variable.";
+      goto error;
+    }
+    stream_.apiHandle = (void *) handle;
+  }
+  handle->deviceName[mode] = deviceName;
+
+  // Allocate necessary internal buffers.
+  unsigned long bufferBytes;
+  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiJack::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+
+  if ( stream_.doConvertBuffer[mode] ) {
+
+    // The device buffer is shared by both directions; an existing
+    // output buffer is reused when it is larger than what input needs.
+    bool makeBuffer = true;
+    if ( mode == OUTPUT )
+      bufferBytes = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+    else { // mode == INPUT
+      bufferBytes = stream_.nDeviceChannels[1] * formatBytes( stream_.deviceFormat[1] );
+      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes(stream_.deviceFormat[0]);
+        if ( bufferBytes < bytesOut ) makeBuffer = false;
+      }
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiJack::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  // Allocate memory for the Jack ports (channels) identifiers.
+  handle->ports[mode] = (jack_port_t **) malloc ( sizeof (jack_port_t *) * channels );
+  if ( handle->ports[mode] == NULL )  {
+    errorText_ = "RtApiJack::probeDeviceOpen: error allocating port memory.";
+    goto error;
+  }
+
+  stream_.device[mode] = device;
+  stream_.channelOffset[mode] = firstChannel;
+  stream_.state = STREAM_STOPPED;
+  stream_.callbackInfo.object = (void *) this;
+
+  if ( stream_.mode == OUTPUT && mode == INPUT )
+    // We had already set up the stream for output.
+    stream_.mode = DUPLEX;
+  else {
+    stream_.mode = mode;
+    jack_set_process_callback( handle->client, jackCallbackHandler, (void *) &stream_.callbackInfo );
+    // jackXrun() casts its argument to JackHandle*, so pass the handle
+    // itself (not the address of this function's local pointer, which
+    // would dangle once we return).
+    jack_set_xrun_callback( handle->client, jackXrun, (void *) handle );
+    jack_on_shutdown( handle->client, jackShutdown, (void *) &stream_.callbackInfo );
+  }
+
+  // Register our ports.
+  char label[64];
+  if ( mode == OUTPUT ) {
+    for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
+      snprintf( label, 64, "outport %d", i );
+      handle->ports[0][i] = jack_port_register( handle->client, (const char *)label,
+                                                JACK_DEFAULT_AUDIO_TYPE, JackPortIsOutput, 0 );
+    }
+  }
+  else {
+    for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
+      snprintf( label, 64, "inport %d", i );
+      handle->ports[1][i] = jack_port_register( handle->client, (const char *)label,
+                                                JACK_DEFAULT_AUDIO_TYPE, JackPortIsInput, 0 );
+    }
+  }
+
+  // Setup the buffer conversion information structure.  We don't use
+  // buffers to do channel offsets, so we override that parameter
+  // here.
+  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
+
+  return SUCCESS;
+
+ error:
+  if ( handle ) {
+    pthread_cond_destroy( &handle->condition );
+    jack_client_close( handle->client );
+
+    if ( handle->ports[0] ) free( handle->ports[0] );
+    if ( handle->ports[1] ) free( handle->ports[1] );
+
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+  else if ( openedClient ) {
+    // No handle took ownership of the client opened above.
+    jack_client_close( client );
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  return FAILURE;
+}
+
+// Close the open stream: disconnect from the Jack server, release the
+// JackHandle and all stream buffers, and mark the stream as closed.
+// Issues a warning and does nothing when no stream is open.
+void RtApiJack :: closeStream( void )
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiJack::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  JackHandle *jack = (JackHandle *) stream_.apiHandle;
+  if ( jack ) {
+    // Leave the server, deactivating first if the stream is running.
+    if ( stream_.state == STREAM_RUNNING ) jack_deactivate( jack->client );
+    jack_client_close( jack->client );
+
+    // Release everything owned by the handle, then the handle itself.
+    for ( int dir=0; dir<2; dir++ ) {
+      if ( jack->ports[dir] ) free( jack->ports[dir] );
+    }
+    pthread_cond_destroy( &jack->condition );
+    delete jack;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int dir=0; dir<2; dir++ ) {
+    if ( stream_.userBuffer[dir] == 0 ) continue;
+    free( stream_.userBuffer[dir] );
+    stream_.userBuffer[dir] = 0;
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+}
+
+// Activate the Jack client and connect our registered ports to the
+// ports of the selected device(s).  Any failure along the way is
+// reported as a SYSTEM_ERROR.
+void RtApiJack :: startStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiJack::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  JackHandle *jack = (JackHandle *) stream_.apiHandle;
+  int status = jack_activate( jack->client );
+  if ( status ) {
+    errorText_ = "RtApiJack::startStream(): unable to activate JACK client!";
+    goto finish;
+  }
+
+  const char **devicePorts;
+
+  // Playback side: our output ports feed the device's input ports.
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    status = 1;
+    devicePorts = jack_get_ports( jack->client, jack->deviceName[0].c_str(), NULL, JackPortIsInput);
+    if ( devicePorts == NULL) {
+      errorText_ = "RtApiJack::startStream(): error determining available JACK input ports!";
+      goto finish;
+    }
+
+    // Now make the port connections.  Since RtAudio wasn't designed to
+    // allow the user to select particular channels of a device, we'll
+    // just open the first "nChannels" ports with offset.
+    for ( unsigned int ch=0; ch<stream_.nUserChannels[0]; ch++ ) {
+      const char *peer = devicePorts[ stream_.channelOffset[0] + ch ];
+      status = peer ? jack_connect( jack->client, jack_port_name( jack->ports[0][ch] ), peer ) : 1;
+      if ( status ) {
+        free( devicePorts );
+        errorText_ = "RtApiJack::startStream(): error connecting output ports!";
+        goto finish;
+      }
+    }
+    free(devicePorts);
+  }
+
+  // Capture side: the device's output ports feed our input ports.
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+    status = 1;
+    devicePorts = jack_get_ports( jack->client, jack->deviceName[1].c_str(), NULL, JackPortIsOutput );
+    if ( devicePorts == NULL) {
+      errorText_ = "RtApiJack::startStream(): error determining available JACK output ports!";
+      goto finish;
+    }
+
+    // Now make the port connections.  See note above.
+    for ( unsigned int ch=0; ch<stream_.nUserChannels[1]; ch++ ) {
+      const char *peer = devicePorts[ stream_.channelOffset[1] + ch ];
+      status = peer ? jack_connect( jack->client, peer, jack_port_name( jack->ports[1][ch] ) ) : 1;
+      if ( status ) {
+        free( devicePorts );
+        errorText_ = "RtApiJack::startStream(): error connecting input ports!";
+        goto finish;
+      }
+    }
+    free(devicePorts);
+  }
+
+  jack->drainCounter = 0;
+  jack->internalDrain = false;
+  stream_.state = STREAM_RUNNING;
+
+ finish:
+  if ( status != 0 ) error( RtAudioError::SYSTEM_ERROR );
+}
+
+// Stop the stream.  For streams with an output side on which no drain
+// is in progress yet, a drain is requested and the call blocks on the
+// handle's condition variable before the Jack client is deactivated.
+void RtApiJack :: stopStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiJack::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  JackHandle *jack = (JackHandle *) stream_.apiHandle;
+  const bool hasOutput = ( stream_.mode == OUTPUT || stream_.mode == DUPLEX );
+  if ( hasOutput && jack->drainCounter == 0 ) {
+    jack->drainCounter = 2;
+    // block until signaled
+    pthread_cond_wait( &jack->condition, &stream_.mutex );
+  }
+
+  jack_deactivate( jack->client );
+  stream_.state = STREAM_STOPPED;
+}
+
+// Stop the stream without waiting for a drain to complete.
+void RtApiJack :: abortStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiJack::abortStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // With a non-zero drain counter, stopStream() skips its blocking wait.
+  JackHandle *jack = (JackHandle *) stream_.apiHandle;
+  jack->drainCounter = 2;
+  stopStream();
+}
+
+// Thread entry point used when the user callback function signals
+// that the stream should be stopped or aborted.  The stop is carried
+// out in this separate thread because callbackEvent() must return
+// before the jack_deactivate() function will return.
+static void *jackStopStream( void *ptr )
+{
+  CallbackInfo *callbackInfo = (CallbackInfo *) ptr;
+  RtApiJack *api = (RtApiJack *) callbackInfo->object;
+  api->stopStream();
+
+  pthread_exit( NULL );
+}
+
+// Handle one Jack process cycle of <nframes> frames.
+//
+// Returns SUCCESS without doing anything while the stream is stopped
+// or stopping, and FAILURE (after a warning) when the stream is closed
+// or the Jack buffer size no longer matches stream_.bufferSize.
+// Otherwise the user callback is invoked (unless a drain is in
+// progress), output data is copied to the Jack port buffers and input
+// data is copied from them, converting through stream_.deviceBuffer
+// where required.
+bool RtApiJack :: callbackEvent( unsigned long nframes )
+{
+  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
+  if ( stream_.state == STREAM_CLOSED ) {
+    // Message names this class (it used to carry the CoreAudio class name).
+    errorText_ = "RtApiJack::callbackEvent(): the stream is closed ... this shouldn't happen!";
+    error( RtAudioError::WARNING );
+    return FAILURE;
+  }
+  if ( stream_.bufferSize != nframes ) {
+    errorText_ = "RtApiJack::callbackEvent(): the JACK buffer size has changed ... cannot process!";
+    error( RtAudioError::WARNING );
+    return FAILURE;
+  }
+
+  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
+  JackHandle *handle = (JackHandle *) stream_.apiHandle;
+
+  // Check if we were draining the stream and signal is finished.
+  if ( handle->drainCounter > 3 ) {
+    ThreadHandle threadId;
+
+    stream_.state = STREAM_STOPPING;
+    // A drain started by the user callback is finished from a helper
+    // thread; otherwise the thread waiting in stopStream() is woken.
+    if ( handle->internalDrain == true )
+      pthread_create( &threadId, NULL, jackStopStream, info );
+    else
+      pthread_cond_signal( &handle->condition );
+    return SUCCESS;
+  }
+
+  // Invoke user callback first, to get fresh output data.
+  if ( handle->drainCounter == 0 ) {
+    RtAudioCallback callback = (RtAudioCallback) info->callback;
+    double streamTime = getStreamTime();
+    RtAudioStreamStatus status = 0;
+    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
+      status |= RTAUDIO_OUTPUT_UNDERFLOW;
+      handle->xrun[0] = false;
+    }
+    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
+      status |= RTAUDIO_INPUT_OVERFLOW;
+      handle->xrun[1] = false;
+    }
+    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+                                  stream_.bufferSize, streamTime, status, info->userData );
+    if ( cbReturnValue == 2 ) {
+      // Return value 2: stop right away from a helper thread.
+      stream_.state = STREAM_STOPPING;
+      handle->drainCounter = 2;
+      ThreadHandle id;
+      pthread_create( &id, NULL, jackStopStream, info );
+      return SUCCESS;
+    }
+    else if ( cbReturnValue == 1 ) {
+      // Return value 1: start draining, initiated from the callback.
+      handle->drainCounter = 1;
+      handle->internalDrain = true;
+    }
+  }
+
+  jack_default_audio_sample_t *jackbuffer;
+  unsigned long bufferBytes = nframes * sizeof( jack_default_audio_sample_t );
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
+
+      for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
+        memset( jackbuffer, 0, bufferBytes );
+      }
+
+    }
+    else if ( stream_.doConvertBuffer[0] ) {
+
+      convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+
+      // One contiguous block of bufferBytes per channel.
+      for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
+        memcpy( jackbuffer, &stream_.deviceBuffer[i*bufferBytes], bufferBytes );
+      }
+    }
+    else { // no buffer conversion
+      for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
+        memcpy( jackbuffer, &stream_.userBuffer[0][i*bufferBytes], bufferBytes );
+      }
+    }
+  }
+
+  // Don't bother draining input
+  if ( handle->drainCounter ) {
+    handle->drainCounter++;
+    goto unlock;
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    if ( stream_.doConvertBuffer[1] ) {
+      for ( unsigned int i=0; i<stream_.nDeviceChannels[1]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
+        memcpy( &stream_.deviceBuffer[i*bufferBytes], jackbuffer, bufferBytes );
+      }
+      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+    }
+    else { // no buffer conversion
+      for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
+        memcpy( &stream_.userBuffer[1][i*bufferBytes], jackbuffer, bufferBytes );
+      }
+    }
+  }
+
+ unlock:
+  RtApi::tickStreamTime();
+  return SUCCESS;
+}
+ //******************** End of __UNIX_JACK__ *********************//
+#endif
+
+#if defined(__WINDOWS_ASIO__) // ASIO API on Windows
+
+// The ASIO API is designed around a callback scheme, so this
+// implementation is similar to that used for OS-X CoreAudio and Linux
+// Jack. The primary constraint with ASIO is that it only allows
+// access to a single driver at a time. Thus, it is not possible to
+// have more than one simultaneous RtAudio stream.
+//
+// This implementation also requires a number of external ASIO files
+// and a few global variables. The ASIO callback scheme does not
+// allow for the passing of user data, so we must create a global
+// pointer to our callbackInfo structure.
+//
+// On unix systems, we make use of a pthread condition variable.
+// Since there is no equivalent in Windows, I hacked something based
+// on information found in
+// http://www.cs.wustl.edu/~schmidt/win32-cv-1.html.
+
+#include "asiosys.h"
+#include "asio.h"
+#include "iasiothiscallresolver.h"
+#include "asiodrivers.h"
+#include <cmath>
+
+static AsioDrivers drivers; // driver list/loader: queried with asioGetNumDev()/asioGetDriverName(), drivers are loaded/unloaded with loadDriver()/removeCurrentDriver()
+static ASIOCallbacks asioCallbacks; // NOTE(review): presumably the callback table handed to the driver -- not used in the code visible here, confirm
+static ASIODriverInfo driverInfo; // asioVersion and sysRef are filled in by the RtApiAsio constructor; passed to ASIOInit()
+static CallbackInfo *asioCallbackInfo; // global pointer to our callbackInfo structure, needed because the ASIO callback scheme cannot pass user data
+static bool asioXRun; // NOTE(review): presumably an xrun flag -- where it is set and cleared is not visible here, confirm
+
+// Per-stream bookkeeping for the ASIO API implementation.
+struct AsioHandle {
+  int drainCounter;   // Tracks callback counts when draining
+  bool internalDrain; // Indicates if stop is initiated from callback or not.
+  ASIOBufferInfo *bufferInfos;
+  HANDLE condition;
+
+  // Note: condition is not initialized by the constructor.
+  AsioHandle()
+  {
+    drainCounter = 0;
+    internalDrain = false;
+    bufferInfos = 0;
+  }
+};
+
+// Function declarations (definitions at end of section)
+static const char* getAsioErrorString( ASIOError result ); // its return value is streamed into the error messages built in this section
+static void sampleRateChanged( ASIOSampleRate sRate ); // NOTE(review): presumably a driver callback -- definition not visible here, confirm
+static long asioMessages( long selector, long value, void* message, double* opt ); // NOTE(review): presumably a driver callback -- definition not visible here, confirm
+
+// Construct the ASIO API object: initialize COM for this thread,
+// unload any current driver and prepare the global driverInfo.
+RtApiAsio :: RtApiAsio()
+{
+  // ASIO cannot run on a multi-threaded apartment.  You can call
+  // CoInitialize beforehand, but it must be for apartment threading
+  // (in which case, CoInitialize will return S_FALSE here).
+  coInitialized_ = false;
+  HRESULT hr = CoInitialize( NULL );
+  if ( FAILED(hr) ) {
+    errorText_ = "RtApiAsio::ASIO requires a single-threaded appartment. Call CoInitializeEx(0,COINIT_APARTMENTTHREADED)";
+    error( RtAudioError::WARNING );
+  }
+  else {
+    // Only a successful CoInitialize (S_OK or S_FALSE) has to be
+    // balanced by the CoUninitialize call made in the destructor; after
+    // a failure the flag must stay false.
+    coInitialized_ = true;
+  }
+
+  drivers.removeCurrentDriver();
+  driverInfo.asioVersion = 2;
+
+  // See note in DirectSound implementation about GetDesktopWindow().
+  driverInfo.sysRef = GetForegroundWindow();
+}
+
+// Close any stream that is still open and undo the COM initialization
+// recorded by the constructor.
+RtApiAsio :: ~RtApiAsio()
+{
+  const bool streamOpen = ( stream_.state != STREAM_CLOSED );
+  if ( streamOpen ) {
+    closeStream();
+  }
+
+  if ( coInitialized_ ) {
+    CoUninitialize();
+  }
+}
+
+// Report the number of ASIO drivers known to the global driver list.
+unsigned int RtApiAsio :: getDeviceCount( void )
+{
+  return static_cast<unsigned int>( drivers.asioGetNumDev() );
+}
+
+// Probes the ASIO driver with index 'device' and returns its capabilities
+// (name, channel counts, supported sample rates, native sample format and
+// default-device flags).
+//
+// The returned DeviceInfo has 'probed' set to true only when every query
+// succeeded. On any failure an error/warning is raised through error() and
+// the partially filled structure is returned with 'probed' == false.
+//
+// While a stream is open no driver can be probed, so the results cached in
+// devices_ (filled by saveDeviceInfo()) are returned instead.
+RtAudio::DeviceInfo RtApiAsio :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  // Get device ID
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices == 0 ) {
+    errorText_ = "RtApiAsio::getDeviceInfo: no devices found!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  if ( device >= nDevices ) {
+    errorText_ = "RtApiAsio::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  // If a stream is already open, we cannot probe other devices.  Thus, use the saved results.
+  if ( stream_.state != STREAM_CLOSED ) {
+    if ( device >= devices_.size() ) {
+      errorText_ = "RtApiAsio::getDeviceInfo: device ID was not present before stream was opened.";
+      error( RtAudioError::WARNING );
+      return info;
+    }
+    return devices_[ device ];
+  }
+
+  char driverName[32];
+  ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::getDeviceInfo: unable to get driver name (" << getAsioErrorString( result ) << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  info.name = driverName;
+
+  if ( !drivers.loadDriver( driverName ) ) {
+    errorStream_ << "RtApiAsio::getDeviceInfo: unable to load driver (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  result = ASIOInit( &driverInfo );
+  if ( result != ASE_OK ) {
+    // The driver was loaded successfully above, so it has to be removed
+    // again here, exactly as the later failure paths do.
+    drivers.removeCurrentDriver();
+    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Determine the device channel information.
+  long inputChannels, outputChannels;
+  result = ASIOGetChannels( &inputChannels, &outputChannels );
+  if ( result != ASE_OK ) {
+    drivers.removeCurrentDriver();
+    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  info.outputChannels = outputChannels;
+  info.inputChannels = inputChannels;
+  // Duplex capacity is limited by the smaller of the two directions.
+  if ( info.outputChannels > 0 && info.inputChannels > 0 )
+    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+  // Determine the supported sample rates.
+  info.sampleRates.clear();
+  for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
+    result = ASIOCanSampleRate( (ASIOSampleRate) SAMPLE_RATES[i] );
+    if ( result == ASE_OK ) {
+      info.sampleRates.push_back( SAMPLE_RATES[i] );
+
+      // Prefer the highest supported rate that does not exceed 48 kHz;
+      // otherwise keep the first supported rate found.
+      if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
+        info.preferredSampleRate = SAMPLE_RATES[i];
+    }
+  }
+
+  // Determine supported data types ... just check first channel and assume rest are the same.
+  ASIOChannelInfo channelInfo;
+  channelInfo.channel = 0;
+  channelInfo.isInput = true;
+  if ( info.inputChannels <= 0 ) channelInfo.isInput = false;
+  result = ASIOGetChannelInfo( &channelInfo );
+  if ( result != ASE_OK ) {
+    drivers.removeCurrentDriver();
+    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting driver channel info (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Map the ASIO sample type (either byte order) onto the RtAudio format flag.
+  info.nativeFormats = 0;
+  if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB )
+    info.nativeFormats |= RTAUDIO_SINT16;
+  else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB )
+    info.nativeFormats |= RTAUDIO_SINT32;
+  else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB )
+    info.nativeFormats |= RTAUDIO_FLOAT32;
+  else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB )
+    info.nativeFormats |= RTAUDIO_FLOAT64;
+  else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB )
+    info.nativeFormats |= RTAUDIO_SINT24;
+
+  if ( info.outputChannels > 0 )
+    if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
+  if ( info.inputChannels > 0 )
+    if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
+
+  info.probed = true;
+  drivers.removeCurrentDriver();
+  return info;
+}
+
+// ASIO buffer-switch callback: forwards the buffer index to the RtApiAsio
+// instance registered in asioCallbackInfo. The processNow flag is ignored.
+static void bufferSwitch( long index, ASIOBool /*processNow*/ )
+{
+  RtApiAsio *api = static_cast<RtApiAsio *>( asioCallbackInfo->object );
+  api->callbackEvent( index );
+}
+
+// Rebuilds the devices_ cache by probing every driver currently reported
+// by getDeviceCount(). getDeviceInfo() serves results from this cache
+// while a stream is open.
+void RtApiAsio :: saveDeviceInfo( void )
+{
+  const unsigned int deviceTotal = getDeviceCount();
+
+  devices_.clear();
+  devices_.resize( deviceTotal );
+
+  unsigned int index = 0;
+  while ( index < deviceTotal ) {
+    devices_[index] = getDeviceInfo( index );
+    ++index;
+  }
+}
+
+// Opens one direction (mode == OUTPUT or INPUT) of a stream on the ASIO
+// driver with index 'device'.
+//
+// Parameters:
+//   device       - ASIO driver index.
+//   mode         - direction being opened; opening INPUT after OUTPUT on the
+//                  same object turns the stream into a DUPLEX stream.
+//   channels     - number of channels requested for this direction.
+//   firstChannel - offset of the first device channel to use.
+//   sampleRate   - requested sample rate.
+//   format       - user-side sample format.
+//   bufferSize   - in: requested frames per buffer (0 = driver preferred);
+//                  out: the size actually used.
+//   options      - optional flags; only RTAUDIO_NONINTERLEAVED is read here.
+//
+// Returns SUCCESS or FAILURE; on FAILURE errorText_ describes the problem.
+// For a non-duplex open, every resource acquired here is released again on
+// failure; for the duplex-input step that cleanup is left to
+// RtApi::openStream.
+bool RtApiAsio :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                   unsigned int firstChannel, unsigned int sampleRate,
+                                   RtAudioFormat format, unsigned int *bufferSize,
+                                   RtAudio::StreamOptions *options )
+{
+
+  bool isDuplexInput =  mode == INPUT && stream_.mode == OUTPUT;
+
+  // For ASIO, a duplex stream MUST use the same driver.
+  if ( isDuplexInput && stream_.device[0] != device ) {
+    errorText_ = "RtApiAsio::probeDeviceOpen: an ASIO duplex stream must use the same device for input and output!";
+    return FAILURE;
+  }
+
+  char driverName[32];
+  ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: unable to get driver name (" << getAsioErrorString( result ) << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Only load the driver once for duplex stream.
+  if ( !isDuplexInput ) {
+    // The getDeviceInfo() function will not work when a stream is open
+    // because ASIO does not allow multiple devices to run at the same
+    // time.  Thus, we'll probe the system before opening a stream and
+    // save the results for use by getDeviceInfo().
+    this->saveDeviceInfo();
+
+    if ( !drivers.loadDriver( driverName ) ) {
+      errorStream_ << "RtApiAsio::probeDeviceOpen: unable to load driver (" << driverName << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    result = ASIOInit( &driverInfo );
+    if ( result != ASE_OK ) {
+      // The driver was loaded just above; unload it again so a failed
+      // initialization does not leave it resident (this path returns
+      // before the common "error:" cleanup becomes reachable).
+      drivers.removeCurrentDriver();
+      errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  }
+
+  // keep them before any "goto error", they are used for error cleanup + goto device boundary checks
+  bool buffersAllocated = false;
+  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
+  unsigned int nChannels;
+
+
+  // Check the device channel count.
+  long inputChannels, outputChannels;
+  result = ASIOGetChannels( &inputChannels, &outputChannels );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  if ( ( mode == OUTPUT && (channels+firstChannel) > (unsigned int) outputChannels) ||
+       ( mode == INPUT && (channels+firstChannel) > (unsigned int) inputChannels) ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested channel count (" << channels << ") + offset (" << firstChannel << ").";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+  stream_.nDeviceChannels[mode] = channels;
+  stream_.nUserChannels[mode] = channels;
+  stream_.channelOffset[mode] = firstChannel;
+
+  // Verify the sample rate is supported.
+  result = ASIOCanSampleRate( (ASIOSampleRate) sampleRate );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested sample rate (" << sampleRate << ").";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  // Get the current sample rate
+  ASIOSampleRate currentRate;
+  result = ASIOGetSampleRate( &currentRate );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error getting sample rate.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  // Set the sample rate only if necessary
+  if ( currentRate != sampleRate ) {
+    result = ASIOSetSampleRate( (ASIOSampleRate) sampleRate );
+    if ( result != ASE_OK ) {
+      errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error setting sample rate (" << sampleRate << ").";
+      errorText_ = errorStream_.str();
+      goto error;
+    }
+  }
+
+  // Determine the driver data type.
+  ASIOChannelInfo channelInfo;
+  channelInfo.channel = 0;
+  if ( mode == OUTPUT ) channelInfo.isInput = false;
+  else channelInfo.isInput = true;
+  result = ASIOGetChannelInfo( &channelInfo );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting data format.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  // Assuming WINDOWS host is always little-endian.
+  // MSB (big-endian) driver formats therefore need a byte swap.
+  stream_.doByteSwap[mode] = false;
+  stream_.userFormat = format;
+  stream_.deviceFormat[mode] = 0;
+  if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+    if ( channelInfo.type == ASIOSTInt16MSB ) stream_.doByteSwap[mode] = true;
+  }
+  else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+    if ( channelInfo.type == ASIOSTInt32MSB ) stream_.doByteSwap[mode] = true;
+  }
+  else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+    if ( channelInfo.type == ASIOSTFloat32MSB ) stream_.doByteSwap[mode] = true;
+  }
+  else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
+    if ( channelInfo.type == ASIOSTFloat64MSB ) stream_.doByteSwap[mode] = true;
+  }
+  else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+    if ( channelInfo.type == ASIOSTInt24MSB ) stream_.doByteSwap[mode] = true;
+  }
+
+  if ( stream_.deviceFormat[mode] == 0 ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") data format not supported by RtAudio.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  // Set the buffer size.  For a duplex stream, this will end up
+  // setting the buffer size based on the input constraints, which
+  // should be ok.
+  long minSize, maxSize, preferSize, granularity;
+  result = ASIOGetBufferSize( &minSize, &maxSize, &preferSize, &granularity );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting buffer size.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  if ( isDuplexInput ) {
+    // When this is the duplex input (output was opened before), then we have to use the same
+    // buffersize as the output, because it might use the preferred buffer size, which most
+    // likely wasn't passed as input to this. The buffer sizes have to be identically anyway,
+    // So instead of throwing an error, make them equal. The caller uses the reference
+    // to the "bufferSize" param as usual to set up processing buffers.
+
+    *bufferSize = stream_.bufferSize;
+
+  } else {
+    if ( *bufferSize == 0 ) *bufferSize = preferSize;
+    else if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
+    else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
+    else if ( granularity == -1 ) {
+      // Make sure bufferSize is a power of two.
+      int log2_of_min_size = 0;
+      int log2_of_max_size = 0;
+
+      // Find the index of the highest set bit of minSize and maxSize.
+      for ( unsigned int i = 0; i < sizeof(long) * 8; i++ ) {
+        if ( minSize & ((long)1 << i) ) log2_of_min_size = i;
+        if ( maxSize & ((long)1 << i) ) log2_of_max_size = i;
+      }
+
+      // Pick the power of two in that range closest to the request.
+      long min_delta = std::abs( (long)*bufferSize - ((long)1 << log2_of_min_size) );
+      int min_delta_num = log2_of_min_size;
+
+      for (int i = log2_of_min_size + 1; i <= log2_of_max_size; i++) {
+        long current_delta = std::abs( (long)*bufferSize - ((long)1 << i) );
+        if (current_delta < min_delta) {
+          min_delta = current_delta;
+          min_delta_num = i;
+        }
+      }
+
+      *bufferSize = ( (unsigned int)1 << min_delta_num );
+      if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
+      else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
+    }
+    else if ( granularity != 0 ) {
+      // Set to an even multiple of granularity, rounding up.
+      *bufferSize = (*bufferSize + granularity-1) / granularity * granularity;
+    }
+  }
+
+  /*
+  // we don't use it anymore, see above!
+  // Just left it here for the case...
+  if ( isDuplexInput && stream_.bufferSize != *bufferSize ) {
+    errorText_ = "RtApiAsio::probeDeviceOpen: input/output buffersize discrepancy!";
+    goto error;
+  }
+  */
+
+  stream_.bufferSize = *bufferSize;
+  stream_.nBuffers = 2;
+
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+  else stream_.userInterleaved = true;
+
+  // ASIO always uses non-interleaved buffers.
+  stream_.deviceInterleaved[mode] = false;
+
+  // Allocate, if necessary, our AsioHandle structure for the stream.
+  if ( handle == 0 ) {
+    try {
+      handle = new AsioHandle;
+    }
+    catch ( std::bad_alloc& ) {
+      errorText_ = "RtApiAsio::probeDeviceOpen: error allocating AsioHandle memory.";
+      goto error;
+    }
+    handle->bufferInfos = 0;
+
+    // Create a manual-reset event.
+    handle->condition = CreateEvent( NULL,   // no security
+                                     TRUE,   // manual-reset
+                                     FALSE,  // non-signaled initially
+                                     NULL ); // unnamed
+    stream_.apiHandle = (void *) handle;
+  }
+
+  // Create the ASIO internal buffers.  Since RtAudio sets up input
+  // and output separately, we'll have to dispose of previously
+  // created output buffers for a duplex stream.
+  if ( mode == INPUT && stream_.mode == OUTPUT ) {
+    ASIODisposeBuffers();
+    if ( handle->bufferInfos ) free( handle->bufferInfos );
+  }
+
+  // Allocate, initialize, and save the bufferInfos in our stream callbackInfo structure.
+  unsigned int i;
+  nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
+  handle->bufferInfos = (ASIOBufferInfo *) malloc( nChannels * sizeof(ASIOBufferInfo) );
+  if ( handle->bufferInfos == NULL ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: error allocating bufferInfo memory for driver (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  // Output channels occupy the first entries, input channels follow.
+  ASIOBufferInfo *infos;
+  infos = handle->bufferInfos;
+  for ( i=0; i<stream_.nDeviceChannels[0]; i++, infos++ ) {
+    infos->isInput = ASIOFalse;
+    infos->channelNum = i + stream_.channelOffset[0];
+    infos->buffers[0] = infos->buffers[1] = 0;
+  }
+  for ( i=0; i<stream_.nDeviceChannels[1]; i++, infos++ ) {
+    infos->isInput = ASIOTrue;
+    infos->channelNum = i + stream_.channelOffset[1];
+    infos->buffers[0] = infos->buffers[1] = 0;
+  }
+
+  // prepare for callbacks
+  stream_.sampleRate = sampleRate;
+  stream_.device[mode] = device;
+  stream_.mode = isDuplexInput ? DUPLEX : mode;
+
+  // store this class instance before registering callbacks, that are going to use it
+  asioCallbackInfo = &stream_.callbackInfo;
+  stream_.callbackInfo.object = (void *) this;
+
+  // Set up the ASIO callback structure and create the ASIO data buffers.
+  asioCallbacks.bufferSwitch = &bufferSwitch;
+  asioCallbacks.sampleRateDidChange = &sampleRateChanged;
+  asioCallbacks.asioMessage = &asioMessages;
+  asioCallbacks.bufferSwitchTimeInfo = NULL;
+  result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
+  if ( result != ASE_OK ) {
+    // Standard method failed. This can happen with strict/misbehaving drivers that return valid buffer size ranges
+    // but only accept the preferred buffer size as parameter for ASIOCreateBuffers. eg. Creatives ASIO driver
+    // in that case, let's be naïve and try that instead
+    *bufferSize = preferSize;
+    stream_.bufferSize = *bufferSize;
+    result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
+  }
+
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") creating buffers.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+  buffersAllocated = true;
+  stream_.state = STREAM_STOPPED;
+
+  // Set flags for buffer conversion.
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+       stream_.nUserChannels[mode] > 1 )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate necessary internal buffers
+  unsigned long bufferBytes;
+  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiAsio::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+
+  if ( stream_.doConvertBuffer[mode] ) {
+
+    // The device buffer is shared between directions; for the duplex
+    // input step it is only reallocated when the input needs more bytes
+    // per frame than the output already provides.
+    bool makeBuffer = true;
+    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+    if ( isDuplexInput && stream_.deviceBuffer ) {
+      unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+      if ( bufferBytes <= bytesOut ) makeBuffer = false;
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiAsio::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  // Determine device latencies
+  long inputLatency, outputLatency;
+  result = ASIOGetLatencies( &inputLatency, &outputLatency );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting latency.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING); // warn but don't fail
+  }
+  else {
+    stream_.latency[0] = outputLatency;
+    stream_.latency[1] = inputLatency;
+  }
+
+  // Setup the buffer conversion information structure.  We don't use
+  // buffers to do channel offsets, so we override that parameter
+  // here.
+  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
+
+  return SUCCESS;
+
+ error:
+  if ( !isDuplexInput ) {
+    // the cleanup for error in the duplex input, is done by RtApi::openStream
+    // So we clean up for single channel only
+
+    if ( buffersAllocated )
+      ASIODisposeBuffers();
+
+    drivers.removeCurrentDriver();
+
+    if ( handle ) {
+      CloseHandle( handle->condition );
+      if ( handle->bufferInfos )
+        free( handle->bufferInfos );
+
+      delete handle;
+      stream_.apiHandle = 0;
+    }
+
+
+    if ( stream_.userBuffer[mode] ) {
+      free( stream_.userBuffer[mode] );
+      stream_.userBuffer[mode] = 0;
+    }
+
+    if ( stream_.deviceBuffer ) {
+      free( stream_.deviceBuffer );
+      stream_.deviceBuffer = 0;
+    }
+  }
+
+  return FAILURE;
+}
+
+// Stops the stream if it is running, disposes of the ASIO buffers, unloads
+// the driver and frees everything the stream owns before marking it
+// closed. Raises a warning and does nothing when no stream is open.
+void RtApiAsio :: closeStream()
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiAsio::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  const bool wasRunning = ( stream_.state == STREAM_RUNNING );
+  if ( wasRunning ) {
+    stream_.state = STREAM_STOPPED;
+    ASIOStop();
+  }
+  ASIODisposeBuffers();
+  drivers.removeCurrentDriver();
+
+  AsioHandle *asioHandle = (AsioHandle *) stream_.apiHandle;
+  if ( asioHandle != 0 ) {
+    CloseHandle( asioHandle->condition );
+    free( asioHandle->bufferInfos ); // free() ignores a null pointer
+    delete asioHandle;
+    stream_.apiHandle = 0;
+  }
+
+  // Release both user buffers and the shared device buffer; free() ignores
+  // null pointers, so no explicit checks are required.
+  for ( int dir = 0; dir < 2; ++dir ) {
+    free( stream_.userBuffer[dir] );
+    stream_.userBuffer[dir] = 0;
+  }
+  free( stream_.deviceBuffer );
+  stream_.deviceBuffer = 0;
+
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+}
+
+bool stopThreadCalled = false; // Reset to false at the end of startStream(); NOTE(review): no reader is visible in this section - confirm it is still needed.
+
+// Starts the ASIO device for the open stream. Raises a warning if the
+// stream is already running and a SYSTEM_ERROR if ASIOStart() fails.
+void RtApiAsio :: startStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiAsio::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  AsioHandle *asioHandle = (AsioHandle *) stream_.apiHandle;
+  const ASIOError startResult = ASIOStart();
+  const bool started = ( startResult == ASE_OK );
+
+  if ( started ) {
+    // Fresh run: clear the drain bookkeeping and the xrun flag.
+    asioHandle->drainCounter = 0;
+    asioHandle->internalDrain = false;
+    ResetEvent( asioHandle->condition );
+    stream_.state = STREAM_RUNNING;
+    asioXRun = false;
+  }
+  else {
+    errorStream_ << "RtApiAsio::startStream: error (" << getAsioErrorString( startResult ) << ") starting device.";
+    errorText_ = errorStream_.str();
+  }
+
+  // Cleared on both the success and the failure path.
+  stopThreadCalled = false;
+
+  if ( !started )
+    error( RtAudioError::SYSTEM_ERROR );
+}
+
+// Stops the running stream. When the stream has an output side and no
+// drain is in progress yet, a drain is requested and this call blocks
+// until callbackEvent() signals that it has completed. Raises a warning
+// if the stream is already stopped and a SYSTEM_ERROR if ASIOStop() fails.
+void RtApiAsio :: stopStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiAsio::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  AsioHandle *asioHandle = (AsioHandle *) stream_.apiHandle;
+  const bool hasOutput = ( stream_.mode == OUTPUT || stream_.mode == DUPLEX );
+  if ( hasOutput && asioHandle->drainCounter == 0 ) {
+    asioHandle->drainCounter = 2;
+    WaitForSingleObject( asioHandle->condition, INFINITE );  // block until signaled
+  }
+
+  stream_.state = STREAM_STOPPED;
+
+  const ASIOError stopResult = ASIOStop();
+  if ( stopResult == ASE_OK ) return;
+
+  errorStream_ << "RtApiAsio::stopStream: error (" << getAsioErrorString( stopResult ) << ") stopping device.";
+  errorText_ = errorStream_.str();
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+// Aborts the stream. For ASIO this is identical to stopStream(); a warning
+// is raised when the stream is already stopped.
+void RtApiAsio :: abortStream()
+{
+  verifyStream();
+
+  if ( stream_.state != STREAM_STOPPED ) {
+    // An abort that skipped the drain (by presetting the handle's
+    // drainCounter to 2) used to live here.  It was disabled because the
+    // device buffers apparently need to be zeroed to avoid continuing
+    // sound, even when they are completely disposed.  Hence abort simply
+    // delegates to stop.
+    stopStream();
+    return;
+  }
+
+  errorText_ = "RtApiAsio::abortStream(): the stream is already stopped!";
+  error( RtAudioError::WARNING );
+}
+
+// Thread entry point used when the user callback asks for the stream to be
+// stopped or aborted.  The stop has to happen on a separate thread because
+// ASIOStop() will not return until callbackEvent() itself has returned.
+// 'ptr' is the stream's CallbackInfo, whose 'object' member holds the
+// owning RtApiAsio instance.
+static unsigned __stdcall asioStopStream( void *ptr )
+{
+  CallbackInfo *callbackInfo = static_cast<CallbackInfo *>( ptr );
+  RtApiAsio *api = static_cast<RtApiAsio *>( callbackInfo->object );
+  api->stopStream();
+
+  _endthreadex( 0 );
+  return 0;
+}
+
+// Services one ASIO buffer switch: runs the user callback (unless the
+// stream is draining), copies/converts output data into the driver buffers
+// selected by 'bufferIndex' and input data out of them, and advances the
+// stream time.
+//
+// Drain protocol (handle->drainCounter):
+//   0   normal operation, the user callback is invoked;
+//   1   the user callback returned 1: the last user buffer is still played;
+//   >1  zeros are written to the outputs;
+//   >3  draining has finished: either the waiting stopStream() caller is
+//       signaled or, for an internally started drain, a thread is spawned
+//       to stop the stream.
+//
+// Returns FAILURE only when called on a closed stream, SUCCESS otherwise.
+bool RtApiAsio :: callbackEvent( long bufferIndex )
+{
+  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiAsio::callbackEvent(): the stream is closed ... this shouldn't happen!";
+    error( RtAudioError::WARNING );
+    return FAILURE;
+  }
+
+  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
+  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
+
+  // Check if we were draining the stream and signal if finished.
+  if ( handle->drainCounter > 3 ) {
+
+    stream_.state = STREAM_STOPPING;
+    if ( handle->internalDrain == false )
+      SetEvent( handle->condition );
+    else { // spawn a thread to stop the stream
+      unsigned threadId;
+      stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
+                                                    &stream_.callbackInfo, 0, &threadId );
+    }
+    return SUCCESS;
+  }
+
+  // Invoke user callback to get fresh output data UNLESS we are
+  // draining stream.
+  if ( handle->drainCounter == 0 ) {
+    RtAudioCallback callback = (RtAudioCallback) info->callback;
+    double streamTime = getStreamTime();
+    RtAudioStreamStatus status = 0;
+    // A driver resync request means data was lost.  Report it for every
+    // direction the stream has before clearing the flag; clearing it after
+    // the first check made the input flag unreachable for duplex streams.
+    if ( asioXRun == true ) {
+      if ( stream_.mode != INPUT ) status |= RTAUDIO_OUTPUT_UNDERFLOW;
+      if ( stream_.mode != OUTPUT ) status |= RTAUDIO_INPUT_OVERFLOW;
+      asioXRun = false;
+    }
+    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+                                  stream_.bufferSize, streamTime, status, info->userData );
+    if ( cbReturnValue == 2 ) {
+      // Abort request: stop right away from a helper thread.
+      stream_.state = STREAM_STOPPING;
+      handle->drainCounter = 2;
+      unsigned threadId;
+      stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
+                                                    &stream_.callbackInfo, 0, &threadId );
+      return SUCCESS;
+    }
+    else if ( cbReturnValue == 1 ) {
+      // Stop request: start an internally driven drain.
+      handle->drainCounter = 1;
+      handle->internalDrain = true;
+    }
+  }
+
+  unsigned int nChannels, bufferBytes, i, j;
+  nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    // Bytes per channel buffer (the ASIO buffers are non-interleaved).
+    bufferBytes = stream_.bufferSize * formatBytes( stream_.deviceFormat[0] );
+
+    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
+
+      for ( i=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput != ASIOTrue )
+          memset( handle->bufferInfos[i].buffers[bufferIndex], 0, bufferBytes );
+      }
+
+    }
+    else if ( stream_.doConvertBuffer[0] ) {
+
+      convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+      if ( stream_.doByteSwap[0] )
+        byteSwapBuffer( stream_.deviceBuffer,
+                        stream_.bufferSize * stream_.nDeviceChannels[0],
+                        stream_.deviceFormat[0] );
+
+      // j counts output channels; each one owns a contiguous slice.
+      for ( i=0, j=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput != ASIOTrue )
+          memcpy( handle->bufferInfos[i].buffers[bufferIndex],
+                  &stream_.deviceBuffer[j++*bufferBytes], bufferBytes );
+      }
+
+    }
+    else {
+
+      if ( stream_.doByteSwap[0] )
+        byteSwapBuffer( stream_.userBuffer[0],
+                        stream_.bufferSize * stream_.nUserChannels[0],
+                        stream_.userFormat );
+
+      for ( i=0, j=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput != ASIOTrue )
+          memcpy( handle->bufferInfos[i].buffers[bufferIndex],
+                  &stream_.userBuffer[0][bufferBytes*j++], bufferBytes );
+      }
+
+    }
+  }
+
+  // Don't bother draining input
+  if ( handle->drainCounter ) {
+    handle->drainCounter++;
+    goto unlock;
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    bufferBytes = stream_.bufferSize * formatBytes(stream_.deviceFormat[1]);
+
+    if (stream_.doConvertBuffer[1]) {
+
+      // Gather the per-channel ASIO input buffers into the device buffer
+      // so that convertBuffer() can produce the user-side layout.
+      for ( i=0, j=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput == ASIOTrue )
+          memcpy( &stream_.deviceBuffer[j++*bufferBytes],
+                  handle->bufferInfos[i].buffers[bufferIndex],
+                  bufferBytes );
+      }
+
+      if ( stream_.doByteSwap[1] )
+        byteSwapBuffer( stream_.deviceBuffer,
+                        stream_.bufferSize * stream_.nDeviceChannels[1],
+                        stream_.deviceFormat[1] );
+      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+
+    }
+    else {
+      for ( i=0, j=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput == ASIOTrue ) {
+          memcpy( &stream_.userBuffer[1][bufferBytes*j++],
+                  handle->bufferInfos[i].buffers[bufferIndex],
+                  bufferBytes );
+        }
+      }
+
+      if ( stream_.doByteSwap[1] )
+        byteSwapBuffer( stream_.userBuffer[1],
+                        stream_.bufferSize * stream_.nUserChannels[1],
+                        stream_.userFormat );
+    }
+  }
+
+ unlock:
+  // The following call was suggested by Malte Clasen.  While the API
+  // documentation indicates it should not be required, some device
+  // drivers apparently do not function correctly without it.
+  ASIOOutputReady();
+
+  RtApi::tickStreamTime();
+  return SUCCESS;
+}
+
+// ASIO sampleRateDidChange callback: stops the stream and reports the new
+// rate on std::cerr.
+static void sampleRateChanged( ASIOSampleRate sRate )
+{
+  // According to the ASIO documentation this usually only happens during
+  // external sync.  The driver keeps processing audio, and the actual
+  // sample rate may not even have changed - possibly only the sample rate
+  // status of an AES/EBU or S/PDIF digital input at the audio device.
+
+  RtApi *api = (RtApi *) asioCallbackInfo->object;
+
+  try {
+    api->stopStream();
+  }
+  catch ( RtAudioError &stopError ) {
+    // Stopping failed: report that instead of the rate change.
+    std::cerr << "\nRtApiAsio: sampleRateChanged() error (" << stopError.getMessage() << ")!\n" << std::endl;
+    return;
+  }
+
+  std::cerr << "\nRtApiAsio: driver reports sample rate changed to " << sRate << " ... stream stopped!!!\n" << std::endl;
+}
+
+// ASIO asioMessage callback.  Answers driver queries identified by
+// 'selector'; 'value' is only inspected for kAsioSelectorSupported.
+// Returns 0 for selectors that are not handled.
+static long asioMessages( long selector, long value, void* /*message*/, double* /*opt*/ )
+{
+  switch ( selector ) {
+
+  case kAsioSelectorSupported:
+    // Report which selectors the host claims to understand.
+    switch ( value ) {
+    case kAsioResetRequest:
+    case kAsioEngineVersion:
+    case kAsioResyncRequest:
+    case kAsioLatenciesChanged:
+    // The following three were added for ASIO 2.0, you don't
+    // necessarily have to support them.
+    case kAsioSupportsTimeInfo:
+    case kAsioSupportsTimeCode:
+    case kAsioSupportsInputMonitor:
+      return 1L;
+    default:
+      return 0;
+    }
+
+  case kAsioResetRequest:
+    // The reset must be deferred to the next "safe" moment: it cannot be
+    // performed now because this code is being called from the driver.
+    // Resetting means destroying the driver completely (ASIOStop(),
+    // ASIODisposeBuffers(), destruction) and initializing it again
+    // afterwards.
+    std::cerr << "\nRtApiAsio: driver reset requested!!!" << std::endl;
+    return 1L;
+
+  case kAsioResyncRequest:
+    // The driver encountered some non-fatal data loss.  This is used for
+    // synchronizing different media and was added mainly to work around
+    // the Win16Mutex problems in Windows 95/98 with the Windows
+    // Multimedia system, which could lose data because the mutex was
+    // held too long by another thread.  A driver may issue it in other
+    // situations, too.
+    // std::cerr << "\nRtApiAsio: driver resync requested!!!" << std::endl;
+    asioXRun = true;
+    return 1L;
+
+  case kAsioLatenciesChanged:
+    // The driver's latencies changed.  Beware: this does not mean that
+    // the buffer sizes have changed!  Internal delay data might need to
+    // be updated.
+    std::cerr << "\nRtApiAsio: driver latency may have changed!!!" << std::endl;
+    return 1L;
+
+  case kAsioEngineVersion:
+    // The ASIO version supported by the host application.  A driver
+    // assumes ASIO 1.0 if the host does not implement this selector.
+    return 2L;
+
+  case kAsioSupportsTimeInfo:
+    // Tells the driver whether asioCallbacks.bufferSwitchTimeInfo() is
+    // supported.  For compatibility with ASIO 1.0 drivers the host
+    // should always support the "old" bufferSwitch method, too.
+    return 0;
+
+  case kAsioSupportsTimeCode:
+    // Tells the driver whether the application is interested in time
+    // code info.  If it is not, the driver has less work to do.
+    return 0;
+  }
+
+  return 0;
+}
+
+// Maps an ASIOError code to a human-readable message.  Codes without a
+// dedicated message yield "Unknown error.".
+static const char* getAsioErrorString( ASIOError result )
+{
+  switch ( result ) {
+  case ASE_NotPresent:
+    return "Hardware input or output is not present or available.";
+  case ASE_HWMalfunction:
+    return "Hardware is malfunctioning.";
+  case ASE_InvalidParameter:
+    return "Invalid input parameter.";
+  case ASE_InvalidMode:
+    return "Invalid mode.";
+  case ASE_SPNotAdvancing:
+    return "Sample position not advancing.";
+  case ASE_NoClock:
+    return "Sample clock or rate cannot be determined or is not present.";
+  case ASE_NoMemory:
+    return "Not enough memory to complete the request.";
+  default:
+    return "Unknown error.";
+  }
+}
+
+//******************** End of __WINDOWS_ASIO__ *********************//
+#endif
+
+
+#if defined(__WINDOWS_WASAPI__) // Windows WASAPI API
+
+// Authored by Marcus Tomlinson <themarcustomlinson@gmail.com>, April 2014
+// - Introduces support for the Windows WASAPI API
+// - Aims to deliver bit streams to and from hardware at the lowest possible latency, via the absolute minimum buffer sizes required
+// - Provides flexible stream configuration to an otherwise strict and inflexible WASAPI interface
+// - Includes automatic internal conversion of sample rate and buffer size between hardware and the user
+
+#ifndef INITGUID
+ #define INITGUID
+#endif
+#include <audioclient.h>
+#include <avrt.h>
+#include <mmdeviceapi.h>
+#include <functiondiscoverykeys_devpkey.h>
+
+//=============================================================================
+
+// Calls Release() on a non-NULL interface pointer and then resets the
+// pointer to NULL; a NULL pointer is left untouched.
+// The argument is parenthesized at every use so that expressions such as
+// *ptr or a cast expand with the intended precedence. Note that the
+// argument is evaluated more than once, so it must not have side effects.
+// The expansion is a plain if-statement (not do/while) because the macro is
+// invoked both with and without a trailing semicolon.
+#define SAFE_RELEASE( objectPtr )\
+if ( ( objectPtr ) )\
+{\
+  ( objectPtr )->Release();\
+  ( objectPtr ) = NULL;\
+}
+
+typedef HANDLE ( __stdcall *TAvSetMmThreadCharacteristicsPtr )( LPCWSTR TaskName, LPDWORD TaskIndex ); // pointer type used for the "AvSetMmThreadCharacteristicsW" entry looked up with GetProcAddress
+
+//-----------------------------------------------------------------------------
+
+// WASAPI dictates stream sample rate, format, channel count, and in some cases, buffer size.
+// Therefore we must perform all necessary conversions to user buffers in order to satisfy these
+// requirements. WasapiBuffer ring buffers are used between HwIn->UserIn and UserOut->HwOut to
+// provide intermediate storage for read / write synchronization.
+class WasapiBuffer
+{
+public:
+  // Creates a buffer with no storage; setBufferSize() must be called
+  // before any push or pull can succeed.
+  WasapiBuffer()
+    : buffer_( NULL ),
+      bufferSize_( 0 ),
+      inIndex_( 0 ),
+      outIndex_( 0 ) {}
+
+  ~WasapiBuffer() {
+    free( buffer_ );
+  }
+
+  // sets the length of the internal ring buffer
+  //
+  // bufferSize is a count of samples and formatBytes the size of one sample
+  // in bytes. The new storage is zero-filled, previous content is discarded
+  // and both indices are reset. If the allocation fails the capacity is
+  // recorded as zero so that later pushes and pulls return false instead of
+  // copying through a NULL pointer.
+  void setBufferSize( unsigned int bufferSize, unsigned int formatBytes ) {
+    free( buffer_ );
+
+    buffer_ = ( char* ) calloc( bufferSize, formatBytes );
+
+    bufferSize_ = buffer_ ? bufferSize : 0;
+    inIndex_ = 0;
+    outIndex_ = 0;
+  }
+
+  // attempt to push a buffer into the ring buffer at the current "in" index
+  //
+  // buffer holds bufferSize samples of the given format. Returns false,
+  // leaving the ring buffer unchanged, when buffer is NULL, bufferSize is 0
+  // or larger than the capacity, or there is not enough room before the
+  // "out" index. Returns true after copying and advancing the "in" index.
+  bool pushBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
+  {
+    if ( !buffer ||                 // incoming buffer is NULL
+         bufferSize == 0 ||         // incoming buffer has no data
+         bufferSize > bufferSize_ ) // incoming buffer too large
+    {
+      return false;
+    }
+
+    // express the "out" index relative to the "in" index when the write
+    // would reach or pass the end of the storage
+    unsigned int relOutIndex = outIndex_;
+    unsigned int inIndexEnd = inIndex_ + bufferSize;
+    if ( relOutIndex < inIndex_ && inIndexEnd >= bufferSize_ ) {
+      relOutIndex += bufferSize_;
+    }
+
+    // "in" index can end on the "out" index but cannot begin at it
+    if ( inIndex_ <= relOutIndex && inIndexEnd > relOutIndex ) {
+      return false; // not enough space between "in" index and "out" index
+    }
+
+    // copy buffer from external to internal: first the part that fits before
+    // the end of the storage, then the remainder wrapped to index zero
+    unsigned int fromZeroSize = inIndexEnd > bufferSize_ ? inIndexEnd - bufferSize_ : 0;
+    unsigned int fromInSize = bufferSize - fromZeroSize;
+    size_t bytesPerSample = sampleBytes( format );
+
+    memcpy( buffer_ + inIndex_ * bytesPerSample, buffer, fromInSize * bytesPerSample );
+    memcpy( buffer_, buffer + fromInSize * bytesPerSample, fromZeroSize * bytesPerSample );
+
+    // update "in" index
+    inIndex_ += bufferSize;
+    inIndex_ %= bufferSize_;
+
+    return true;
+  }
+
+  // attempt to pull a buffer from the ring buffer from the current "out" index
+  //
+  // buffer receives bufferSize samples of the given format. Returns false,
+  // leaving the ring buffer unchanged, when buffer is NULL, bufferSize is 0
+  // or larger than the capacity, or the read would cross the "in" index.
+  // Returns true after copying and advancing the "out" index.
+  bool pullBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
+  {
+    if ( !buffer ||                 // incoming buffer is NULL
+         bufferSize == 0 ||         // incoming buffer has no data
+         bufferSize > bufferSize_ ) // incoming buffer too large
+    {
+      return false;
+    }
+
+    // express the "in" index relative to the "out" index when the read
+    // would reach or pass the end of the storage
+    unsigned int relInIndex = inIndex_;
+    unsigned int outIndexEnd = outIndex_ + bufferSize;
+    if ( relInIndex < outIndex_ && outIndexEnd >= bufferSize_ ) {
+      relInIndex += bufferSize_;
+    }
+
+    // "out" index can begin at and end on the "in" index
+    if ( outIndex_ < relInIndex && outIndexEnd > relInIndex ) {
+      return false; // not enough space between "out" index and "in" index
+    }
+
+    // copy buffer from internal to external: first the part up to the end
+    // of the storage, then the remainder wrapped from index zero
+    unsigned int fromZeroSize = outIndexEnd > bufferSize_ ? outIndexEnd - bufferSize_ : 0;
+    unsigned int fromOutSize = bufferSize - fromZeroSize;
+    size_t bytesPerSample = sampleBytes( format );
+
+    memcpy( buffer, buffer_ + outIndex_ * bytesPerSample, fromOutSize * bytesPerSample );
+    memcpy( buffer + fromOutSize * bytesPerSample, buffer_, fromZeroSize * bytesPerSample );
+
+    // update "out" index
+    outIndex_ += bufferSize;
+    outIndex_ %= bufferSize_;
+
+    return true;
+  }
+
+private:
+  // The object owns buffer_, so copying it would free the storage twice;
+  // copy construction and assignment are declared but never defined.
+  WasapiBuffer( const WasapiBuffer& );
+  WasapiBuffer& operator=( const WasapiBuffer& );
+
+  // Size in bytes of one sample of the given format. Formats that are not
+  // listed yield 0, so nothing is copied for them while the indices still
+  // advance.
+  static size_t sampleBytes( RtAudioFormat format )
+  {
+    switch ( format )
+    {
+      case RTAUDIO_SINT8:   return sizeof( char );
+      case RTAUDIO_SINT16:  return sizeof( short );
+      case RTAUDIO_SINT24:  return sizeof( S24 );
+      case RTAUDIO_SINT32:  return sizeof( int );
+      case RTAUDIO_FLOAT32: return sizeof( float );
+      case RTAUDIO_FLOAT64: return sizeof( double );
+    }
+    return 0;
+  }
+
+  char* buffer_;            // storage obtained from calloc(), NULL when unset
+  unsigned int bufferSize_; // capacity in samples, 0 when unset
+  unsigned int inIndex_;    // sample index where the next push writes
+  unsigned int outIndex_;   // sample index where the next pull reads
+};
+
+//-----------------------------------------------------------------------------
+
+// In order to satisfy WASAPI's buffer requirements, we need a means of converting sample rate
+// between HW and the user. The convertBufferWasapi function is used to perform this conversion
+// between HwIn->UserIn and UserOut->HwOut during the stream callback loop.
+// This sample rate converter favors speed over quality, and works best with conversions between
+// one rate and its multiple.
+// Resamples interleaved frames from inBuffer into outBuffer by copying, for
+// every output frame, the input frame at the corresponding position.
+//   outBuffer      - destination; must have room for outSampleCount frames
+//   inBuffer       - source holding inSampleCount frames
+//   channelCount   - number of samples per frame
+//   inSampleRate   - rate of the source data
+//   outSampleRate  - rate wanted for the destination data
+//   inSampleCount  - number of frames available in inBuffer
+//   outSampleCount - set to the number of frames written to outBuffer
+//   format         - sample format shared by both buffers
+// If either rate or inSampleCount is zero, nothing is written and
+// outSampleCount is set to 0.
+void convertBufferWasapi( char* outBuffer,
+                          const char* inBuffer,
+                          const unsigned int& channelCount,
+                          const unsigned int& inSampleRate,
+                          const unsigned int& outSampleRate,
+                          const unsigned int& inSampleCount,
+                          unsigned int& outSampleCount,
+                          const RtAudioFormat& format )
+{
+  // a zero rate would divide by zero below; no input means no output
+  if ( inSampleRate == 0 || outSampleRate == 0 || inSampleCount == 0 ) {
+    outSampleCount = 0;
+    return;
+  }
+
+  // calculate the new outSampleCount and relative sampleStep
+  float sampleRatio = ( float ) outSampleRate / inSampleRate;
+  float sampleStep = 1.0f / sampleRatio;
+  float inSampleFraction = 0.0f;
+
+  outSampleCount = ( unsigned int ) roundf( inSampleCount * sampleRatio );
+
+  // size in bytes of one frame; stays 0 for formats that are not listed, in
+  // which case nothing is copied
+  size_t frameBytes = 0;
+  switch ( format )
+  {
+    case RTAUDIO_SINT8:   frameBytes = channelCount * sizeof( char );   break;
+    case RTAUDIO_SINT16:  frameBytes = channelCount * sizeof( short );  break;
+    case RTAUDIO_SINT24:  frameBytes = channelCount * sizeof( S24 );    break;
+    case RTAUDIO_SINT32:  frameBytes = channelCount * sizeof( int );    break;
+    case RTAUDIO_FLOAT32: frameBytes = channelCount * sizeof( float );  break;
+    case RTAUDIO_FLOAT64: frameBytes = channelCount * sizeof( double ); break;
+  }
+
+  // frame-by-frame, copy each relative input sample into its corresponding output sample
+  for ( unsigned int outSample = 0; outSample < outSampleCount; outSample++ )
+  {
+    unsigned int inSample = ( unsigned int ) inSampleFraction;
+
+    // accumulated floating point error must never index past the input
+    if ( inSample >= inSampleCount )
+      inSample = inSampleCount - 1;
+
+    memcpy( outBuffer + outSample * frameBytes, inBuffer + inSample * frameBytes, frameBytes );
+
+    // jump to next in sample
+    inSampleFraction += sampleStep;
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+// A structure to hold various information related to the WASAPI implementation.
+struct WasapiHandle
+{
+  IAudioClient* captureAudioClient;   // audio client of the capture device, NULL when unused
+  IAudioClient* renderAudioClient;    // audio client of the render device, NULL when unused
+  IAudioCaptureClient* captureClient; // capture service interface, NULL until assigned
+  IAudioRenderClient* renderClient;   // render service interface, NULL until assigned
+  HANDLE captureEvent;                // event handle for capture, NULL until assigned
+  HANDLE renderEvent;                 // event handle for render, NULL until assigned
+
+  // Every member starts out as NULL.
+  WasapiHandle()
+    : captureAudioClient( NULL ), renderAudioClient( NULL ),
+      captureClient( NULL ), renderClient( NULL ),
+      captureEvent( NULL ), renderEvent( NULL )
+  {
+  }
+};
+
+//=============================================================================
+
+// Initializes COM for the calling thread and creates the device enumerator
+// used by the device queries. Reports a DRIVER_ERROR when the enumerator
+// cannot be created.
+RtApiWasapi::RtApiWasapi()
+  : coInitialized_( false ), deviceEnumerator_( NULL )
+{
+  // WASAPI can run either apartment or multi-threaded.
+  // Remember a successful CoInitialize() so the destructor can undo it.
+  if ( !FAILED( CoInitialize( NULL ) ) ) {
+    coInitialized_ = true;
+  }
+
+  // Instantiate device enumerator
+  HRESULT createResult = CoCreateInstance( __uuidof( MMDeviceEnumerator ), NULL,
+                                           CLSCTX_ALL, __uuidof( IMMDeviceEnumerator ),
+                                           ( void** ) &deviceEnumerator_ );
+  if ( !FAILED( createResult ) )
+    return;
+
+  errorText_ = "RtApiWasapi::RtApiWasapi: Unable to instantiate device enumerator";
+  error( RtAudioError::DRIVER_ERROR );
+}
+
+//-----------------------------------------------------------------------------
+
+// Closes a stream that is still open, releases the device enumerator and
+// undoes the constructor's COM initialization if it succeeded.
+RtApiWasapi::~RtApiWasapi()
+{
+  if ( stream_.state != STREAM_CLOSED ) {
+    closeStream();
+  }
+
+  // drop our reference to the enumerator, if we ever obtained one
+  if ( deviceEnumerator_ ) {
+    deviceEnumerator_->Release();
+    deviceEnumerator_ = NULL;
+  }
+
+  // If this object previously called CoInitialize()
+  if ( coInitialized_ ) {
+    CoUninitialize();
+  }
+}
+
+//=============================================================================
+
+// Returns the number of active capture devices plus the number of active
+// render devices. If any query fails, a DRIVER_ERROR is reported and 0 is
+// returned.
+unsigned int RtApiWasapi::getDeviceCount( void )
+{
+  IMMDeviceCollection* captureList = NULL;
+  IMMDeviceCollection* renderList = NULL;
+  unsigned int nCapture = 0;
+  unsigned int nRender = 0;
+
+  errorText_.clear();
+
+  // Each step runs only when all earlier steps succeeded; the first failure
+  // records its message and skips the remaining steps.
+  if ( FAILED( deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureList ) ) ) {
+    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device collection.";
+  }
+  else if ( FAILED( captureList->GetCount( &nCapture ) ) ) {
+    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device count.";
+  }
+  else if ( FAILED( deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderList ) ) ) {
+    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device collection.";
+  }
+  else if ( FAILED( renderList->GetCount( &nRender ) ) ) {
+    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device count.";
+  }
+
+  // release all references, whether or not the queries succeeded
+  SAFE_RELEASE( captureList );
+  SAFE_RELEASE( renderList );
+
+  // a non-empty error text means one of the steps above failed
+  if ( !errorText_.empty() ) {
+    error( RtAudioError::DRIVER_ERROR );
+    return 0;
+  }
+
+  return nCapture + nRender;
+}
+
+//-----------------------------------------------------------------------------
+
+// Fills in a DeviceInfo for the device with the given index.
+// Indices below the render device count address render devices; the
+// remaining indices address capture devices. On any failure an error is
+// reported (INVALID_USE for a bad index, DRIVER_ERROR otherwise) and the
+// returned info has probed == false.
+RtAudio::DeviceInfo RtApiWasapi::getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  unsigned int captureDeviceCount = 0;
+  unsigned int renderDeviceCount = 0;
+  std::string defaultDeviceName;
+  bool isCaptureDevice = false;
+
+  PROPVARIANT deviceNameProp;
+  PROPVARIANT defaultDeviceNameProp;
+
+  // Initialize both variants right away: every failure path below jumps to
+  // Exit, which passes them to PropVariantClear() unconditionally.
+  PropVariantInit( &deviceNameProp );
+  PropVariantInit( &defaultDeviceNameProp );
+
+  IMMDeviceCollection* captureDevices = NULL;
+  IMMDeviceCollection* renderDevices = NULL;
+  IMMDevice* devicePtr = NULL;
+  IMMDevice* defaultDevicePtr = NULL;
+  IAudioClient* audioClient = NULL;
+  IPropertyStore* devicePropStore = NULL;
+  IPropertyStore* defaultDevicePropStore = NULL;
+
+  WAVEFORMATEX* deviceFormat = NULL;
+
+  // probed
+  info.probed = false;
+
+  // Count capture devices
+  errorText_.clear();
+  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
+  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device collection.";
+    goto Exit;
+  }
+
+  hr = captureDevices->GetCount( &captureDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device count.";
+    goto Exit;
+  }
+
+  // Count render devices
+  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device collection.";
+    goto Exit;
+  }
+
+  hr = renderDevices->GetCount( &renderDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device count.";
+    goto Exit;
+  }
+
+  // validate device index
+  if ( device >= captureDeviceCount + renderDeviceCount ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Invalid device index.";
+    errorType = RtAudioError::INVALID_USE;
+    goto Exit;
+  }
+
+  // determine whether index falls within capture or render devices
+  if ( device >= renderDeviceCount ) {
+    hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device handle.";
+      goto Exit;
+    }
+    isCaptureDevice = true;
+  }
+  else {
+    hr = renderDevices->Item( device, &devicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device handle.";
+      goto Exit;
+    }
+    isCaptureDevice = false;
+  }
+
+  // get default device name
+  if ( isCaptureDevice ) {
+    hr = deviceEnumerator_->GetDefaultAudioEndpoint( eCapture, eConsole, &defaultDevicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default capture device handle.";
+      goto Exit;
+    }
+  }
+  else {
+    hr = deviceEnumerator_->GetDefaultAudioEndpoint( eRender, eConsole, &defaultDevicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default render device handle.";
+      goto Exit;
+    }
+  }
+
+  hr = defaultDevicePtr->OpenPropertyStore( STGM_READ, &defaultDevicePropStore );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open default device property store.";
+    goto Exit;
+  }
+
+  hr = defaultDevicePropStore->GetValue( PKEY_Device_FriendlyName, &defaultDeviceNameProp );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default device property: PKEY_Device_FriendlyName.";
+    goto Exit;
+  }
+
+  defaultDeviceName = convertCharPointerToStdString(defaultDeviceNameProp.pwszVal);
+
+  // name
+  hr = devicePtr->OpenPropertyStore( STGM_READ, &devicePropStore );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open device property store.";
+    goto Exit;
+  }
+
+  hr = devicePropStore->GetValue( PKEY_Device_FriendlyName, &deviceNameProp );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device property: PKEY_Device_FriendlyName.";
+    goto Exit;
+  }
+
+  info.name = convertCharPointerToStdString(deviceNameProp.pwszVal);
+
+  // is default: decided by comparing friendly names with the default
+  // endpoint of the same direction
+  if ( isCaptureDevice ) {
+    info.isDefaultInput = info.name == defaultDeviceName;
+    info.isDefaultOutput = false;
+  }
+  else {
+    info.isDefaultInput = false;
+    info.isDefaultOutput = info.name == defaultDeviceName;
+  }
+
+  // channel count
+  hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL, NULL, ( void** ) &audioClient );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device audio client.";
+    goto Exit;
+  }
+
+  hr = audioClient->GetMixFormat( &deviceFormat );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device mix format.";
+    goto Exit;
+  }
+
+  if ( isCaptureDevice ) {
+    info.inputChannels = deviceFormat->nChannels;
+    info.outputChannels = 0;
+    info.duplexChannels = 0;
+  }
+  else {
+    info.inputChannels = 0;
+    info.outputChannels = deviceFormat->nChannels;
+    info.duplexChannels = 0;
+  }
+
+  // sample rates
+  info.sampleRates.clear();
+
+  // allow support for all sample rates as we have a built-in sample rate converter
+  for ( unsigned int i = 0; i < MAX_SAMPLE_RATES; i++ ) {
+    info.sampleRates.push_back( SAMPLE_RATES[i] );
+  }
+  info.preferredSampleRate = deviceFormat->nSamplesPerSec;
+
+  // native format: derived from the mix format's tag (or sub-format for
+  // WAVE_FORMAT_EXTENSIBLE) and its bits per sample
+  info.nativeFormats = 0;
+
+  if ( deviceFormat->wFormatTag == WAVE_FORMAT_IEEE_FLOAT ||
+       ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
+         ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT ) )
+  {
+    if ( deviceFormat->wBitsPerSample == 32 ) {
+      info.nativeFormats |= RTAUDIO_FLOAT32;
+    }
+    else if ( deviceFormat->wBitsPerSample == 64 ) {
+      info.nativeFormats |= RTAUDIO_FLOAT64;
+    }
+  }
+  else if ( deviceFormat->wFormatTag == WAVE_FORMAT_PCM ||
+            ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
+              ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_PCM ) )
+  {
+    if ( deviceFormat->wBitsPerSample == 8 ) {
+      info.nativeFormats |= RTAUDIO_SINT8;
+    }
+    else if ( deviceFormat->wBitsPerSample == 16 ) {
+      info.nativeFormats |= RTAUDIO_SINT16;
+    }
+    else if ( deviceFormat->wBitsPerSample == 24 ) {
+      info.nativeFormats |= RTAUDIO_SINT24;
+    }
+    else if ( deviceFormat->wBitsPerSample == 32 ) {
+      info.nativeFormats |= RTAUDIO_SINT32;
+    }
+  }
+
+  // probed
+  info.probed = true;
+
+Exit:
+  // release all references
+  PropVariantClear( &deviceNameProp );
+  PropVariantClear( &defaultDeviceNameProp );
+
+  SAFE_RELEASE( captureDevices );
+  SAFE_RELEASE( renderDevices );
+  SAFE_RELEASE( devicePtr );
+  SAFE_RELEASE( defaultDevicePtr );
+  SAFE_RELEASE( audioClient );
+  SAFE_RELEASE( devicePropStore );
+  SAFE_RELEASE( defaultDevicePropStore );
+
+  CoTaskMemFree( deviceFormat );
+
+  if ( !errorText_.empty() )
+    error( errorType );
+  return info;
+}
+
+//-----------------------------------------------------------------------------
+
+// Returns the index of the first device whose info is flagged as the
+// default output, or 0 when no device is flagged.
+unsigned int RtApiWasapi::getDefaultOutputDevice( void )
+{
+  unsigned int index = 0;
+
+  // the device count is queried again before every probe
+  while ( index < getDeviceCount() ) {
+    if ( getDeviceInfo( index ).isDefaultOutput )
+      return index;
+    ++index;
+  }
+
+  return 0;
+}
+
+//-----------------------------------------------------------------------------
+
+// Returns the index of the first device whose info is flagged as the
+// default input, or 0 when no device is flagged.
+unsigned int RtApiWasapi::getDefaultInputDevice( void )
+{
+  unsigned int index = 0;
+
+  // the device count is queried again before every probe
+  while ( index < getDeviceCount() ) {
+    if ( getDeviceInfo( index ).isDefaultInput )
+      return index;
+    ++index;
+  }
+
+  return 0;
+}
+
+//-----------------------------------------------------------------------------
+
+// Stops the stream if necessary, releases all WASAPI objects held in the
+// API handle, frees the stream buffers and marks the stream as closed.
+// Issues a warning and does nothing when no stream is open.
+void RtApiWasapi::closeStream( void )
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiWasapi::closeStream: No open stream to close.";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  if ( stream_.state != STREAM_STOPPED ) {
+    stopStream();
+  }
+
+  // clean up stream memory
+  WasapiHandle* handle = ( WasapiHandle* ) stream_.apiHandle;
+
+  SAFE_RELEASE( handle->captureAudioClient );
+  SAFE_RELEASE( handle->renderAudioClient );
+  SAFE_RELEASE( handle->captureClient );
+  SAFE_RELEASE( handle->renderClient );
+
+  if ( handle->captureEvent ) {
+    CloseHandle( handle->captureEvent );
+  }
+  if ( handle->renderEvent ) {
+    CloseHandle( handle->renderEvent );
+  }
+
+  delete handle;
+  stream_.apiHandle = NULL;
+
+  // free both user buffers, then the device buffer
+  for ( int slot = 0; slot != 2; ++slot ) {
+    if ( !stream_.userBuffer[slot] )
+      continue;
+    free( stream_.userBuffer[slot] );
+    stream_.userBuffer[slot] = 0;
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  // update stream state
+  stream_.state = STREAM_CLOSED;
+}
+
+//-----------------------------------------------------------------------------
+
+// Marks the stream as running and launches the WASAPI stream thread with
+// the configured priority. Issues a warning when the stream is already
+// running. If the thread cannot be created, the state is put back to
+// STREAM_STOPPED and a THREAD_ERROR is reported.
+void RtApiWasapi::startStream( void )
+{
+  verifyStream();
+
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiWasapi::startStream: The stream is already running.";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // update stream state
+  stream_.state = STREAM_RUNNING;
+
+  // create WASAPI stream thread; it starts suspended so that its priority
+  // can be set before it runs
+  stream_.callbackInfo.thread = ( ThreadHandle ) CreateThread( NULL, 0, runWasapiThread, this, CREATE_SUSPENDED, NULL );
+
+  if ( !stream_.callbackInfo.thread ) {
+    // No thread exists to acknowledge a later stop request, so the state
+    // must not stay at STREAM_RUNNING: stopStream() waits for the state to
+    // become STREAM_STOPPED and would otherwise never return.
+    stream_.state = STREAM_STOPPED;
+
+    errorText_ = "RtApiWasapi::startStream: Unable to instantiate callback thread.";
+    error( RtAudioError::THREAD_ERROR );
+  }
+  else {
+    SetThreadPriority( ( void* ) stream_.callbackInfo.thread, stream_.callbackInfo.priority );
+    ResumeThread( ( void* ) stream_.callbackInfo.thread );
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+// Requests the stream thread to stop, waits for it, lets one buffer period
+// elapse, stops the capture and render clients and closes the thread
+// handle. Issues a warning when the stream is already stopped.
+void RtApiWasapi::stopStream( void )
+{
+  verifyStream();
+
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiWasapi::stopStream: The stream is already stopped.";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // inform stream thread by setting stream state to STREAM_STOPPING,
+  // then poll until the state reads STREAM_STOPPED
+  stream_.state = STREAM_STOPPING;
+  while ( stream_.state != STREAM_STOPPED )
+    Sleep( 1 );
+
+  // Wait for the last buffer to play before stopping.
+  Sleep( 1000 * stream_.bufferSize / stream_.sampleRate );
+
+  WasapiHandle* handle = ( WasapiHandle* ) stream_.apiHandle;
+
+  // stop capture client if applicable
+  if ( handle->captureAudioClient && FAILED( handle->captureAudioClient->Stop() ) ) {
+    errorText_ = "RtApiWasapi::stopStream: Unable to stop capture stream.";
+    error( RtAudioError::DRIVER_ERROR );
+    return;
+  }
+
+  // stop render client if applicable
+  if ( handle->renderAudioClient && FAILED( handle->renderAudioClient->Stop() ) ) {
+    errorText_ = "RtApiWasapi::stopStream: Unable to stop render stream.";
+    error( RtAudioError::DRIVER_ERROR );
+    return;
+  }
+
+  // close thread handle
+  if ( stream_.callbackInfo.thread ) {
+    if ( !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
+      errorText_ = "RtApiWasapi::stopStream: Unable to close callback thread.";
+      error( RtAudioError::THREAD_ERROR );
+      return;
+    }
+  }
+
+  stream_.callbackInfo.thread = (ThreadHandle) NULL;
+}
+
+//-----------------------------------------------------------------------------
+
+// Requests the stream thread to stop, waits for it, then stops the capture
+// and render clients and closes the thread handle, without waiting for a
+// buffer period to elapse. Issues a warning when the stream is already
+// stopped.
+void RtApiWasapi::abortStream( void )
+{
+  verifyStream();
+
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiWasapi::abortStream: The stream is already stopped.";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // inform stream thread by setting stream state to STREAM_STOPPING,
+  // then poll until the state reads STREAM_STOPPED
+  stream_.state = STREAM_STOPPING;
+  while ( stream_.state != STREAM_STOPPED )
+    Sleep( 1 );
+
+  WasapiHandle* handle = ( WasapiHandle* ) stream_.apiHandle;
+
+  // stop capture client if applicable
+  if ( handle->captureAudioClient && FAILED( handle->captureAudioClient->Stop() ) ) {
+    errorText_ = "RtApiWasapi::abortStream: Unable to stop capture stream.";
+    error( RtAudioError::DRIVER_ERROR );
+    return;
+  }
+
+  // stop render client if applicable
+  if ( handle->renderAudioClient && FAILED( handle->renderAudioClient->Stop() ) ) {
+    errorText_ = "RtApiWasapi::abortStream: Unable to stop render stream.";
+    error( RtAudioError::DRIVER_ERROR );
+    return;
+  }
+
+  // close thread handle
+  if ( stream_.callbackInfo.thread ) {
+    if ( !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
+      errorText_ = "RtApiWasapi::abortStream: Unable to close callback thread.";
+      error( RtAudioError::THREAD_ERROR );
+      return;
+    }
+  }
+
+  stream_.callbackInfo.thread = (ThreadHandle) NULL;
+}
+
+//-----------------------------------------------------------------------------
+
+// Opens one direction (mode) of the stream on the given device.
+//   device       - device index; indices below the render device count are
+//                  render devices, the remaining ones capture devices
+//   mode         - INPUT for a capture device, OUTPUT for a render device
+//   channels     - number of user channels
+//   firstChannel - stored as the channel offset for this mode
+//   sampleRate   - user sample rate
+//   format       - user sample format
+//   bufferSize   - points to the user buffer size in frames (read only here)
+//   options      - optional stream flags, may be NULL
+// Activates the device's audio client, records the stream parameters and
+// allocates the user buffer. Returns SUCCESS or FAILURE; on FAILURE the
+// stream is closed and an error is reported.
+bool RtApiWasapi::probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                   unsigned int firstChannel, unsigned int sampleRate,
+                                   RtAudioFormat format, unsigned int* bufferSize,
+                                   RtAudio::StreamOptions* options )
+{
+  bool methodResult = FAILURE;
+  unsigned int captureDeviceCount = 0;
+  unsigned int renderDeviceCount = 0;
+
+  IMMDeviceCollection* captureDevices = NULL;
+  IMMDeviceCollection* renderDevices = NULL;
+  IMMDevice* devicePtr = NULL;
+  WAVEFORMATEX* deviceFormat = NULL;
+  unsigned int bufferBytes;
+  stream_.state = STREAM_STOPPED;
+
+  // create API Handle if not already created
+  if ( !stream_.apiHandle )
+    stream_.apiHandle = ( void* ) new WasapiHandle();
+
+  // Count capture devices
+  errorText_.clear();
+  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
+  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device collection.";
+    goto Exit;
+  }
+
+  hr = captureDevices->GetCount( &captureDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device count.";
+    goto Exit;
+  }
+
+  // Count render devices
+  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device collection.";
+    goto Exit;
+  }
+
+  hr = renderDevices->GetCount( &renderDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device count.";
+    goto Exit;
+  }
+
+  // validate device index
+  if ( device >= captureDeviceCount + renderDeviceCount ) {
+    errorType = RtAudioError::INVALID_USE;
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Invalid device index.";
+    goto Exit;
+  }
+
+  // determine whether index falls within capture or render devices
+  if ( device >= renderDeviceCount ) {
+    if ( mode != INPUT ) {
+      errorType = RtAudioError::INVALID_USE;
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Capture device selected as output device.";
+      goto Exit;
+    }
+
+    // retrieve captureAudioClient from devicePtr
+    IAudioClient*& captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
+
+    hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device handle.";
+      goto Exit;
+    }
+
+    hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
+                              NULL, ( void** ) &captureAudioClient );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
+      goto Exit;
+    }
+
+    hr = captureAudioClient->GetMixFormat( &deviceFormat );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
+      goto Exit;
+    }
+
+    stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
+
+    // Read the latency into a correctly typed temporary instead of writing
+    // through a reinterpreted pointer to stream_.latency[mode], whose
+    // declared type is not visible here and may be narrower than 64 bits.
+    // The stored value is only updated when the query succeeds.
+    REFERENCE_TIME captureLatency = 0;
+    if ( !FAILED( captureAudioClient->GetStreamLatency( &captureLatency ) ) )
+      stream_.latency[mode] = ( unsigned long ) captureLatency;
+  }
+  else {
+    if ( mode != OUTPUT ) {
+      errorType = RtAudioError::INVALID_USE;
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Render device selected as input device.";
+      goto Exit;
+    }
+
+    // retrieve renderAudioClient from devicePtr
+    IAudioClient*& renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
+
+    hr = renderDevices->Item( device, &devicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device handle.";
+      goto Exit;
+    }
+
+    hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
+                              NULL, ( void** ) &renderAudioClient );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
+      goto Exit;
+    }
+
+    hr = renderAudioClient->GetMixFormat( &deviceFormat );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
+      goto Exit;
+    }
+
+    stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
+
+    // see the capture branch: typed temporary, stored only on success
+    REFERENCE_TIME renderLatency = 0;
+    if ( !FAILED( renderAudioClient->GetStreamLatency( &renderLatency ) ) )
+      stream_.latency[mode] = ( unsigned long ) renderLatency;
+  }
+
+  // fill stream data; opening the opposite direction of an already opened
+  // stream turns it into a duplex stream
+  if ( ( stream_.mode == OUTPUT && mode == INPUT ) ||
+       ( stream_.mode == INPUT && mode == OUTPUT ) ) {
+    stream_.mode = DUPLEX;
+  }
+  else {
+    stream_.mode = mode;
+  }
+
+  stream_.device[mode] = device;
+  stream_.doByteSwap[mode] = false;
+  stream_.sampleRate = sampleRate;
+  stream_.bufferSize = *bufferSize;
+  stream_.nBuffers = 1;
+  stream_.nUserChannels[mode] = channels;
+  stream_.channelOffset[mode] = firstChannel;
+  stream_.userFormat = format;
+  stream_.deviceFormat[mode] = getDeviceInfo( device ).nativeFormats;
+
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
+    stream_.userInterleaved = false;
+  else
+    stream_.userInterleaved = true;
+  stream_.deviceInterleaved[mode] = true;
+
+  // Set flags for buffer conversion.
+  // The channel counts are compared per mode; comparing the two arrays
+  // themselves would compare their addresses and always differ.
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] ||
+       stream_.nUserChannels[mode] != stream_.nDeviceChannels[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  else if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+            stream_.nUserChannels[mode] > 1 )
+    stream_.doConvertBuffer[mode] = true;
+
+  if ( stream_.doConvertBuffer[mode] )
+    setConvertInfo( mode, 0 );
+
+  // Allocate necessary internal buffers
+  bufferBytes = stream_.nUserChannels[mode] * stream_.bufferSize * formatBytes( stream_.userFormat );
+
+  stream_.userBuffer[mode] = ( char* ) calloc( bufferBytes, 1 );
+  if ( !stream_.userBuffer[mode] ) {
+    errorType = RtAudioError::MEMORY_ERROR;
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Error allocating user buffer memory.";
+    goto Exit;
+  }
+
+  if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME )
+    stream_.callbackInfo.priority = 15;
+  else
+    stream_.callbackInfo.priority = 0;
+
+  ///! TODO: RTAUDIO_MINIMIZE_LATENCY // Provide stream buffers directly to callback
+  ///! TODO: RTAUDIO_HOG_DEVICE       // Exclusive mode
+
+  methodResult = SUCCESS;
+
+Exit:
+  //clean up
+  SAFE_RELEASE( captureDevices );
+  SAFE_RELEASE( renderDevices );
+  SAFE_RELEASE( devicePtr );
+  CoTaskMemFree( deviceFormat );
+
+  // if method failed, close the stream
+  if ( methodResult == FAILURE )
+    closeStream();
+
+  if ( !errorText_.empty() )
+    error( errorType );
+  return methodResult;
+}
+
+//=============================================================================
+
+// Thread entry point: runs wasapiThread() on the RtApiWasapi object passed
+// as the thread argument. A NULL argument is ignored. Always returns 0.
+DWORD WINAPI RtApiWasapi::runWasapiThread( void* wasapiPtr )
+{
+  RtApiWasapi* self = ( RtApiWasapi* ) wasapiPtr;
+  if ( self ) {
+    self->wasapiThread();
+  }
+
+  return 0;
+}
+
+// Thread entry point: calls stopStream() on the RtApiWasapi object passed
+// as the thread argument. A NULL argument is ignored. Always returns 0.
+DWORD WINAPI RtApiWasapi::stopWasapiThread( void* wasapiPtr )
+{
+  RtApiWasapi* self = ( RtApiWasapi* ) wasapiPtr;
+  if ( self ) {
+    self->stopStream();
+  }
+
+  return 0;
+}
+
+// Thread entry point: calls abortStream() on the RtApiWasapi object passed
+// as the thread argument. A NULL argument is ignored. Always returns 0.
+DWORD WINAPI RtApiWasapi::abortWasapiThread( void* wasapiPtr )
+{
+  RtApiWasapi* self = ( RtApiWasapi* ) wasapiPtr;
+  if ( self ) {
+    self->abortStream();
+  }
+
+  return 0;
+}
+
+//-----------------------------------------------------------------------------
+
+// Body of the WASAPI stream thread (entered through runWasapiThread).
+//
+// Outline:
+//   1. Initialize COM for this thread and try to give the thread the
+//      "Pro Audio" multimedia characteristic (best effort).
+//   2. For each direction that has an IAudioClient in the WasapiHandle:
+//      query the mix format, initialize the client in shared, event-driven
+//      mode on first use, size the intermediate WasapiBuffer, then reset and
+//      start the client.
+//   3. Allocate the conversion buffer (local) and stream_.deviceBuffer.
+//   4. Loop until stream_.state becomes STREAM_STOPPING: pull captured data,
+//      run the user callback, push rendered data, and shuttle data between
+//      the WASAPI endpoint buffers and the intermediate buffers.
+//   5. On exit (normal or error): free the mix formats and the conversion
+//      buffer, uninitialize COM, set stream_.state to STREAM_STOPPED and, if
+//      errorText_ is non-empty, report it through error( errorType ).
+//
+// Callback return values: 1 spawns a thread running stopWasapiThread, 2 spawns
+// a thread running abortWasapiThread; after either, the callback is no longer
+// invoked.
+void RtApiWasapi::wasapiThread()
+{
+  // as this is a new thread, we must CoInitialize it
+  CoInitialize( NULL );
+
+  HRESULT hr;
+
+  IAudioClient* captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
+  IAudioClient* renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
+  IAudioCaptureClient* captureClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureClient;
+  IAudioRenderClient* renderClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderClient;
+  HANDLE captureEvent = ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent;
+  HANDLE renderEvent = ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent;
+
+  WAVEFORMATEX* captureFormat = NULL;
+  WAVEFORMATEX* renderFormat = NULL;
+  float captureSrRatio = 0.0f;   // device sample rate / user sample rate (capture)
+  float renderSrRatio = 0.0f;    // device sample rate / user sample rate (render)
+  WasapiBuffer captureBuffer;
+  WasapiBuffer renderBuffer;
+
+  // declare local stream variables
+  RtAudioCallback callback = ( RtAudioCallback ) stream_.callbackInfo.callback;
+  BYTE* streamBuffer = NULL;
+  unsigned long captureFlags = 0;
+  unsigned int bufferFrameCount = 0;
+  unsigned int numFramesPadding = 0;
+  // NOTE(review): convBufferSize is handed to convertBufferWasapi() and then
+  // used as the frame count pushed to renderBuffer, so it is presumably an
+  // output parameter of that helper -- confirm against its declaration. It is
+  // unrelated to convBuffSize below, which is a size in bytes.
+  unsigned int convBufferSize = 0;
+  bool callbackPushed = false;
+  bool callbackPulled = false;
+  bool callbackStopped = false;
+  int callbackResult = 0;
+
+  // convBuffer is used to store converted buffers between WASAPI and the user
+  char* convBuffer = NULL;
+  unsigned int convBuffSize = 0;    // bytes
+  unsigned int deviceBuffSize = 0;  // bytes
+
+  errorText_.clear();
+  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
+
+  // Attempt to assign "Pro Audio" characteristic to thread. This is best
+  // effort: it is silently skipped when AVRT.dll or its entry point is missing.
+  // LoadLibraryA is called explicitly because the DLL name is a narrow string
+  // literal; casting it to LPCTSTR would pass a mistyped string to
+  // LoadLibraryW in UNICODE builds.
+  HMODULE AvrtDll = LoadLibraryA( "AVRT.dll" );
+  if ( AvrtDll ) {
+    DWORD taskIndex = 0;
+    TAvSetMmThreadCharacteristicsPtr AvSetMmThreadCharacteristicsPtr = ( TAvSetMmThreadCharacteristicsPtr ) GetProcAddress( AvrtDll, "AvSetMmThreadCharacteristicsW" );
+    // GetProcAddress yields NULL when the export cannot be found; do not call through it.
+    if ( AvSetMmThreadCharacteristicsPtr )
+      AvSetMmThreadCharacteristicsPtr( L"Pro Audio", &taskIndex );
+    FreeLibrary( AvrtDll );
+  }
+
+  // start capture stream if applicable
+  if ( captureAudioClient ) {
+    hr = captureAudioClient->GetMixFormat( &captureFormat );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
+      goto Exit;
+    }
+
+    captureSrRatio = ( ( float ) captureFormat->nSamplesPerSec / stream_.sampleRate );
+
+    // initialize capture stream according to desired buffer size
+    // (REFERENCE_TIME is expressed in 100-nanosecond units, hence the 10000000 factor)
+    float desiredBufferSize = stream_.bufferSize * captureSrRatio;
+    REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / captureFormat->nSamplesPerSec );
+
+    // the capture client only exists once the audio client has been initialized
+    if ( !captureClient ) {
+      hr = captureAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
+                                           AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
+                                           desiredBufferPeriod,
+                                           desiredBufferPeriod,
+                                           captureFormat,
+                                           NULL );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize capture audio client.";
+        goto Exit;
+      }
+
+      hr = captureAudioClient->GetService( __uuidof( IAudioCaptureClient ),
+                                           ( void** ) &captureClient );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture client handle.";
+        goto Exit;
+      }
+
+      // configure captureEvent to trigger on every available capture buffer
+      captureEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
+      if ( !captureEvent ) {
+        errorType = RtAudioError::SYSTEM_ERROR;
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to create capture event.";
+        goto Exit;
+      }
+
+      hr = captureAudioClient->SetEventHandle( captureEvent );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to set capture event handle.";
+        goto Exit;
+      }
+
+      // publish the new objects in the shared handle so later runs reuse them
+      ( ( WasapiHandle* ) stream_.apiHandle )->captureClient = captureClient;
+      ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent = captureEvent;
+    }
+
+    unsigned int inBufferSize = 0;
+    hr = captureAudioClient->GetBufferSize( &inBufferSize );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to get capture buffer size.";
+      goto Exit;
+    }
+
+    // scale outBufferSize according to stream->user sample rate ratio
+    unsigned int outBufferSize = ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT];
+    inBufferSize *= stream_.nDeviceChannels[INPUT];
+
+    // set captureBuffer size
+    captureBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[INPUT] ) );
+
+    // reset the capture stream
+    hr = captureAudioClient->Reset();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to reset capture stream.";
+      goto Exit;
+    }
+
+    // start the capture stream
+    hr = captureAudioClient->Start();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to start capture stream.";
+      goto Exit;
+    }
+  }
+
+  // start render stream if applicable
+  if ( renderAudioClient ) {
+    hr = renderAudioClient->GetMixFormat( &renderFormat );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
+      goto Exit;
+    }
+
+    renderSrRatio = ( ( float ) renderFormat->nSamplesPerSec / stream_.sampleRate );
+
+    // initialize render stream according to desired buffer size
+    float desiredBufferSize = stream_.bufferSize * renderSrRatio;
+    REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / renderFormat->nSamplesPerSec );
+
+    // the render client only exists once the audio client has been initialized
+    if ( !renderClient ) {
+      hr = renderAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
+                                          AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
+                                          desiredBufferPeriod,
+                                          desiredBufferPeriod,
+                                          renderFormat,
+                                          NULL );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize render audio client.";
+        goto Exit;
+      }
+
+      hr = renderAudioClient->GetService( __uuidof( IAudioRenderClient ),
+                                          ( void** ) &renderClient );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render client handle.";
+        goto Exit;
+      }
+
+      // configure renderEvent to trigger on every available render buffer
+      renderEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
+      if ( !renderEvent ) {
+        errorType = RtAudioError::SYSTEM_ERROR;
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to create render event.";
+        goto Exit;
+      }
+
+      hr = renderAudioClient->SetEventHandle( renderEvent );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to set render event handle.";
+        goto Exit;
+      }
+
+      // publish the new objects in the shared handle so later runs reuse them
+      ( ( WasapiHandle* ) stream_.apiHandle )->renderClient = renderClient;
+      ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent = renderEvent;
+    }
+
+    unsigned int outBufferSize = 0;
+    hr = renderAudioClient->GetBufferSize( &outBufferSize );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to get render buffer size.";
+      goto Exit;
+    }
+
+    // scale inBufferSize according to user->stream sample rate ratio
+    unsigned int inBufferSize = ( unsigned int ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT];
+    outBufferSize *= stream_.nDeviceChannels[OUTPUT];
+
+    // set renderBuffer size
+    renderBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[OUTPUT] ) );
+
+    // reset the render stream
+    hr = renderAudioClient->Reset();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to reset render stream.";
+      goto Exit;
+    }
+
+    // start the render stream
+    hr = renderAudioClient->Start();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to start render stream.";
+      goto Exit;
+    }
+  }
+
+  // Size the two scratch buffers (in bytes): convBuffer holds one callback
+  // buffer at the device sample rate, deviceBuffer one at the user sample rate.
+  // In duplex mode both are sized for the larger of the two directions.
+  if ( stream_.mode == INPUT ) {
+    convBuffSize = ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
+    deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
+  }
+  else if ( stream_.mode == OUTPUT ) {
+    convBuffSize = ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
+    deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
+  }
+  else if ( stream_.mode == DUPLEX ) {
+    convBuffSize = std::max( ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
+                             ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
+    deviceBuffSize = std::max( stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
+                               stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
+  }
+
+  convBuffer = ( char* ) malloc( convBuffSize );
+  stream_.deviceBuffer = ( char* ) malloc( deviceBuffSize );
+  if ( !convBuffer || !stream_.deviceBuffer ) {
+    errorType = RtAudioError::MEMORY_ERROR;
+    errorText_ = "RtApiWasapi::wasapiThread: Error allocating device buffer memory.";
+    goto Exit;
+  }
+
+  // stream process loop
+  while ( stream_.state != STREAM_STOPPING ) {
+    if ( !callbackPulled ) {
+      // Callback Input
+      // ==============
+      // 1. Pull callback buffer from inputBuffer
+      // 2. If 1. was successful: Convert callback buffer to user sample rate and channel count
+      //                          Convert callback buffer to user format
+
+      if ( captureAudioClient ) {
+        // Pull callback buffer from inputBuffer
+        callbackPulled = captureBuffer.pullBuffer( convBuffer,
+                                                   ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT],
+                                                   stream_.deviceFormat[INPUT] );
+
+        if ( callbackPulled ) {
+          // Convert callback buffer to user sample rate
+          convertBufferWasapi( stream_.deviceBuffer,
+                               convBuffer,
+                               stream_.nDeviceChannels[INPUT],
+                               captureFormat->nSamplesPerSec,
+                               stream_.sampleRate,
+                               ( unsigned int ) ( stream_.bufferSize * captureSrRatio ),
+                               convBufferSize,
+                               stream_.deviceFormat[INPUT] );
+
+          if ( stream_.doConvertBuffer[INPUT] ) {
+            // Convert callback buffer to user format
+            convertBuffer( stream_.userBuffer[INPUT],
+                           stream_.deviceBuffer,
+                           stream_.convertInfo[INPUT] );
+          }
+          else {
+            // no further conversion, simple copy deviceBuffer to userBuffer
+            memcpy( stream_.userBuffer[INPUT],
+                    stream_.deviceBuffer,
+                    stream_.bufferSize * stream_.nUserChannels[INPUT] * formatBytes( stream_.userFormat ) );
+          }
+        }
+      }
+      else {
+        // if there is no capture stream, set callbackPulled flag
+        callbackPulled = true;
+      }
+
+      // Execute Callback
+      // ================
+      // 1. Execute user callback method
+      // 2. Handle return value from callback
+
+      // if callback has not requested the stream to stop
+      if ( callbackPulled && !callbackStopped ) {
+        // Execute user callback method
+        callbackResult = callback( stream_.userBuffer[OUTPUT],
+                                   stream_.userBuffer[INPUT],
+                                   stream_.bufferSize,
+                                   getStreamTime(),
+                                   captureFlags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY ? RTAUDIO_INPUT_OVERFLOW : 0,
+                                   stream_.callbackInfo.userData );
+
+        // Handle return value from callback
+        if ( callbackResult == 1 ) {
+          // instantiate a thread to stop this thread
+          HANDLE threadHandle = CreateThread( NULL, 0, stopWasapiThread, this, 0, NULL );
+          if ( !threadHandle ) {
+            errorType = RtAudioError::THREAD_ERROR;
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream stop thread.";
+            goto Exit;
+          }
+          else if ( !CloseHandle( threadHandle ) ) {
+            errorType = RtAudioError::THREAD_ERROR;
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream stop thread handle.";
+            goto Exit;
+          }
+
+          callbackStopped = true;
+        }
+        else if ( callbackResult == 2 ) {
+          // instantiate a thread to abort this thread
+          HANDLE threadHandle = CreateThread( NULL, 0, abortWasapiThread, this, 0, NULL );
+          if ( !threadHandle ) {
+            errorType = RtAudioError::THREAD_ERROR;
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream abort thread.";
+            goto Exit;
+          }
+          else if ( !CloseHandle( threadHandle ) ) {
+            errorType = RtAudioError::THREAD_ERROR;
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream abort thread handle.";
+            goto Exit;
+          }
+
+          callbackStopped = true;
+        }
+      }
+    }
+
+    // Callback Output
+    // ===============
+    // 1. Convert callback buffer to stream format
+    // 2. Convert callback buffer to stream sample rate and channel count
+    // 3. Push callback buffer into outputBuffer
+
+    if ( renderAudioClient && callbackPulled ) {
+      if ( stream_.doConvertBuffer[OUTPUT] ) {
+        // Convert callback buffer to stream format
+        convertBuffer( stream_.deviceBuffer,
+                       stream_.userBuffer[OUTPUT],
+                       stream_.convertInfo[OUTPUT] );
+
+      }
+
+      // Convert callback buffer to stream sample rate
+      convertBufferWasapi( convBuffer,
+                           stream_.deviceBuffer,
+                           stream_.nDeviceChannels[OUTPUT],
+                           stream_.sampleRate,
+                           renderFormat->nSamplesPerSec,
+                           stream_.bufferSize,
+                           convBufferSize,
+                           stream_.deviceFormat[OUTPUT] );
+
+      // Push callback buffer into outputBuffer
+      callbackPushed = renderBuffer.pushBuffer( convBuffer,
+                                                convBufferSize * stream_.nDeviceChannels[OUTPUT],
+                                                stream_.deviceFormat[OUTPUT] );
+    }
+    else {
+      // if there is no render stream, set callbackPushed flag
+      callbackPushed = true;
+    }
+
+    // Stream Capture
+    // ==============
+    // 1. Get capture buffer from stream
+    // 2. Push capture buffer into inputBuffer
+    // 3. If 2. was successful: Release capture buffer
+
+    if ( captureAudioClient ) {
+      // if the callback input buffer was not pulled from captureBuffer, wait for next capture event
+      if ( !callbackPulled ) {
+        WaitForSingleObject( captureEvent, INFINITE );
+      }
+
+      // Get capture buffer from stream
+      hr = captureClient->GetBuffer( &streamBuffer,
+                                     &bufferFrameCount,
+                                     &captureFlags, NULL, NULL );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture buffer.";
+        goto Exit;
+      }
+
+      if ( bufferFrameCount != 0 ) {
+        // Push capture buffer into inputBuffer
+        if ( captureBuffer.pushBuffer( ( char* ) streamBuffer,
+                                       bufferFrameCount * stream_.nDeviceChannels[INPUT],
+                                       stream_.deviceFormat[INPUT] ) )
+        {
+          // Release capture buffer
+          hr = captureClient->ReleaseBuffer( bufferFrameCount );
+          if ( FAILED( hr ) ) {
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
+            goto Exit;
+          }
+        }
+        else
+        {
+          // Inform WASAPI that capture was unsuccessful
+          hr = captureClient->ReleaseBuffer( 0 );
+          if ( FAILED( hr ) ) {
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
+            goto Exit;
+          }
+        }
+      }
+      else
+      {
+        // Inform WASAPI that capture was unsuccessful
+        hr = captureClient->ReleaseBuffer( 0 );
+        if ( FAILED( hr ) ) {
+          errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
+          goto Exit;
+        }
+      }
+    }
+
+    // Stream Render
+    // =============
+    // 1. Get render buffer from stream
+    // 2. Pull next buffer from outputBuffer
+    // 3. If 2. was successful: Fill render buffer with next buffer
+    //                          Release render buffer
+
+    if ( renderAudioClient ) {
+      // if the callback output buffer was not pushed to renderBuffer, wait for next render event
+      if ( callbackPulled && !callbackPushed ) {
+        WaitForSingleObject( renderEvent, INFINITE );
+      }
+
+      // Get render buffer from stream
+      hr = renderAudioClient->GetBufferSize( &bufferFrameCount );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer size.";
+        goto Exit;
+      }
+
+      hr = renderAudioClient->GetCurrentPadding( &numFramesPadding );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer padding.";
+        goto Exit;
+      }
+
+      // frames that can be written now = total buffer size minus frames still queued
+      bufferFrameCount -= numFramesPadding;
+
+      if ( bufferFrameCount != 0 ) {
+        hr = renderClient->GetBuffer( bufferFrameCount, &streamBuffer );
+        if ( FAILED( hr ) ) {
+          errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer.";
+          goto Exit;
+        }
+
+        // Pull next buffer from outputBuffer
+        // Fill render buffer with next buffer
+        if ( renderBuffer.pullBuffer( ( char* ) streamBuffer,
+                                      bufferFrameCount * stream_.nDeviceChannels[OUTPUT],
+                                      stream_.deviceFormat[OUTPUT] ) )
+        {
+          // Release render buffer
+          hr = renderClient->ReleaseBuffer( bufferFrameCount, 0 );
+          if ( FAILED( hr ) ) {
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
+            goto Exit;
+          }
+        }
+        else
+        {
+          // Inform WASAPI that render was unsuccessful
+          hr = renderClient->ReleaseBuffer( 0, 0 );
+          if ( FAILED( hr ) ) {
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
+            goto Exit;
+          }
+        }
+      }
+      else
+      {
+        // Inform WASAPI that render was unsuccessful
+        hr = renderClient->ReleaseBuffer( 0, 0 );
+        if ( FAILED( hr ) ) {
+          errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
+          goto Exit;
+        }
+      }
+    }
+
+    // if the callback buffer was pushed to renderBuffer, reset the callbackPulled flag
+    if ( callbackPushed ) {
+      callbackPulled = false;
+    }
+
+    // tick stream time
+    RtApi::tickStreamTime();
+  }
+
+Exit:
+  // clean up (both calls below accept NULL pointers)
+  CoTaskMemFree( captureFormat );
+  CoTaskMemFree( renderFormat );
+
+  free ( convBuffer );
+
+  CoUninitialize();
+
+  // update stream state
+  stream_.state = STREAM_STOPPED;
+
+  if ( errorText_.empty() )
+    return;
+  else
+    error( errorType );
+}
+
+//******************** End of __WINDOWS_WASAPI__ *********************//
+#endif
+
+
+#if defined(__WINDOWS_DS__) // Windows DirectSound API
+
+// Modified by Robin Davies, October 2005
+// - Improvements to DirectX pointer chasing.
+// - Bug fix for non-power-of-two Asio granularity used by Edirol PCR-A30.
+// - Auto-call CoInitialize for DSOUND and ASIO platforms.
+// Various revisions for RtAudio 4.0 by Gary Scavone, April 2007
+// Changed device query structure for RtAudio 4.0.7, January 2010
+
+#include <dsound.h>
+#include <assert.h>
+#include <algorithm>
+
+#if defined(__MINGW32__)
+ // missing from latest mingw winapi
+#define WAVE_FORMAT_96M08 0x00010000 /* 96 kHz, Mono, 8-bit */
+#define WAVE_FORMAT_96S08 0x00020000 /* 96 kHz, Stereo, 8-bit */
+#define WAVE_FORMAT_96M16 0x00040000 /* 96 kHz, Mono, 16-bit */
+#define WAVE_FORMAT_96S16 0x00080000 /* 96 kHz, Stereo, 16-bit */
+#endif
+
+#define MINIMUM_DEVICE_BUFFER_SIZE 32768
+
+#ifdef _MSC_VER // if Microsoft Visual C++
+#pragma comment( lib, "winmm.lib" ) // then, auto-link winmm.lib. Otherwise, it has to be added manually.
+#endif
+
+// Circular-buffer range test: returns 1 when `pointer` lies in the half-open
+// range [earlierPointer, laterPointer) of a buffer of `bufferSize` bytes, and
+// 0 otherwise. The positions are unwrapped first: a `pointer` greater than
+// bufferSize is reduced by bufferSize once, and any position numerically
+// below earlierPointer is moved up by bufferSize.
+static inline DWORD dsPointerBetween( DWORD pointer, DWORD laterPointer, DWORD earlierPointer, DWORD bufferSize )
+{
+  DWORD position = ( pointer > bufferSize ) ? pointer - bufferSize : pointer;
+  const DWORD rangeEnd = ( laterPointer < earlierPointer ) ? laterPointer + bufferSize : laterPointer;
+  if ( position < earlierPointer ) {
+    position += bufferSize;
+  }
+
+  const bool inRange = ( position >= earlierPointer ) && ( position < rangeEnd );
+  return inRange ? 1 : 0;
+}
+
+// A structure to hold various information related to the DirectSound
+// API implementation.
+// Per-stream state of the DirectSound implementation. The two-element arrays
+// hold one entry per stream direction.
+struct DsHandle {
+  unsigned int drainCounter; // Tracks callback counts when draining
+  bool internalDrain; // Indicates if stop is initiated from callback or not.
+  void *id[2];
+  void *buffer[2];
+  bool xrun[2];
+  UINT bufferPointer[2];
+  DWORD dsBufferSize[2];
+  DWORD dsPointerLeadTime[2]; // the number of bytes ahead of the safe pointer to lead by.
+  HANDLE condition;
+
+  // Every member is given a defined value so that code inspecting a freshly
+  // created handle (for example cleanup after a partially failed open) never
+  // reads an indeterminate size or event handle.
+  DsHandle()
+    : drainCounter( 0 ), internalDrain( false ), condition( NULL )
+  {
+    for ( int i=0; i<2; i++ ) {
+      id[i] = 0;
+      buffer[i] = 0;
+      xrun[i] = false;
+      bufferPointer[i] = 0;
+      dsBufferSize[i] = 0;
+      dsPointerLeadTime[i] = 0;
+    }
+  }
+};
+
+// Declarations for utility functions, callbacks, and structures
+// specific to the DirectSound implementation.
+static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid, // enumeration callback that RtApiDs::getDeviceCount hands to DirectSoundEnumerate and DirectSoundCaptureEnumerate
+ LPCTSTR description,
+ LPCTSTR module,
+ LPVOID lpContext ); // getDeviceCount passes the address of a DsProbeData as this context pointer
+
+static const char* getErrorString( int code ); // text for a DirectSound result code; used when composing the error messages in this file
+
+static unsigned __stdcall callbackHandler( void *ptr ); // NOTE(review): presumably the entry point of the stream callback thread -- defined outside this view, confirm
+
+// One entry of the DirectSound device list (dsDevices).
+struct DsDevice {
+  LPGUID id[2];       // device GUIDs: [0] is passed to DirectSoundCreate, [1] to DirectSoundCaptureCreate
+  bool validId[2];    // whether the matching id[] entry may be used ([0] output, [1] input)
+  bool found;         // cleared before each enumeration pass; entries still false afterwards are erased
+  std::string name;
+
+  // A new entry starts out unseen and with neither GUID marked valid.
+  DsDevice()
+    : found( false )
+  {
+    for ( unsigned int n = 0; n < 2; ++n )
+      validId[n] = false;
+  }
+};
+
+// Context handed (as the lpContext argument) to the device enumeration
+// callback by RtApiDs::getDeviceCount.
+struct DsProbeData {
+  bool isInput;                      // false while enumerating output devices, true for capture devices
+  std::vector<DsDevice>* dsDevices;  // the caller's device list
+};
+
+RtApiDs :: RtApiDs()
+{
+  // Dsound will run both-threaded. Should CoInitialize fail, we simply
+  // live with whatever threading model the mainline already chose and
+  // remember not to issue the matching CoUninitialize later.
+  const HRESULT comResult = CoInitialize( NULL );
+  coInitialized_ = SUCCEEDED( comResult ) ? true : false;
+}
+
+// Closes any stream that is still open and then balances the CoInitialize
+// call made by the constructor.
+RtApiDs :: ~RtApiDs()
+{
+  // Tear the stream down first: the DirectSound objects it owns are COM
+  // interfaces and must be released while COM is still initialized on this
+  // thread. Uninitializing COM beforehand (the previous order) left
+  // closeStream() operating on objects of an already shut-down COM library.
+  if ( stream_.state != STREAM_CLOSED ) closeStream();
+  if ( coInitialized_ ) CoUninitialize(); // balanced call.
+}
+
+// The DirectSound default output is always the first device.
+unsigned int RtApiDs :: getDefaultOutputDevice( void )
+{
+  // Index 0 of the device list is reported as the default output.
+  const unsigned int defaultOutputIndex = 0;
+  return defaultOutputIndex;
+}
+
+// The DirectSound default input is always the first input device,
+// which is the first capture device enumerated.
+unsigned int RtApiDs :: getDefaultInputDevice( void )
+{
+  // Index 0 of the device list is reported as the default input.
+  const unsigned int defaultInputIndex = 0;
+  return defaultInputIndex;
+}
+
+// Re-enumerates the DirectSound output and capture devices, drops list
+// entries that were not reported by this pass, and returns the number of
+// devices left in dsDevices. An enumeration failure is reported as a
+// warning and does not abort the call.
+unsigned int RtApiDs :: getDeviceCount( void )
+{
+  // Flag every known device as "not seen yet" so that devices which have
+  // disappeared can be recognised after the enumeration.
+  for ( std::vector<DsDevice>::iterator dev = dsDevices.begin(); dev != dsDevices.end(); ++dev )
+    dev->found = false;
+
+  DsProbeData probeInfo;
+  probeInfo.dsDevices = &dsDevices;
+
+  // First pass: DirectSound (output) devices.
+  probeInfo.isInput = false;
+  HRESULT result = DirectSoundEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
+  if ( FAILED( result ) ) {
+    errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating output devices!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+  }
+
+  // Second pass: DirectSoundCapture (input) devices.
+  probeInfo.isInput = true;
+  result = DirectSoundCaptureEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
+  if ( FAILED( result ) ) {
+    errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating input devices!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+  }
+
+  // Remove the entries that were not seen in this pass
+  // (clean-up originally submitted by Eli Zehngut).
+  std::vector<DsDevice>::iterator entry = dsDevices.begin();
+  while ( entry != dsDevices.end() ) {
+    if ( entry->found )
+      ++entry;
+    else
+      entry = dsDevices.erase( entry );
+  }
+
+  return static_cast<unsigned int>( dsDevices.size() );
+}
+
+// Probes entry `device` of the DirectSound device list and returns its
+// capabilities.
+//
+// The output side (channels, sample rates, preferred rate, native formats) is
+// probed first when the entry has a valid output GUID, then the capture side
+// (channels, native formats, sample rates) when it has a valid input GUID.
+// info.probed is set to true only when the function runs to one of its two
+// successful returns; on an invalid index, an empty device list, or a failed
+// capture probe the structure is returned with probed == false.
+// NOTE(review): a capture-side failure therefore also hides output
+// information that was already gathered -- kept as in the original.
+RtAudio::DeviceInfo RtApiDs :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  if ( dsDevices.size() == 0 ) {
+    // Force a query of all devices
+    getDeviceCount();
+    if ( dsDevices.size() == 0 ) {
+      errorText_ = "RtApiDs::getDeviceInfo: no devices found!";
+      error( RtAudioError::INVALID_USE );
+      return info;
+    }
+  }
+
+  if ( device >= dsDevices.size() ) {
+    errorText_ = "RtApiDs::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  HRESULT result;
+  if ( dsDevices[ device ].validId[0] == false ) goto probeInput;
+
+  // No initialized declarations may be added between here and the probeInput
+  // label: the gotos above and below jump across this region.
+  LPDIRECTSOUND output;
+  DSCAPS outCaps;
+  result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
+  if ( FAILED( result ) ) {
+    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto probeInput;
+  }
+
+  outCaps.dwSize = sizeof( outCaps );
+  result = output->GetCaps( &outCaps );
+  if ( FAILED( result ) ) {
+    output->Release();
+    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting capabilities!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto probeInput;
+  }
+
+  // Get output channel information.
+  info.outputChannels = ( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ? 2 : 1;
+
+  // Get sample rate information: every standard rate inside the device's
+  // secondary-buffer range is supported. The preferred rate is the highest
+  // supported rate not above 48000 (or the first supported rate if none is).
+  info.sampleRates.clear();
+  for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
+    if ( SAMPLE_RATES[k] >= (unsigned int) outCaps.dwMinSecondarySampleRate &&
+         SAMPLE_RATES[k] <= (unsigned int) outCaps.dwMaxSecondarySampleRate ) {
+      info.sampleRates.push_back( SAMPLE_RATES[k] );
+
+      if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
+        info.preferredSampleRate = SAMPLE_RATES[k];
+    }
+  }
+
+  // Get format information.
+  if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT ) info.nativeFormats |= RTAUDIO_SINT16;
+  if ( outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) info.nativeFormats |= RTAUDIO_SINT8;
+
+  output->Release();
+
+  if ( getDefaultOutputDevice() == device )
+    info.isDefaultOutput = true;
+
+  // Output-only device: nothing more to probe.
+  if ( dsDevices[ device ].validId[1] == false ) {
+    info.name = dsDevices[ device ].name;
+    info.probed = true;
+    return info;
+  }
+
+ probeInput:
+
+  LPDIRECTSOUNDCAPTURE input;
+  result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
+  if ( FAILED( result ) ) {
+    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  DSCCAPS inCaps;
+  inCaps.dwSize = sizeof( inCaps );
+  result = input->GetCaps( &inCaps );
+  if ( FAILED( result ) ) {
+    input->Release();
+    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting object capabilities (" << dsDevices[ device ].name << ")!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Get input channel information.
+  info.inputChannels = inCaps.dwChannels;
+
+  // Get sample rate and format information.
+  // The capture formats are collected in a local first. Choosing the rate
+  // table by looking at info.nativeFormats would be skewed by formats that
+  // the output probe already stored there (e.g. a 16-bit output would hide
+  // the rates of an 8-bit-only capture device).
+  RtAudioFormat inputFormats = 0;
+  std::vector<unsigned int> rates;
+  if ( inCaps.dwChannels >= 2 ) {
+    if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) inputFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) inputFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) inputFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) inputFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) inputFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) inputFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) inputFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) inputFormats |= RTAUDIO_SINT8;
+
+    if ( inputFormats & RTAUDIO_SINT16 ) {
+      if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) rates.push_back( 11025 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) rates.push_back( 22050 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) rates.push_back( 44100 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) rates.push_back( 96000 );
+    }
+    else if ( inputFormats & RTAUDIO_SINT8 ) {
+      if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) rates.push_back( 11025 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) rates.push_back( 22050 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) rates.push_back( 44100 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) rates.push_back( 96000 );
+    }
+  }
+  else if ( inCaps.dwChannels == 1 ) {
+    if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) inputFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) inputFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) inputFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) inputFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) inputFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) inputFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) inputFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) inputFormats |= RTAUDIO_SINT8;
+
+    if ( inputFormats & RTAUDIO_SINT16 ) {
+      if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) rates.push_back( 11025 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) rates.push_back( 22050 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) rates.push_back( 44100 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) rates.push_back( 96000 );
+    }
+    else if ( inputFormats & RTAUDIO_SINT8 ) {
+      if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) rates.push_back( 11025 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) rates.push_back( 22050 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) rates.push_back( 44100 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) rates.push_back( 96000 );
+    }
+  }
+  else info.inputChannels = 0; // technically, this would be an error
+
+  // Merge the capture formats into the formats reported for the device.
+  info.nativeFormats |= inputFormats;
+
+  input->Release();
+
+  if ( info.inputChannels == 0 ) return info;
+
+  // Copy the supported rates to the info structure but avoid duplication.
+  bool found;
+  for ( unsigned int i=0; i<rates.size(); i++ ) {
+    found = false;
+    for ( unsigned int j=0; j<info.sampleRates.size(); j++ ) {
+      if ( rates[i] == info.sampleRates[j] ) {
+        found = true;
+        break;
+      }
+    }
+    if ( found == false ) info.sampleRates.push_back( rates[i] );
+  }
+  std::sort( info.sampleRates.begin(), info.sampleRates.end() );
+
+  // If device opens for both playback and capture, we determine the channels
+  // (the smaller of the two channel counts).
+  if ( info.outputChannels > 0 && info.inputChannels > 0 )
+    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+  // Ask getDefaultInputDevice() instead of hard-coding index 0, mirroring the
+  // getDefaultOutputDevice() check used for the output side above.
+  if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
+
+  // Copy name and return.
+  info.name = dsDevices[ device ].name;
+  info.probed = true;
+  return info;
+}
+
+bool RtApiDs :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+ unsigned int firstChannel, unsigned int sampleRate,
+ RtAudioFormat format, unsigned int *bufferSize,
+ RtAudio::StreamOptions *options )
+{
+ if ( channels + firstChannel > 2 ) {
+ errorText_ = "RtApiDs::probeDeviceOpen: DirectSound does not support more than 2 channels per device.";
+ return FAILURE;
+ }
+
+ size_t nDevices = dsDevices.size();
+ if ( nDevices == 0 ) {
+ // This should not happen because a check is made before this function is called.
+ errorText_ = "RtApiDs::probeDeviceOpen: no devices found!";
+ return FAILURE;
+ }
+
+ if ( device >= nDevices ) {
+ // This should not happen because a check is made before this function is called.
+ errorText_ = "RtApiDs::probeDeviceOpen: device ID is invalid!";
+ return FAILURE;
+ }
+
+ if ( mode == OUTPUT ) {
+ if ( dsDevices[ device ].validId[0] == false ) {
+ errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support output!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ }
+ else { // mode == INPUT
+ if ( dsDevices[ device ].validId[1] == false ) {
+ errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support input!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ }
+
+ // According to a note in PortAudio, using GetDesktopWindow()
+ // instead of GetForegroundWindow() is supposed to avoid problems
+ // that occur when the application's window is not the foreground
+ // window. Also, if the application window closes before the
+ // DirectSound buffer, DirectSound can crash. In the past, I had
+ // problems when using GetDesktopWindow() but it seems fine now
+ // (January 2010). I'll leave it commented here.
+ // HWND hWnd = GetForegroundWindow();
+ HWND hWnd = GetDesktopWindow();
+
+ // Check the numberOfBuffers parameter and limit the lowest value to
+ // two. This is a judgement call and a value of two is probably too
+ // low for capture, but it should work for playback.
+ int nBuffers = 0;
+ if ( options ) nBuffers = options->numberOfBuffers;
+ if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) nBuffers = 2;
+ if ( nBuffers < 2 ) nBuffers = 3;
+
+ // Check the lower range of the user-specified buffer size and set
+ // (arbitrarily) to a lower bound of 32.
+ if ( *bufferSize < 32 ) *bufferSize = 32;
+
+ // Create the wave format structure. The data format setting will
+ // be determined later.
+ WAVEFORMATEX waveFormat;
+ ZeroMemory( &waveFormat, sizeof(WAVEFORMATEX) );
+ waveFormat.wFormatTag = WAVE_FORMAT_PCM;
+ waveFormat.nChannels = channels + firstChannel;
+ waveFormat.nSamplesPerSec = (unsigned long) sampleRate;
+
+ // Determine the device buffer size. By default, we'll use the value
+ // defined above (32K), but we will grow it to make allowances for
+ // very large software buffer sizes.
+ DWORD dsBufferSize = MINIMUM_DEVICE_BUFFER_SIZE;
+ DWORD dsPointerLeadTime = 0;
+
+ void *ohandle = 0, *bhandle = 0;
+ HRESULT result;
+ if ( mode == OUTPUT ) {
+
+ LPDIRECTSOUND output;
+ result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ DSCAPS outCaps;
+ outCaps.dwSize = sizeof( outCaps );
+ result = output->GetCaps( &outCaps );
+ if ( FAILED( result ) ) {
+ output->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting capabilities (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Check channel information.
+ if ( channels + firstChannel == 2 && !( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ) {
+ errorStream_ << "RtApiDs::getDeviceInfo: the output device (" << dsDevices[ device ].name << ") does not support stereo playback.";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Check format information. Use 16-bit format unless not
+ // supported or user requests 8-bit.
+ if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT &&
+ !( format == RTAUDIO_SINT8 && outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) ) {
+ waveFormat.wBitsPerSample = 16;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+ }
+ else {
+ waveFormat.wBitsPerSample = 8;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+ }
+ stream_.userFormat = format;
+
+ // Update wave format structure and buffer information.
+ waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
+ waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
+ dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
+
+ // If the user wants an even bigger buffer, increase the device buffer size accordingly.
+ while ( dsPointerLeadTime * 2U > dsBufferSize )
+ dsBufferSize *= 2;
+
+ // Set cooperative level to DSSCL_EXCLUSIVE ... sound stops when window focus changes.
+ // result = output->SetCooperativeLevel( hWnd, DSSCL_EXCLUSIVE );
+ // Set cooperative level to DSSCL_PRIORITY ... sound remains when window focus changes.
+ result = output->SetCooperativeLevel( hWnd, DSSCL_PRIORITY );
+ if ( FAILED( result ) ) {
+ output->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting cooperative level (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Even though we will write to the secondary buffer, we need to
+ // access the primary buffer to set the correct output format
+ // (since the default is 8-bit, 22 kHz!). Setup the DS primary
+ // buffer description.
+ DSBUFFERDESC bufferDescription;
+ ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
+ bufferDescription.dwSize = sizeof( DSBUFFERDESC );
+ bufferDescription.dwFlags = DSBCAPS_PRIMARYBUFFER;
+
+ // Obtain the primary buffer
+ LPDIRECTSOUNDBUFFER buffer;
+ result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
+ if ( FAILED( result ) ) {
+ output->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") accessing primary buffer (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Set the primary DS buffer sound format.
+ result = buffer->SetFormat( &waveFormat );
+ if ( FAILED( result ) ) {
+ output->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting primary buffer format (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Setup the secondary DS buffer description.
+ ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
+ bufferDescription.dwSize = sizeof( DSBUFFERDESC );
+ bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
+ DSBCAPS_GLOBALFOCUS |
+ DSBCAPS_GETCURRENTPOSITION2 |
+ DSBCAPS_LOCHARDWARE ); // Force hardware mixing
+ bufferDescription.dwBufferBytes = dsBufferSize;
+ bufferDescription.lpwfxFormat = &waveFormat;
+
+ // Try to create the secondary DS buffer. If that doesn't work,
+ // try to use software mixing. Otherwise, there's a problem.
+ result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
+ if ( FAILED( result ) ) {
+ bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
+ DSBCAPS_GLOBALFOCUS |
+ DSBCAPS_GETCURRENTPOSITION2 |
+ DSBCAPS_LOCSOFTWARE ); // Force software mixing
+ result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
+ if ( FAILED( result ) ) {
+ output->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating secondary buffer (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ }
+
+ // Get the buffer size ... might be different from what we specified.
+ DSBCAPS dsbcaps;
+ dsbcaps.dwSize = sizeof( DSBCAPS );
+ result = buffer->GetCaps( &dsbcaps );
+ if ( FAILED( result ) ) {
+ output->Release();
+ buffer->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ dsBufferSize = dsbcaps.dwBufferBytes;
+
+ // Lock the DS buffer
+ LPVOID audioPtr;
+ DWORD dataLen;
+ result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
+ if ( FAILED( result ) ) {
+ output->Release();
+ buffer->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking buffer (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Zero the DS buffer
+ ZeroMemory( audioPtr, dataLen );
+
+ // Unlock the DS buffer
+ result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
+ if ( FAILED( result ) ) {
+ output->Release();
+ buffer->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking buffer (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ ohandle = (void *) output;
+ bhandle = (void *) buffer;
+ }
+
+ if ( mode == INPUT ) {
+
+ LPDIRECTSOUNDCAPTURE input;
+ result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ DSCCAPS inCaps;
+ inCaps.dwSize = sizeof( inCaps );
+ result = input->GetCaps( &inCaps );
+ if ( FAILED( result ) ) {
+ input->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting input capabilities (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Check channel information.
+ if ( inCaps.dwChannels < channels + firstChannel ) {
+ errorText_ = "RtApiDs::getDeviceInfo: the input device does not support requested input channels.";
+ return FAILURE;
+ }
+
+ // Check format information. Use 16-bit format unless user
+ // requests 8-bit.
+ DWORD deviceFormats;
+ if ( channels + firstChannel == 2 ) {
+ deviceFormats = WAVE_FORMAT_1S08 | WAVE_FORMAT_2S08 | WAVE_FORMAT_4S08 | WAVE_FORMAT_96S08;
+ if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
+ waveFormat.wBitsPerSample = 8;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+ }
+ else { // assume 16-bit is supported
+ waveFormat.wBitsPerSample = 16;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+ }
+ }
+ else { // channel == 1
+ deviceFormats = WAVE_FORMAT_1M08 | WAVE_FORMAT_2M08 | WAVE_FORMAT_4M08 | WAVE_FORMAT_96M08;
+ if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
+ waveFormat.wBitsPerSample = 8;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+ }
+ else { // assume 16-bit is supported
+ waveFormat.wBitsPerSample = 16;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+ }
+ }
+ stream_.userFormat = format;
+
+ // Update wave format structure and buffer information.
+ waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
+ waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
+ dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
+
+ // If the user wants an even bigger buffer, increase the device buffer size accordingly.
+ while ( dsPointerLeadTime * 2U > dsBufferSize )
+ dsBufferSize *= 2;
+
+ // Setup the secondary DS buffer description.
+ DSCBUFFERDESC bufferDescription;
+ ZeroMemory( &bufferDescription, sizeof( DSCBUFFERDESC ) );
+ bufferDescription.dwSize = sizeof( DSCBUFFERDESC );
+ bufferDescription.dwFlags = 0;
+ bufferDescription.dwReserved = 0;
+ bufferDescription.dwBufferBytes = dsBufferSize;
+ bufferDescription.lpwfxFormat = &waveFormat;
+
+ // Create the capture buffer.
+ LPDIRECTSOUNDCAPTUREBUFFER buffer;
+ result = input->CreateCaptureBuffer( &bufferDescription, &buffer, NULL );
+ if ( FAILED( result ) ) {
+ input->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating input buffer (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Get the buffer size ... might be different from what we specified.
+ DSCBCAPS dscbcaps;
+ dscbcaps.dwSize = sizeof( DSCBCAPS );
+ result = buffer->GetCaps( &dscbcaps );
+ if ( FAILED( result ) ) {
+ input->Release();
+ buffer->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ dsBufferSize = dscbcaps.dwBufferBytes;
+
+ // NOTE: We could have a problem here if this is a duplex stream
+ // and the play and capture hardware buffer sizes are different
+ // (I'm actually not sure if that is a problem or not).
+ // Currently, we are not verifying that.
+
+ // Lock the capture buffer
+ LPVOID audioPtr;
+ DWORD dataLen;
+ result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
+ if ( FAILED( result ) ) {
+ input->Release();
+ buffer->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking input buffer (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Zero the buffer
+ ZeroMemory( audioPtr, dataLen );
+
+ // Unlock the buffer
+ result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
+ if ( FAILED( result ) ) {
+ input->Release();
+ buffer->Release();
+ errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking input buffer (" << dsDevices[ device ].name << ")!";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ ohandle = (void *) input;
+ bhandle = (void *) buffer;
+ }
+
+ // Set various stream parameters
+ DsHandle *handle = 0;
+ stream_.nDeviceChannels[mode] = channels + firstChannel;
+ stream_.nUserChannels[mode] = channels;
+ stream_.bufferSize = *bufferSize;
+ stream_.channelOffset[mode] = firstChannel;
+ stream_.deviceInterleaved[mode] = true;
+ if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+ else stream_.userInterleaved = true;
+
+ // Set flag for buffer conversion
+ stream_.doConvertBuffer[mode] = false;
+ if (stream_.nUserChannels[mode] != stream_.nDeviceChannels[mode])
+ stream_.doConvertBuffer[mode] = true;
+ if (stream_.userFormat != stream_.deviceFormat[mode])
+ stream_.doConvertBuffer[mode] = true;
+ if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+ stream_.nUserChannels[mode] > 1 )
+ stream_.doConvertBuffer[mode] = true;
+
+ // Allocate necessary internal buffers
+ long bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+ stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+ if ( stream_.userBuffer[mode] == NULL ) {
+ errorText_ = "RtApiDs::probeDeviceOpen: error allocating user buffer memory.";
+ goto error;
+ }
+
+ if ( stream_.doConvertBuffer[mode] ) {
+
+ bool makeBuffer = true;
+ bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+ if ( mode == INPUT ) {
+ if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+ unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+ if ( bufferBytes <= (long) bytesOut ) makeBuffer = false;
+ }
+ }
+
+ if ( makeBuffer ) {
+ bufferBytes *= *bufferSize;
+ if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+ stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+ if ( stream_.deviceBuffer == NULL ) {
+ errorText_ = "RtApiDs::probeDeviceOpen: error allocating device buffer memory.";
+ goto error;
+ }
+ }
+ }
+
+ // Allocate our DsHandle structures for the stream.
+ if ( stream_.apiHandle == 0 ) {
+ try {
+ handle = new DsHandle;
+ }
+ catch ( std::bad_alloc& ) {
+ errorText_ = "RtApiDs::probeDeviceOpen: error allocating AsioHandle memory.";
+ goto error;
+ }
+
+ // Create a manual-reset event.
+ handle->condition = CreateEvent( NULL, // no security
+ TRUE, // manual-reset
+ FALSE, // non-signaled initially
+ NULL ); // unnamed
+ stream_.apiHandle = (void *) handle;
+ }
+ else
+ handle = (DsHandle *) stream_.apiHandle;
+ handle->id[mode] = ohandle;
+ handle->buffer[mode] = bhandle;
+ handle->dsBufferSize[mode] = dsBufferSize;
+ handle->dsPointerLeadTime[mode] = dsPointerLeadTime;
+
+ stream_.device[mode] = device;
+ stream_.state = STREAM_STOPPED;
+ if ( stream_.mode == OUTPUT && mode == INPUT )
+ // We had already set up an output stream.
+ stream_.mode = DUPLEX;
+ else
+ stream_.mode = mode;
+ stream_.nBuffers = nBuffers;
+ stream_.sampleRate = sampleRate;
+
+ // Setup the buffer conversion information structure.
+ if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
+
+ // Setup the callback thread.
+ if ( stream_.callbackInfo.isRunning == false ) {
+ unsigned threadId;
+ stream_.callbackInfo.isRunning = true;
+ stream_.callbackInfo.object = (void *) this;
+ stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &callbackHandler,
+ &stream_.callbackInfo, 0, &threadId );
+ if ( stream_.callbackInfo.thread == 0 ) {
+ errorText_ = "RtApiDs::probeDeviceOpen: error creating callback thread!";
+ goto error;
+ }
+
+ // Boost DS thread priority
+ SetThreadPriority( (HANDLE) stream_.callbackInfo.thread, THREAD_PRIORITY_HIGHEST );
+ }
+ return SUCCESS;
+
+ error:
+ if ( handle ) {
+ if ( handle->buffer[0] ) { // the object pointer can be NULL and valid
+ LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0];
+ LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+ if ( buffer ) buffer->Release();
+ object->Release();
+ }
+ if ( handle->buffer[1] ) {
+ LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1];
+ LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+ if ( buffer ) buffer->Release();
+ object->Release();
+ }
+ CloseHandle( handle->condition );
+ delete handle;
+ stream_.apiHandle = 0;
+ }
+
+ for ( int i=0; i<2; i++ ) {
+ if ( stream_.userBuffer[i] ) {
+ free( stream_.userBuffer[i] );
+ stream_.userBuffer[i] = 0;
+ }
+ }
+
+ if ( stream_.deviceBuffer ) {
+ free( stream_.deviceBuffer );
+ stream_.deviceBuffer = 0;
+ }
+
+ stream_.state = STREAM_CLOSED;
+ return FAILURE;
+}
+
+// Tear down the open stream: stop the callback thread, release the
+// DirectSound objects held in the DsHandle, free the internal buffers
+// and mark the stream closed.  Issues a warning and returns if no
+// stream is open.
+void RtApiDs :: closeStream()
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiDs::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // Clear the run flag, then wait on the callback thread's handle
+  // before closing that handle.
+  stream_.callbackInfo.isRunning = false;
+  HANDLE callbackThread = (HANDLE) stream_.callbackInfo.thread;
+  WaitForSingleObject( callbackThread, INFINITE );
+  CloseHandle( callbackThread );
+
+  DsHandle *dsHandle = (DsHandle *) stream_.apiHandle;
+  if ( dsHandle ) {
+
+    // Playback side: halt and release the buffer, then release the
+    // object stored alongside it.
+    LPDIRECTSOUNDBUFFER playBuffer = (LPDIRECTSOUNDBUFFER) dsHandle->buffer[0];
+    if ( playBuffer ) {
+      playBuffer->Stop();
+      playBuffer->Release();
+      ( (LPDIRECTSOUND) dsHandle->id[0] )->Release();
+    }
+
+    // Capture side: same sequence for the capture buffer and object.
+    LPDIRECTSOUNDCAPTUREBUFFER captureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) dsHandle->buffer[1];
+    if ( captureBuffer ) {
+      captureBuffer->Stop();
+      captureBuffer->Release();
+      ( (LPDIRECTSOUNDCAPTURE) dsHandle->id[1] )->Release();
+    }
+
+    CloseHandle( dsHandle->condition );
+    delete dsHandle;
+    stream_.apiHandle = 0;
+  }
+
+  // Release the per-direction user buffers (free() accepts a null
+  // pointer, so no guard is needed).
+  for ( int side = 0; side < 2; side++ ) {
+    free( stream_.userBuffer[side] );
+    stream_.userBuffer[side] = 0;
+  }
+
+  // Release the shared device buffer.
+  free( stream_.deviceBuffer );
+  stream_.deviceBuffer = 0;
+
+  stream_.state = STREAM_CLOSED;
+  stream_.mode = UNINITIALIZED;
+}
+
+// Start the open stream: raise the system timer resolution, reset the
+// buffer-rolling / duplex preroll bookkeeping, then start the playback
+// and/or capture DirectSound buffers in looping mode.
+//
+// Issues a warning and returns if the stream is already running.  If a
+// DirectSound call fails, anything started by this call is undone (the
+// output buffer is stopped and the timer resolution request is
+// released), the stream state is left unchanged and a SYSTEM_ERROR is
+// reported through error().
+void RtApiDs :: startStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiDs::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  DsHandle *handle = (DsHandle *) stream_.apiHandle;
+
+  // Increase scheduler frequency on lesser windows (a side-effect of
+  // increasing timer accuracy).  On greater windows (Win2K or later),
+  // this is already in effect.  The matching timeEndPeriod() call is
+  // made in stopStream(), or below if starting fails.
+  timeBeginPeriod( 1 );
+
+  buffersRolling = false;
+  duplexPrerollBytes = 0;
+
+  if ( stream_.mode == DUPLEX ) {
+    // 0.5 seconds of silence in DUPLEX mode while the devices spin up and synchronize.
+    duplexPrerollBytes = (int) ( 0.5 * stream_.sampleRate * formatBytes( stream_.deviceFormat[1] ) * stream_.nDeviceChannels[1] );
+  }
+
+  HRESULT result = 0;
+
+  // Remembers the output buffer once Play() has succeeded so that it
+  // can be stopped again if a later step fails.  Declared before the
+  // first "goto unlock" so no jump crosses its initialization.
+  LPDIRECTSOUNDBUFFER startedOutput = NULL;
+
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+    result = buffer->Play( 0, 0, DSBPLAY_LOOPING );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting output buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+    startedOutput = buffer;
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+    result = buffer->Start( DSCBSTART_LOOPING );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting input buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+  handle->drainCounter = 0;
+  handle->internalDrain = false;
+  ResetEvent( handle->condition );
+  stream_.state = STREAM_RUNNING;
+
+ unlock:
+  if ( FAILED( result ) ) {
+    // The stream state was not advanced to STREAM_RUNNING, so
+    // stopStream() would return early without cleaning up.  Undo the
+    // partial start here, before error() is invoked.
+    if ( startedOutput ) startedOutput->Stop();
+    timeEndPeriod( 1 );
+    error( RtAudioError::SYSTEM_ERROR );
+  }
+}
+
+// Stop the stream.  For playback, first waits for any pending output
+// to drain (unless a drain is already in progress), then stops each
+// active DirectSound buffer, zeroes its contents and rewinds the
+// corresponding buffer pointer.  Issues a warning and returns if the
+// stream is already stopped; reports a SYSTEM_ERROR if a DirectSound
+// call fails.
+void RtApiDs :: stopStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiDs::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // All function-scope locals are declared ahead of the first
+  // "goto unlock" so that no jump crosses an initialization.
+  DsHandle *dsHandle = (DsHandle *) stream_.apiHandle;
+  HRESULT status = 0;
+  LPVOID lockedRegion;
+  DWORD lockedBytes;
+
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    // If no drain has been requested yet, request one and block until
+    // the condition event is signaled.
+    if ( dsHandle->drainCounter == 0 ) {
+      dsHandle->drainCounter = 2;
+      WaitForSingleObject( dsHandle->condition, INFINITE );
+    }
+
+    stream_.state = STREAM_STOPPED;
+
+    MUTEX_LOCK( &stream_.mutex );
+
+    // Halt playback.
+    LPDIRECTSOUNDBUFFER playBuffer = (LPDIRECTSOUNDBUFFER) dsHandle->buffer[0];
+    status = playBuffer->Stop();
+    if ( FAILED( status ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( status ) << ") stopping output buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // Wipe the whole buffer so a later restart does not replay stale
+    // audio: lock it, zero it, unlock it.
+    status = playBuffer->Lock( 0, dsHandle->dsBufferSize[0], &lockedRegion, &lockedBytes, NULL, NULL, 0 );
+    if ( FAILED( status ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( status ) << ") locking output buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    ZeroMemory( lockedRegion, lockedBytes );
+
+    status = playBuffer->Unlock( lockedRegion, lockedBytes, NULL, 0 );
+    if ( FAILED( status ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( status ) << ") unlocking output buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // The next start must write from the beginning of the buffer.
+    dsHandle->bufferPointer[0] = 0;
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    LPDIRECTSOUNDCAPTUREBUFFER captureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) dsHandle->buffer[1];
+    lockedBytes = 0;
+    lockedRegion = NULL;
+
+    stream_.state = STREAM_STOPPED;
+
+    // In DUPLEX mode the mutex was already taken in the output section
+    // above; only an input-only stream needs to take it here.
+    if ( stream_.mode == INPUT )
+      MUTEX_LOCK( &stream_.mutex );
+
+    // Halt capture.
+    status = captureBuffer->Stop();
+    if ( FAILED( status ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( status ) << ") stopping input buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // Wipe the capture buffer as well: lock it, zero it, unlock it.
+    status = captureBuffer->Lock( 0, dsHandle->dsBufferSize[1], &lockedRegion, &lockedBytes, NULL, NULL, 0 );
+    if ( FAILED( status ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( status ) << ") locking input buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    ZeroMemory( lockedRegion, lockedBytes );
+
+    status = captureBuffer->Unlock( lockedRegion, lockedBytes, NULL, 0 );
+    if ( FAILED( status ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( status ) << ") unlocking input buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // The next start must read from the beginning of the buffer.
+    dsHandle->bufferPointer[1] = 0;
+  }
+
+ unlock:
+  // Reached on success and on every failure path above: release the
+  // timer resolution request and the stream mutex, then report any
+  // failure.
+  timeEndPeriod( 1 ); // revert to normal scheduler frequency on lesser windows.
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  if ( FAILED( status ) ) error( RtAudioError::SYSTEM_ERROR );
+}
+
+// Stop the stream without waiting for pending output to drain.
+// Issues a warning and returns if the stream is already stopped.
+void RtApiDs :: abortStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiDs::abortStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // A non-zero drain counter makes stopStream() skip its blocking wait
+  // on the drain condition.
+  ( (DsHandle *) stream_.apiHandle )->drainCounter = 2;
+  stopStream();
+}
+
+void RtApiDs :: callbackEvent()
+{
+ if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) {
+ Sleep( 50 ); // sleep 50 milliseconds
+ return;
+ }
+
+ if ( stream_.state == STREAM_CLOSED ) {
+ errorText_ = "RtApiDs::callbackEvent(): the stream is closed ... this shouldn't happen!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
+ DsHandle *handle = (DsHandle *) stream_.apiHandle;
+
+ // Check if we were draining the stream and signal is finished.
+ if ( handle->drainCounter > stream_.nBuffers + 2 ) {
+
+ stream_.state = STREAM_STOPPING;
+ if ( handle->internalDrain == false )
+ SetEvent( handle->condition );
+ else
+ stopStream();
+ return;
+ }
+
+ // Invoke user callback to get fresh output data UNLESS we are
+ // draining stream.
+ if ( handle->drainCounter == 0 ) {
+ RtAudioCallback callback = (RtAudioCallback) info->callback;
+ double streamTime = getStreamTime();
+ RtAudioStreamStatus status = 0;
+ if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
+ status |= RTAUDIO_OUTPUT_UNDERFLOW;
+ handle->xrun[0] = false;
+ }
+ if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
+ status |= RTAUDIO_INPUT_OVERFLOW;
+ handle->xrun[1] = false;
+ }
+ int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+ stream_.bufferSize, streamTime, status, info->userData );
+ if ( cbReturnValue == 2 ) {
+ stream_.state = STREAM_STOPPING;
+ handle->drainCounter = 2;
+ abortStream();
+ return;
+ }
+ else if ( cbReturnValue == 1 ) {
+ handle->drainCounter = 1;
+ handle->internalDrain = true;
+ }
+ }
+
+ HRESULT result;
+ DWORD currentWritePointer, safeWritePointer;
+ DWORD currentReadPointer, safeReadPointer;
+ UINT nextWritePointer;
+
+ LPVOID buffer1 = NULL;
+ LPVOID buffer2 = NULL;
+ DWORD bufferSize1 = 0;
+ DWORD bufferSize2 = 0;
+
+ char *buffer;
+ long bufferBytes;
+
+ MUTEX_LOCK( &stream_.mutex );
+ if ( stream_.state == STREAM_STOPPED ) {
+ MUTEX_UNLOCK( &stream_.mutex );
+ return;
+ }
+
+ if ( buffersRolling == false ) {
+ if ( stream_.mode == DUPLEX ) {
+ //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
+
+ // It takes a while for the devices to get rolling. As a result,
+ // there's no guarantee that the capture and write device pointers
+ // will move in lockstep. Wait here for both devices to start
+ // rolling, and then set our buffer pointers accordingly.
+ // e.g. Crystal Drivers: the capture buffer starts up 5700 to 9600
+ // bytes later than the write buffer.
+
+ // Stub: a serious risk of having a pre-emptive scheduling round
+ // take place between the two GetCurrentPosition calls... but I'm
+ // really not sure how to solve the problem. Temporarily boost to
+ // Realtime priority, maybe; but I'm not sure what priority the
+ // DirectSound service threads run at. We *should* be roughly
+ // within a ms or so of correct.
+
+ LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+ LPDIRECTSOUNDCAPTUREBUFFER dsCaptureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+
+ DWORD startSafeWritePointer, startSafeReadPointer;
+
+ result = dsWriteBuffer->GetCurrentPosition( NULL, &startSafeWritePointer );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+ result = dsCaptureBuffer->GetCurrentPosition( NULL, &startSafeReadPointer );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+ while ( true ) {
+ result = dsWriteBuffer->GetCurrentPosition( NULL, &safeWritePointer );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+ result = dsCaptureBuffer->GetCurrentPosition( NULL, &safeReadPointer );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+ if ( safeWritePointer != startSafeWritePointer && safeReadPointer != startSafeReadPointer ) break;
+ Sleep( 1 );
+ }
+
+ //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
+
+ handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
+ if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
+ handle->bufferPointer[1] = safeReadPointer;
+ }
+ else if ( stream_.mode == OUTPUT ) {
+
+ // Set the proper nextWritePosition after initial startup.
+ LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+ result = dsWriteBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+ handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
+ if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
+ }
+
+ buffersRolling = true;
+ }
+
+ if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+ LPDIRECTSOUNDBUFFER dsBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+
+ if ( handle->drainCounter > 1 ) { // write zeros to the output stream
+ bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
+ bufferBytes *= formatBytes( stream_.userFormat );
+ memset( stream_.userBuffer[0], 0, bufferBytes );
+ }
+
+ // Setup parameters and do buffer conversion if necessary.
+ if ( stream_.doConvertBuffer[0] ) {
+ buffer = stream_.deviceBuffer;
+ convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+ bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[0];
+ bufferBytes *= formatBytes( stream_.deviceFormat[0] );
+ }
+ else {
+ buffer = stream_.userBuffer[0];
+ bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
+ bufferBytes *= formatBytes( stream_.userFormat );
+ }
+
+ // No byte swapping necessary in DirectSound implementation.
+
+ // Ahhh ... windoze. 16-bit data is signed but 8-bit data is
+ // unsigned. So, we need to convert our signed 8-bit data here to
+ // unsigned.
+ if ( stream_.deviceFormat[0] == RTAUDIO_SINT8 )
+ for ( int i=0; i<bufferBytes; i++ ) buffer[i] = (unsigned char) ( buffer[i] + 128 );
+
+ DWORD dsBufferSize = handle->dsBufferSize[0];
+ nextWritePointer = handle->bufferPointer[0];
+
+ DWORD endWrite, leadPointer;
+ while ( true ) {
+ // Find out where the read and "safe write" pointers are.
+ result = dsBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
+ errorText_ = errorStream_.str();
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+
+ // We will copy our output buffer into the region between
+ // safeWritePointer and leadPointer. If leadPointer is not
+ // beyond the next endWrite position, wait until it is.
+ leadPointer = safeWritePointer + handle->dsPointerLeadTime[0];
+ //std::cout << "safeWritePointer = " << safeWritePointer << ", leadPointer = " << leadPointer << ", nextWritePointer = " << nextWritePointer << std::endl;
+ if ( leadPointer > dsBufferSize ) leadPointer -= dsBufferSize;
+ if ( leadPointer < nextWritePointer ) leadPointer += dsBufferSize; // unwrap offset
+ endWrite = nextWritePointer + bufferBytes;
+
+ // Check whether the entire write region is behind the play pointer.
+ if ( leadPointer >= endWrite ) break;
+
+ // If we are here, then we must wait until the leadPointer advances
+ // beyond the end of our next write region. We use the
+ // Sleep() function to suspend operation until that happens.
+ double millis = ( endWrite - leadPointer ) * 1000.0;
+ millis /= ( formatBytes( stream_.deviceFormat[0]) * stream_.nDeviceChannels[0] * stream_.sampleRate);
+ if ( millis < 1.0 ) millis = 1.0;
+ Sleep( (DWORD) millis );
+ }
+
+ if ( dsPointerBetween( nextWritePointer, safeWritePointer, currentWritePointer, dsBufferSize )
+ || dsPointerBetween( endWrite, safeWritePointer, currentWritePointer, dsBufferSize ) ) {
+ // We've strayed into the forbidden zone ... resync the read pointer.
+ handle->xrun[0] = true;
+ nextWritePointer = safeWritePointer + handle->dsPointerLeadTime[0] - bufferBytes;
+ if ( nextWritePointer >= dsBufferSize ) nextWritePointer -= dsBufferSize;
+ handle->bufferPointer[0] = nextWritePointer;
+ endWrite = nextWritePointer + bufferBytes;
+ }
+
+ // Lock free space in the buffer
+ result = dsBuffer->Lock( nextWritePointer, bufferBytes, &buffer1,
+ &bufferSize1, &buffer2, &bufferSize2, 0 );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking buffer during playback!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+
+ // Copy our buffer into the DS buffer
+ CopyMemory( buffer1, buffer, bufferSize1 );
+ if ( buffer2 != NULL ) CopyMemory( buffer2, buffer+bufferSize1, bufferSize2 );
+
+ // Update our buffer offset and unlock sound buffer
+ dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking buffer during playback!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+ nextWritePointer = ( nextWritePointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
+ handle->bufferPointer[0] = nextWritePointer;
+ }
+
+ // Don't bother draining input
+ if ( handle->drainCounter ) {
+ handle->drainCounter++;
+ goto unlock;
+ }
+
+ if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+ // Setup parameters.
+ if ( stream_.doConvertBuffer[1] ) {
+ buffer = stream_.deviceBuffer;
+ bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[1];
+ bufferBytes *= formatBytes( stream_.deviceFormat[1] );
+ }
+ else {
+ buffer = stream_.userBuffer[1];
+ bufferBytes = stream_.bufferSize * stream_.nUserChannels[1];
+ bufferBytes *= formatBytes( stream_.userFormat );
+ }
+
+ LPDIRECTSOUNDCAPTUREBUFFER dsBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+ long nextReadPointer = handle->bufferPointer[1];
+ DWORD dsBufferSize = handle->dsBufferSize[1];
+
+ // Find out where the write and "safe read" pointers are.
+ result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+
+ if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
+ DWORD endRead = nextReadPointer + bufferBytes;
+
+ // Handling depends on whether we are INPUT or DUPLEX.
+ // If we're in INPUT mode then waiting is a good thing. If we're in DUPLEX mode,
+ // then a wait here will drag the write pointers into the forbidden zone.
+ //
+ // In DUPLEX mode, rather than wait, we will back off the read pointer until
+ // it's in a safe position. This causes dropouts, but it seems to be the only
+ // practical way to sync up the read and write pointers reliably, given the
+ // the very complex relationship between phase and increment of the read and write
+ // pointers.
+ //
+ // In order to minimize audible dropouts in DUPLEX mode, we will
+ // provide a pre-roll period of 0.5 seconds in which we return
+ // zeros from the read buffer while the pointers sync up.
+
+ if ( stream_.mode == DUPLEX ) {
+ if ( safeReadPointer < endRead ) {
+ if ( duplexPrerollBytes <= 0 ) {
+ // Pre-roll time over. Be more agressive.
+ int adjustment = endRead-safeReadPointer;
+
+ handle->xrun[1] = true;
+ // Two cases:
+ // - large adjustments: we've probably run out of CPU cycles, so just resync exactly,
+ // and perform fine adjustments later.
+ // - small adjustments: back off by twice as much.
+ if ( adjustment >= 2*bufferBytes )
+ nextReadPointer = safeReadPointer-2*bufferBytes;
+ else
+ nextReadPointer = safeReadPointer-bufferBytes-adjustment;
+
+ if ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
+
+ }
+ else {
+ // In pre=roll time. Just do it.
+ nextReadPointer = safeReadPointer - bufferBytes;
+ while ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
+ }
+ endRead = nextReadPointer + bufferBytes;
+ }
+ }
+ else { // mode == INPUT
+ while ( safeReadPointer < endRead && stream_.callbackInfo.isRunning ) {
+ // See comments for playback.
+ double millis = (endRead - safeReadPointer) * 1000.0;
+ millis /= ( formatBytes(stream_.deviceFormat[1]) * stream_.nDeviceChannels[1] * stream_.sampleRate);
+ if ( millis < 1.0 ) millis = 1.0;
+ Sleep( (DWORD) millis );
+
+ // Wake up and find out where we are now.
+ result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+
+ if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
+ }
+ }
+
+ // Lock free space in the buffer
+ result = dsBuffer->Lock( nextReadPointer, bufferBytes, &buffer1,
+ &bufferSize1, &buffer2, &bufferSize2, 0 );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking capture buffer!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+
+ if ( duplexPrerollBytes <= 0 ) {
+ // Copy our buffer into the DS buffer
+ CopyMemory( buffer, buffer1, bufferSize1 );
+ if ( buffer2 != NULL ) CopyMemory( buffer+bufferSize1, buffer2, bufferSize2 );
+ }
+ else {
+ memset( buffer, 0, bufferSize1 );
+ if ( buffer2 != NULL ) memset( buffer + bufferSize1, 0, bufferSize2 );
+ duplexPrerollBytes -= bufferSize1 + bufferSize2;
+ }
+
+ // Update our buffer offset and unlock sound buffer
+ nextReadPointer = ( nextReadPointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
+ dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
+ if ( FAILED( result ) ) {
+ errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking capture buffer!";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+ handle->bufferPointer[1] = nextReadPointer;
+
+ // No byte swapping necessary in DirectSound implementation.
+
+ // If necessary, convert 8-bit data from unsigned to signed.
+ if ( stream_.deviceFormat[1] == RTAUDIO_SINT8 )
+ for ( int j=0; j<bufferBytes; j++ ) buffer[j] = (signed char) ( buffer[j] - 128 );
+
+ // Do buffer conversion if necessary.
+ if ( stream_.doConvertBuffer[1] )
+ convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+ }
+
+ unlock:
+ MUTEX_UNLOCK( &stream_.mutex );
+ RtApi::tickStreamTime();
+}
+
+// Definitions for utility functions and callbacks
+// specific to the DirectSound implementation.
+
+// Entry point of the DirectSound callback thread. ptr is a
+// CallbackInfo whose object member is the RtApiDs instance to service;
+// callbackEvent() is invoked repeatedly for as long as the isRunning
+// flag stays set, after which the thread terminates itself.
+static unsigned __stdcall callbackHandler( void *ptr )
+{
+  CallbackInfo *callbackInfo = (CallbackInfo *) ptr;
+  RtApiDs *api = (RtApiDs *) callbackInfo->object;
+  bool* keepRunning = &callbackInfo->isRunning;
+
+  // The flag is re-read through the pointer before every iteration.
+  for ( ; *keepRunning == true; ) {
+    api->callbackEvent();
+  }
+
+  _endthreadex( 0 );
+  return 0;
+}
+
+// Device enumeration callback (presumably registered with the
+// DirectSound enumeration functions -- confirm at the call site).
+//
+// lpContext points to a DsProbeData. Depending on its isInput flag the
+// device identified by lpguid is opened for capture or playback and its
+// capabilities are queried. A usable device is recorded in the
+// DsProbeData device list: if an entry with the same name already
+// exists it is updated, otherwise a new entry is appended.
+//
+// Always returns TRUE.
+static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid,
+                                          LPCTSTR description,
+                                          LPCTSTR /*module*/,
+                                          LPVOID lpContext )
+{
+  struct DsProbeData& probeInfo = *(struct DsProbeData*) lpContext;
+  std::vector<struct DsDevice>& knownDevices = *probeInfo.dsDevices;
+
+  bool usable = false;
+  if ( probeInfo.isInput == true ) {
+    LPDIRECTSOUNDCAPTURE capture;
+    if ( DirectSoundCaptureCreate( lpguid, &capture, NULL ) != DS_OK ) return TRUE;
+
+    DSCCAPS captureCaps;
+    captureCaps.dwSize = sizeof(captureCaps);
+    if ( capture->GetCaps( &captureCaps ) == DS_OK )
+      usable = ( captureCaps.dwChannels > 0 && captureCaps.dwFormats > 0 );
+    capture->Release();
+  }
+  else {
+    LPDIRECTSOUND playback;
+    if ( DirectSoundCreate( lpguid, &playback, NULL ) != DS_OK ) return TRUE;
+
+    DSCAPS playbackCaps;
+    playbackCaps.dwSize = sizeof(playbackCaps);
+    if ( playback->GetCaps( &playbackCaps ) == DS_OK ) {
+      if ( ( playbackCaps.dwFlags & DSCAPS_PRIMARYMONO ) || ( playbackCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) )
+        usable = true;
+    }
+    playback->Release();
+  }
+
+  // A NULL guid is reported under a fixed name.
+  std::string name = convertCharPointerToStdString( description );
+  if ( lpguid == NULL )
+    name = "Default Device";
+
+  if ( !usable ) return TRUE;
+
+  // Index 1 of the id/validId arrays is written for input probes,
+  // index 0 for output probes.
+  const int slot = probeInfo.isInput ? 1 : 0;
+
+  // Update an already-known device of the same name, if there is one.
+  for ( unsigned int k=0; k<knownDevices.size(); k++ ) {
+    if ( knownDevices[k].name != name ) continue;
+    knownDevices[k].found = true;
+    knownDevices[k].id[slot] = lpguid;
+    knownDevices[k].validId[slot] = true;
+    return TRUE;
+  }
+
+  // Otherwise register it as a new device.
+  DsDevice fresh;
+  fresh.name = name;
+  fresh.found = true;
+  fresh.id[slot] = lpguid;
+  fresh.validId[slot] = true;
+  knownDevices.push_back( fresh );
+
+  return TRUE;
+}
+
+static const char* getErrorString( int code )
+{
+ switch ( code ) {
+
+ case DSERR_ALLOCATED:
+ return "Already allocated";
+
+ case DSERR_CONTROLUNAVAIL:
+ return "Control unavailable";
+
+ case DSERR_INVALIDPARAM:
+ return "Invalid parameter";
+
+ case DSERR_INVALIDCALL:
+ return "Invalid call";
+
+ case DSERR_GENERIC:
+ return "Generic error";
+
+ case DSERR_PRIOLEVELNEEDED:
+ return "Priority level needed";
+
+ case DSERR_OUTOFMEMORY:
+ return "Out of memory";
+
+ case DSERR_BADFORMAT:
+ return "The sample rate or the channel format is not supported";
+
+ case DSERR_UNSUPPORTED:
+ return "Not supported";
+
+ case DSERR_NODRIVER:
+ return "No driver";
+
+ case DSERR_ALREADYINITIALIZED:
+ return "Already initialized";
+
+ case DSERR_NOAGGREGATION:
+ return "No aggregation";
+
+ case DSERR_BUFFERLOST:
+ return "Buffer lost";
+
+ case DSERR_OTHERAPPHASPRIO:
+ return "Another application already has priority";
+
+ case DSERR_UNINITIALIZED:
+ return "Uninitialized";
+
+ default:
+ return "DirectSound unknown error";
+ }
+}
+//******************** End of __WINDOWS_DS__ *********************//
+#endif
+
+
+#if defined(__LINUX_ALSA__)
+
+#include <alsa/asoundlib.h>
+#include <unistd.h>
+
+ // A structure to hold various information related to the ALSA API
+ // implementation.
+struct AlsaHandle {
+  // PCM handles; presumably indexed by stream direction
+  // (0 = playback, 1 = capture) -- confirm against the users.
+  snd_pcm_t *handles[2];
+  bool synchronized;
+  // Per-slot xrun flags (same indexing as handles, presumably).
+  bool xrun[2];
+  // Condition variable paired with the runnable flag. It is NOT
+  // initialized by the constructor; whoever owns the handle has to
+  // initialize it before use.
+  pthread_cond_t runnable_cv;
+  bool runnable;
+
+  // All flags start out false and both PCM handles start out null, so a
+  // freshly constructed handle never exposes indeterminate pointers.
+  AlsaHandle()
+    :synchronized(false), runnable(false)
+  {
+    handles[0] = 0;
+    handles[1] = 0;
+    xrun[0] = false;
+    xrun[1] = false;
+  }
+};
+
+static void *alsaCallbackHandler( void * ptr );
+
+RtApiAlsa :: RtApiAlsa()
+{
+ // Intentionally empty: the constructor body performs no work.
+}
+
+// Close the stream on destruction unless it is already closed.
+RtApiAlsa :: ~RtApiAlsa()
+{
+  if ( stream_.state == STREAM_CLOSED ) return;
+  closeStream();
+}
+
+// Count the PCM devices reachable through ALSA.
+//
+// Every sound card reported by snd_card_next() is opened through its
+// "hw:<card>" control interface and its PCM devices are counted. One
+// extra device is counted if the "default" control interface can be
+// opened. Failures on individual cards are reported as warnings and do
+// not abort the enumeration.
+//
+// Returns the total number of devices found.
+unsigned int RtApiAlsa :: getDeviceCount( void )
+{
+  unsigned nDevices = 0;
+  int result, subdevice, card;
+  char name[64];
+  snd_ctl_t *handle = 0;
+
+  // Count cards and devices
+  card = -1;
+  snd_card_next( &card );
+  while ( card >= 0 ) {
+    snprintf( name, sizeof(name), "hw:%d", card );
+    result = snd_ctl_open( &handle, name, 0 );
+    if ( result < 0 ) {
+      // The handle is not valid after a failed open, so it must not be
+      // passed to snd_ctl_close() below.
+      handle = 0;
+      errorStream_ << "RtApiAlsa::getDeviceCount: control open, card = " << card << ", " << snd_strerror( result ) << ".";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::WARNING );
+    }
+    else {
+      subdevice = -1;
+      while( 1 ) {
+        result = snd_ctl_pcm_next_device( handle, &subdevice );
+        if ( result < 0 ) {
+          errorStream_ << "RtApiAlsa::getDeviceCount: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
+          errorText_ = errorStream_.str();
+          error( RtAudioError::WARNING );
+          break;
+        }
+        if ( subdevice < 0 )
+          break;
+        nDevices++;
+      }
+    }
+
+    // Only close control handles that were actually opened.
+    if ( handle ) {
+      snd_ctl_close( handle );
+      handle = 0;
+    }
+    snd_card_next( &card );
+  }
+
+  result = snd_ctl_open( &handle, "default", 0 );
+  if (result == 0) {
+    nDevices++;
+    snd_ctl_close( handle );
+  }
+
+  return nDevices;
+}
+
+// Probe the ALSA device with enumeration index `device` and describe it.
+//
+// Devices are numbered in the same order used by getDeviceCount(): the
+// PCM devices of each card in turn, followed by the "default" device.
+// The probe determines the maximum playback and capture channel counts,
+// the supported rates out of SAMPLE_RATES, the natively supported
+// sample formats and a display name.
+//
+// If a stream is open on the requested device the device cannot be
+// probed, so the information saved in devices_ is returned instead.
+//
+// On any failure a warning or error is raised through error() and the
+// partially filled structure is returned with info.probed == false;
+// info.probed is set to true only when the whole probe succeeded.
+RtAudio::DeviceInfo RtApiAlsa :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  unsigned nDevices = 0;
+  int result, subdevice, card;
+  char name[64];
+  unsigned int value = 0;
+  // Kept null whenever no control handle is open, so that it is never
+  // closed twice, closed without having been opened, or leaked.
+  snd_ctl_t *chandle = 0;
+
+  // Count cards and devices
+  card = -1;
+  subdevice = -1;
+  snd_card_next( &card );
+  while ( card >= 0 ) {
+    snprintf( name, sizeof(name), "hw:%d", card );
+    result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
+    if ( result < 0 ) {
+      // Nothing was opened: make sure nextcard does not close it.
+      chandle = 0;
+      errorStream_ << "RtApiAlsa::getDeviceInfo: control open, card = " << card << ", " << snd_strerror( result ) << ".";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::WARNING );
+      goto nextcard;
+    }
+    subdevice = -1;
+    while( 1 ) {
+      result = snd_ctl_pcm_next_device( chandle, &subdevice );
+      if ( result < 0 ) {
+        errorStream_ << "RtApiAlsa::getDeviceInfo: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
+        errorText_ = errorStream_.str();
+        error( RtAudioError::WARNING );
+        break;
+      }
+      if ( subdevice < 0 ) break;
+      if ( nDevices == device ) {
+        snprintf( name, sizeof(name), "hw:%d,%d", card, subdevice );
+        goto foundDevice;
+      }
+      nDevices++;
+    }
+  nextcard:
+    if ( chandle ) {
+      snd_ctl_close( chandle );
+      chandle = 0;
+    }
+    snd_card_next( &card );
+  }
+
+  result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK );
+  if ( result == 0 ) {
+    if ( nDevices == device ) {
+      strcpy( name, "default" );
+      goto foundDevice;
+    }
+    nDevices++;
+    // Not the requested device: release the control handle again
+    // instead of leaking it on the error returns below.
+    snd_ctl_close( chandle );
+    chandle = 0;
+  }
+  else
+    chandle = 0;
+
+  if ( nDevices == 0 ) {
+    errorText_ = "RtApiAlsa::getDeviceInfo: no devices found!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  if ( device >= nDevices ) {
+    errorText_ = "RtApiAlsa::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+ foundDevice:
+
+  // If a stream is already open, we cannot probe the stream devices.
+  // Thus, use the saved results.
+  if ( stream_.state != STREAM_CLOSED &&
+       ( stream_.device[0] == device || stream_.device[1] == device ) ) {
+    snd_ctl_close( chandle );
+    if ( device >= devices_.size() ) {
+      errorText_ = "RtApiAlsa::getDeviceInfo: device ID was not present before stream was opened.";
+      error( RtAudioError::WARNING );
+      return info;
+    }
+    return devices_[ device ];
+  }
+
+  int openMode = SND_PCM_ASYNC;
+  snd_pcm_stream_t stream;
+  snd_pcm_info_t *pcminfo;
+  snd_pcm_info_alloca( &pcminfo );
+  snd_pcm_t *phandle;
+  snd_pcm_hw_params_t *params;
+  snd_pcm_hw_params_alloca( &params );
+
+  // First try for playback unless default device (which has subdev -1)
+  stream = SND_PCM_STREAM_PLAYBACK;
+  snd_pcm_info_set_stream( pcminfo, stream );
+  if ( subdevice != -1 ) {
+    snd_pcm_info_set_device( pcminfo, subdevice );
+    snd_pcm_info_set_subdevice( pcminfo, 0 );
+
+    result = snd_ctl_pcm_info( chandle, pcminfo );
+    if ( result < 0 ) {
+      // Device probably doesn't support playback.
+      goto captureProbe;
+    }
+  }
+
+  result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK );
+  if ( result < 0 ) {
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto captureProbe;
+  }
+
+  // The device is open ... fill the parameter structure.
+  result = snd_pcm_hw_params_any( phandle, params );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto captureProbe;
+  }
+
+  // Get output channel information.
+  result = snd_pcm_hw_params_get_channels_max( params, &value );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") output channels, " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto captureProbe;
+  }
+  info.outputChannels = value;
+  snd_pcm_close( phandle );
+
+ captureProbe:
+  stream = SND_PCM_STREAM_CAPTURE;
+  snd_pcm_info_set_stream( pcminfo, stream );
+
+  // Now try for capture unless default device (with subdev = -1).
+  // The control handle is not needed past this point, so it is closed
+  // on both branches.
+  if ( subdevice != -1 ) {
+    result = snd_ctl_pcm_info( chandle, pcminfo );
+    snd_ctl_close( chandle );
+    if ( result < 0 ) {
+      // Device probably doesn't support capture.
+      if ( info.outputChannels == 0 ) return info;
+      goto probeParameters;
+    }
+  }
+  else
+    snd_ctl_close( chandle );
+
+  result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
+  if ( result < 0 ) {
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    if ( info.outputChannels == 0 ) return info;
+    goto probeParameters;
+  }
+
+  // The device is open ... fill the parameter structure.
+  result = snd_pcm_hw_params_any( phandle, params );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    if ( info.outputChannels == 0 ) return info;
+    goto probeParameters;
+  }
+
+  result = snd_pcm_hw_params_get_channels_max( params, &value );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") input channels, " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    if ( info.outputChannels == 0 ) return info;
+    goto probeParameters;
+  }
+  info.inputChannels = value;
+  snd_pcm_close( phandle );
+
+  // If device opens for both playback and capture, we determine the channels.
+  if ( info.outputChannels > 0 && info.inputChannels > 0 )
+    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+ probeParameters:
+  // ALSA doesn't provide default devices so we'll use the first available one.
+  // This is evaluated after the label so that a device whose capture
+  // probe failed (playback-only) is still flagged as default output.
+  if ( device == 0 && info.outputChannels > 0 )
+    info.isDefaultOutput = true;
+  if ( device == 0 && info.inputChannels > 0 )
+    info.isDefaultInput = true;
+
+  // At this point, we just need to figure out the supported data
+  // formats and sample rates. We'll proceed by opening the device in
+  // the direction with the maximum number of channels, or playback if
+  // they are equal. This might limit our sample rate options, but so
+  // be it.
+
+  if ( info.outputChannels >= info.inputChannels )
+    stream = SND_PCM_STREAM_PLAYBACK;
+  else
+    stream = SND_PCM_STREAM_CAPTURE;
+  snd_pcm_info_set_stream( pcminfo, stream );
+
+  result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
+  if ( result < 0 ) {
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // The device is open ... fill the parameter structure.
+  result = snd_pcm_hw_params_any( phandle, params );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Test our discrete set of sample rate values. The preferred rate is
+  // the highest supported rate not above 48000, or simply the first
+  // supported rate if none qualifies.
+  info.sampleRates.clear();
+  for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
+    if ( snd_pcm_hw_params_test_rate( phandle, params, SAMPLE_RATES[i], 0 ) == 0 ) {
+      info.sampleRates.push_back( SAMPLE_RATES[i] );
+
+      if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
+        info.preferredSampleRate = SAMPLE_RATES[i];
+    }
+  }
+  if ( info.sampleRates.size() == 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: no supported sample rates found for device (" << name << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Probe the supported data formats ... we don't care about endian-ness just yet
+  snd_pcm_format_t format;
+  info.nativeFormats = 0;
+  format = SND_PCM_FORMAT_S8;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_SINT8;
+  format = SND_PCM_FORMAT_S16;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_SINT16;
+  format = SND_PCM_FORMAT_S24;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_SINT24;
+  format = SND_PCM_FORMAT_S32;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_SINT32;
+  format = SND_PCM_FORMAT_FLOAT;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_FLOAT32;
+  format = SND_PCM_FORMAT_FLOAT64;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_FLOAT64;
+
+  // Check that we have at least one supported format
+  if ( info.nativeFormats == 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: pcm device (" << name << ") data format not supported by RtAudio.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Get the device name. If the card name cannot be obtained, the
+  // identifier used to open the device is kept as the name.
+  char *cardname;
+  result = snd_card_get_name( card, &cardname );
+  if ( result >= 0 ) {
+    // Bounded write: the card name length is not under our control.
+    snprintf( name, sizeof(name), "hw:%s,%d", cardname, subdevice );
+    free( cardname );
+  }
+  info.name = name;
+
+  // That's all ... close the device and return
+  snd_pcm_close( phandle );
+  info.probed = true;
+  return info;
+}
+
+// Rebuild the devices_ cache: discard the old contents, size the cache
+// to the current device count and store the probe result of every
+// device at its own index.
+void RtApiAlsa :: saveDeviceInfo( void )
+{
+  devices_.clear();
+  devices_.resize( getDeviceCount() );
+
+  unsigned int index = 0;
+  while ( index < devices_.size() ) {
+    devices_[index] = getDeviceInfo( index );
+    ++index;
+  }
+}
+
+bool RtApiAlsa :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+ unsigned int firstChannel, unsigned int sampleRate,
+ RtAudioFormat format, unsigned int *bufferSize,
+ RtAudio::StreamOptions *options )
+
+{
+#if defined(__RTAUDIO_DEBUG__)
+ snd_output_t *out;
+ snd_output_stdio_attach(&out, stderr, 0);
+#endif
+
+ // I'm not using the "plug" interface ... too much inconsistent behavior.
+
+ unsigned nDevices = 0;
+ int result, subdevice, card;
+ char name[64];
+ snd_ctl_t *chandle;
+
+ if ( options && options->flags & RTAUDIO_ALSA_USE_DEFAULT )
+ snprintf(name, sizeof(name), "%s", "default");
+ else {
+ // Count cards and devices
+ card = -1;
+ snd_card_next( &card );
+ while ( card >= 0 ) {
+ sprintf( name, "hw:%d", card );
+ result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
+ if ( result < 0 ) {
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: control open, card = " << card << ", " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ subdevice = -1;
+ while( 1 ) {
+ result = snd_ctl_pcm_next_device( chandle, &subdevice );
+ if ( result < 0 ) break;
+ if ( subdevice < 0 ) break;
+ if ( nDevices == device ) {
+ sprintf( name, "hw:%d,%d", card, subdevice );
+ snd_ctl_close( chandle );
+ goto foundDevice;
+ }
+ nDevices++;
+ }
+ snd_ctl_close( chandle );
+ snd_card_next( &card );
+ }
+
+ result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK );
+ if ( result == 0 ) {
+ if ( nDevices == device ) {
+ strcpy( name, "default" );
+ goto foundDevice;
+ }
+ nDevices++;
+ }
+
+ if ( nDevices == 0 ) {
+ // This should not happen because a check is made before this function is called.
+ errorText_ = "RtApiAlsa::probeDeviceOpen: no devices found!";
+ return FAILURE;
+ }
+
+ if ( device >= nDevices ) {
+ // This should not happen because a check is made before this function is called.
+ errorText_ = "RtApiAlsa::probeDeviceOpen: device ID is invalid!";
+ return FAILURE;
+ }
+ }
+
+ foundDevice:
+
+ // The getDeviceInfo() function will not work for a device that is
+ // already open. Thus, we'll probe the system before opening a
+ // stream and save the results for use by getDeviceInfo().
+ if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) // only do once
+ this->saveDeviceInfo();
+
+ snd_pcm_stream_t stream;
+ if ( mode == OUTPUT )
+ stream = SND_PCM_STREAM_PLAYBACK;
+ else
+ stream = SND_PCM_STREAM_CAPTURE;
+
+ snd_pcm_t *phandle;
+ int openMode = SND_PCM_ASYNC;
+ result = snd_pcm_open( &phandle, name, stream, openMode );
+ if ( result < 0 ) {
+ if ( mode == OUTPUT )
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for output.";
+ else
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for input.";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Fill the parameter structure.
+ snd_pcm_hw_params_t *hw_params;
+ snd_pcm_hw_params_alloca( &hw_params );
+ result = snd_pcm_hw_params_any( phandle, hw_params );
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") parameters, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+#if defined(__RTAUDIO_DEBUG__)
+ fprintf( stderr, "\nRtApiAlsa: dump hardware params just after device open:\n\n" );
+ snd_pcm_hw_params_dump( hw_params, out );
+#endif
+
+ // Set access ... check user preference.
+ if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) {
+ stream_.userInterleaved = false;
+ result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
+ if ( result < 0 ) {
+ result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
+ stream_.deviceInterleaved[mode] = true;
+ }
+ else
+ stream_.deviceInterleaved[mode] = false;
+ }
+ else {
+ stream_.userInterleaved = true;
+ result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
+ if ( result < 0 ) {
+ result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
+ stream_.deviceInterleaved[mode] = false;
+ }
+ else
+ stream_.deviceInterleaved[mode] = true;
+ }
+
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") access, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Determine how to set the device format.
+ stream_.userFormat = format;
+ snd_pcm_format_t deviceFormat = SND_PCM_FORMAT_UNKNOWN;
+
+ if ( format == RTAUDIO_SINT8 )
+ deviceFormat = SND_PCM_FORMAT_S8;
+ else if ( format == RTAUDIO_SINT16 )
+ deviceFormat = SND_PCM_FORMAT_S16;
+ else if ( format == RTAUDIO_SINT24 )
+ deviceFormat = SND_PCM_FORMAT_S24;
+ else if ( format == RTAUDIO_SINT32 )
+ deviceFormat = SND_PCM_FORMAT_S32;
+ else if ( format == RTAUDIO_FLOAT32 )
+ deviceFormat = SND_PCM_FORMAT_FLOAT;
+ else if ( format == RTAUDIO_FLOAT64 )
+ deviceFormat = SND_PCM_FORMAT_FLOAT64;
+
+ if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat) == 0) {
+ stream_.deviceFormat[mode] = format;
+ goto setFormat;
+ }
+
+ // The user requested format is not natively supported by the device.
+ deviceFormat = SND_PCM_FORMAT_FLOAT64;
+ if ( snd_pcm_hw_params_test_format( phandle, hw_params, deviceFormat ) == 0 ) {
+ stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
+ goto setFormat;
+ }
+
+ deviceFormat = SND_PCM_FORMAT_FLOAT;
+ if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+ stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+ goto setFormat;
+ }
+
+ deviceFormat = SND_PCM_FORMAT_S32;
+ if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+ stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+ goto setFormat;
+ }
+
+ deviceFormat = SND_PCM_FORMAT_S24;
+ if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+ stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+ goto setFormat;
+ }
+
+ deviceFormat = SND_PCM_FORMAT_S16;
+ if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+ stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+ goto setFormat;
+ }
+
+ deviceFormat = SND_PCM_FORMAT_S8;
+ if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+ stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+ goto setFormat;
+ }
+
+ // If we get here, no supported format was found.
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device " << device << " data format not supported by RtAudio.";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+
+ setFormat:
+ result = snd_pcm_hw_params_set_format( phandle, hw_params, deviceFormat );
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") data format, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Determine whether byte-swaping is necessary.
+ stream_.doByteSwap[mode] = false;
+ if ( deviceFormat != SND_PCM_FORMAT_S8 ) {
+ result = snd_pcm_format_cpu_endian( deviceFormat );
+ if ( result == 0 )
+ stream_.doByteSwap[mode] = true;
+ else if (result < 0) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") endian-ness, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ }
+
+ // Set the sample rate.
+ result = snd_pcm_hw_params_set_rate_near( phandle, hw_params, (unsigned int*) &sampleRate, 0 );
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting sample rate on device (" << name << "), " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Determine the number of channels for this device. We support a possible
+ // minimum device channel number > than the value requested by the user.
+ stream_.nUserChannels[mode] = channels;
+ unsigned int value;
+ result = snd_pcm_hw_params_get_channels_max( hw_params, &value );
+ unsigned int deviceChannels = value;
+ if ( result < 0 || deviceChannels < channels + firstChannel ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: requested channel parameters not supported by device (" << name << "), " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ result = snd_pcm_hw_params_get_channels_min( hw_params, &value );
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting minimum channels for device (" << name << "), " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ deviceChannels = value;
+ if ( deviceChannels < channels + firstChannel ) deviceChannels = channels + firstChannel;
+ stream_.nDeviceChannels[mode] = deviceChannels;
+
+ // Set the device channels.
+ result = snd_pcm_hw_params_set_channels( phandle, hw_params, deviceChannels );
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting channels for device (" << name << "), " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Set the buffer (or period) size.
+ int dir = 0;
+ snd_pcm_uframes_t periodSize = *bufferSize;
+ result = snd_pcm_hw_params_set_period_size_near( phandle, hw_params, &periodSize, &dir );
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting period size for device (" << name << "), " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ *bufferSize = periodSize;
+
+ // Set the buffer number, which in ALSA is referred to as the "period".
+ unsigned int periods = 0;
+ if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) periods = 2;
+ if ( options && options->numberOfBuffers > 0 ) periods = options->numberOfBuffers;
+ if ( periods < 2 ) periods = 4; // a fairly safe default value
+ result = snd_pcm_hw_params_set_periods_near( phandle, hw_params, &periods, &dir );
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting periods for device (" << name << "), " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // If attempting to setup a duplex stream, the bufferSize parameter
+ // MUST be the same in both directions!
+ if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << name << ").";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ stream_.bufferSize = *bufferSize;
+
+ // Install the hardware configuration
+ result = snd_pcm_hw_params( phandle, hw_params );
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing hardware configuration on device (" << name << "), " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+#if defined(__RTAUDIO_DEBUG__)
+ fprintf(stderr, "\nRtApiAlsa: dump hardware params after installation:\n\n");
+ snd_pcm_hw_params_dump( hw_params, out );
+#endif
+
+ // Set the software configuration to fill buffers with zeros and prevent device stopping on xruns.
+ snd_pcm_sw_params_t *sw_params = NULL;
+ snd_pcm_sw_params_alloca( &sw_params );
+ snd_pcm_sw_params_current( phandle, sw_params );
+ snd_pcm_sw_params_set_start_threshold( phandle, sw_params, *bufferSize );
+ snd_pcm_sw_params_set_stop_threshold( phandle, sw_params, ULONG_MAX );
+ snd_pcm_sw_params_set_silence_threshold( phandle, sw_params, 0 );
+
+ // The following two settings were suggested by Theo Veenker
+ //snd_pcm_sw_params_set_avail_min( phandle, sw_params, *bufferSize );
+ //snd_pcm_sw_params_set_xfer_align( phandle, sw_params, 1 );
+
+ // here are two options for a fix
+ //snd_pcm_sw_params_set_silence_size( phandle, sw_params, ULONG_MAX );
+ snd_pcm_uframes_t val;
+ snd_pcm_sw_params_get_boundary( sw_params, &val );
+ snd_pcm_sw_params_set_silence_size( phandle, sw_params, val );
+
+ result = snd_pcm_sw_params( phandle, sw_params );
+ if ( result < 0 ) {
+ snd_pcm_close( phandle );
+ errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing software configuration on device (" << name << "), " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+#if defined(__RTAUDIO_DEBUG__)
+ fprintf(stderr, "\nRtApiAlsa: dump software params after installation:\n\n");
+ snd_pcm_sw_params_dump( sw_params, out );
+#endif
+
+ // Set flags for buffer conversion
+ stream_.doConvertBuffer[mode] = false;
+ if ( stream_.userFormat != stream_.deviceFormat[mode] )
+ stream_.doConvertBuffer[mode] = true;
+ if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
+ stream_.doConvertBuffer[mode] = true;
+ if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+ stream_.nUserChannels[mode] > 1 )
+ stream_.doConvertBuffer[mode] = true;
+
+ // Allocate the ApiHandle if necessary and then save.
+ AlsaHandle *apiInfo = 0;
+ if ( stream_.apiHandle == 0 ) {
+ try {
+ apiInfo = (AlsaHandle *) new AlsaHandle;
+ }
+ catch ( std::bad_alloc& ) {
+ errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating AlsaHandle memory.";
+ goto error;
+ }
+
+ if ( pthread_cond_init( &apiInfo->runnable_cv, NULL ) ) {
+ errorText_ = "RtApiAlsa::probeDeviceOpen: error initializing pthread condition variable.";
+ goto error;
+ }
+
+ stream_.apiHandle = (void *) apiInfo;
+ apiInfo->handles[0] = 0;
+ apiInfo->handles[1] = 0;
+ }
+ else {
+ apiInfo = (AlsaHandle *) stream_.apiHandle;
+ }
+ apiInfo->handles[mode] = phandle;
+ phandle = 0;
+
+ // Allocate necessary internal buffers.
+ unsigned long bufferBytes;
+ bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+ stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+ if ( stream_.userBuffer[mode] == NULL ) {
+ errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating user buffer memory.";
+ goto error;
+ }
+
+ if ( stream_.doConvertBuffer[mode] ) {
+
+ bool makeBuffer = true;
+ bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+ if ( mode == INPUT ) {
+ if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+ unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+ if ( bufferBytes <= bytesOut ) makeBuffer = false;
+ }
+ }
+
+ if ( makeBuffer ) {
+ bufferBytes *= *bufferSize;
+ if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+ stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+ if ( stream_.deviceBuffer == NULL ) {
+ errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating device buffer memory.";
+ goto error;
+ }
+ }
+ }
+
+ stream_.sampleRate = sampleRate;
+ stream_.nBuffers = periods;
+ stream_.device[mode] = device;
+ stream_.state = STREAM_STOPPED;
+
+ // Setup the buffer conversion information structure.
+ if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
+
+ // Setup thread if necessary.
+ if ( stream_.mode == OUTPUT && mode == INPUT ) {
+ // We had already set up an output stream.
+ stream_.mode = DUPLEX;
+ // Link the streams if possible.
+ apiInfo->synchronized = false;
+ if ( snd_pcm_link( apiInfo->handles[0], apiInfo->handles[1] ) == 0 )
+ apiInfo->synchronized = true;
+ else {
+ errorText_ = "RtApiAlsa::probeDeviceOpen: unable to synchronize input and output devices.";
+ error( RtAudioError::WARNING );
+ }
+ }
+ else {
+ stream_.mode = mode;
+
+ // Setup callback thread.
+ stream_.callbackInfo.object = (void *) this;
+
+ // Set the thread attributes for joinable and realtime scheduling
+ // priority (optional). The higher priority will only take effect
+ // if the program is run as root or suid. Note, under Linux
+ // processes with CAP_SYS_NICE privilege, a user can change
+ // scheduling policy and priority (thus need not be root). See
+ // POSIX "capabilities".
+ pthread_attr_t attr;
+ pthread_attr_init( &attr );
+ pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
+
+#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
+ if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
+ // We previously attempted to increase the audio callback priority
+ // to SCHED_RR here via the attributes. However, while no errors
+ // were reported in doing so, it did not work. So, now this is
+ // done in the alsaCallbackHandler function.
+ stream_.callbackInfo.doRealtime = true;
+ int priority = options->priority;
+ int min = sched_get_priority_min( SCHED_RR );
+ int max = sched_get_priority_max( SCHED_RR );
+ if ( priority < min ) priority = min;
+ else if ( priority > max ) priority = max;
+ stream_.callbackInfo.priority = priority;
+ }
+#endif
+
+ stream_.callbackInfo.isRunning = true;
+ result = pthread_create( &stream_.callbackInfo.thread, &attr, alsaCallbackHandler, &stream_.callbackInfo );
+ pthread_attr_destroy( &attr );
+ if ( result ) {
+ stream_.callbackInfo.isRunning = false;
+ errorText_ = "RtApiAlsa::error creating callback thread!";
+ goto error;
+ }
+ }
+
+ return SUCCESS;
+
+ error:
+ if ( apiInfo ) {
+ pthread_cond_destroy( &apiInfo->runnable_cv );
+ if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
+ if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
+ delete apiInfo;
+ stream_.apiHandle = 0;
+ }
+
+ if ( phandle) snd_pcm_close( phandle );
+
+ for ( int i=0; i<2; i++ ) {
+ if ( stream_.userBuffer[i] ) {
+ free( stream_.userBuffer[i] );
+ stream_.userBuffer[i] = 0;
+ }
+ }
+
+ if ( stream_.deviceBuffer ) {
+ free( stream_.deviceBuffer );
+ stream_.deviceBuffer = 0;
+ }
+
+ stream_.state = STREAM_CLOSED;
+ return FAILURE;
+}
+
+void RtApiAlsa :: closeStream() // Stops the callback thread, then releases the ALSA handles and stream buffers.
+{
+ if ( stream_.state == STREAM_CLOSED ) {
+ errorText_ = "RtApiAlsa::closeStream(): no open stream to close!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle; // NOTE(review): dereferenced below before the null check further down
+ stream_.callbackInfo.isRunning = false; // makes the loop in alsaCallbackHandler terminate
+ MUTEX_LOCK( &stream_.mutex );
+ if ( stream_.state == STREAM_STOPPED ) { // a stopped stream's thread may be waiting on runnable_cv in callbackEvent()
+ apiInfo->runnable = true;
+ pthread_cond_signal( &apiInfo->runnable_cv ); // wake it so it can observe isRunning == false and exit
+ }
+ MUTEX_UNLOCK( &stream_.mutex );
+ pthread_join( stream_.callbackInfo.thread, NULL ); // wait for the callback thread before tearing anything down
+
+ if ( stream_.state == STREAM_RUNNING ) { // still running: discard pending frames on the active direction(s)
+ stream_.state = STREAM_STOPPED;
+ if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
+ snd_pcm_drop( apiInfo->handles[0] ); // handles[0] is the output PCM
+ if ( stream_.mode == INPUT || stream_.mode == DUPLEX )
+ snd_pcm_drop( apiInfo->handles[1] ); // handles[1] is the input PCM
+ }
+
+ if ( apiInfo ) { // destroy the condition variable, close both PCM handles, free the handle struct
+ pthread_cond_destroy( &apiInfo->runnable_cv );
+ if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
+ if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
+ delete apiInfo;
+ stream_.apiHandle = 0;
+ }
+
+ for ( int i=0; i<2; i++ ) { // release the per-direction user buffers
+ if ( stream_.userBuffer[i] ) {
+ free( stream_.userBuffer[i] );
+ stream_.userBuffer[i] = 0;
+ }
+ }
+
+ if ( stream_.deviceBuffer ) { // release the conversion buffer, if one was allocated
+ free( stream_.deviceBuffer );
+ stream_.deviceBuffer = 0;
+ }
+
+ stream_.mode = UNINITIALIZED;
+ stream_.state = STREAM_CLOSED;
+}
+
+void RtApiAlsa :: startStream() // Prepares the PCM device(s), marks the stream running and wakes the callback thread.
+{
+ // This method calls snd_pcm_prepare if the device isn't already in that state.
+
+ verifyStream();
+ if ( stream_.state == STREAM_RUNNING ) {
+ errorText_ = "RtApiAlsa::startStream(): the stream is already running!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ MUTEX_LOCK( &stream_.mutex );
+
+ int result = 0; // last ALSA return code; a negative value triggers SYSTEM_ERROR at the end
+ snd_pcm_state_t state;
+ AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
+ snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles; // handle[0] = output PCM, handle[1] = input PCM
+ if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+ state = snd_pcm_state( handle[0] );
+ if ( state != SND_PCM_STATE_PREPARED ) {
+ result = snd_pcm_prepare( handle[0] );
+ if ( result < 0 ) {
+ errorStream_ << "RtApiAlsa::startStream: error preparing output pcm device, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ goto unlock;
+ }
+ }
+ }
+
+ if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) { // skipped when the two PCMs were linked at open time
+ result = snd_pcm_drop(handle[1]); // fix to remove stale data received since device has been open
+ state = snd_pcm_state( handle[1] ); // NOTE(review): the drop result is only examined if no prepare follows
+ if ( state != SND_PCM_STATE_PREPARED ) {
+ result = snd_pcm_prepare( handle[1] );
+ if ( result < 0 ) {
+ errorStream_ << "RtApiAlsa::startStream: error preparing input pcm device, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ goto unlock;
+ }
+ }
+ }
+
+ stream_.state = STREAM_RUNNING;
+
+ unlock:
+ apiInfo->runnable = true; // reached on both success and failure paths
+ pthread_cond_signal( &apiInfo->runnable_cv ); // wakes callbackEvent() if it is waiting on the condition variable
+ MUTEX_UNLOCK( &stream_.mutex );
+
+ if ( result >= 0 ) return;
+ error( RtAudioError::SYSTEM_ERROR ); // reported only after the mutex has been released
+}
+
+void RtApiAlsa :: stopStream() // Stops the stream: drains (or drops, when linked) the output and drops the input.
+{
+ verifyStream();
+ if ( stream_.state == STREAM_STOPPED ) {
+ errorText_ = "RtApiAlsa::stopStream(): the stream is already stopped!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ stream_.state = STREAM_STOPPED; // set before taking the mutex; callbackEvent() re-checks the state after it locks
+ MUTEX_LOCK( &stream_.mutex );
+
+ int result = 0; // last ALSA return code; a negative value triggers SYSTEM_ERROR at the end
+ AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
+ snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles; // handle[0] = output PCM, handle[1] = input PCM
+ if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+ if ( apiInfo->synchronized ) // linked streams are dropped rather than drained
+ result = snd_pcm_drop( handle[0] );
+ else
+ result = snd_pcm_drain( handle[0] );
+ if ( result < 0 ) {
+ errorStream_ << "RtApiAlsa::stopStream: error draining output pcm device, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ goto unlock;
+ }
+ }
+
+ if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) { // input is handled separately only when not linked
+ result = snd_pcm_drop( handle[1] );
+ if ( result < 0 ) {
+ errorStream_ << "RtApiAlsa::stopStream: error stopping input pcm device, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ goto unlock;
+ }
+ }
+
+ unlock:
+ apiInfo->runnable = false; // fixes high CPU usage when stopped
+ MUTEX_UNLOCK( &stream_.mutex );
+
+ if ( result >= 0 ) return;
+ error( RtAudioError::SYSTEM_ERROR ); // reported only after the mutex has been released
+}
+
+void RtApiAlsa :: abortStream() // Stops the stream immediately: pending frames are dropped on both directions.
+{
+ verifyStream();
+ if ( stream_.state == STREAM_STOPPED ) {
+ errorText_ = "RtApiAlsa::abortStream(): the stream is already stopped!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ stream_.state = STREAM_STOPPED; // set before taking the mutex; callbackEvent() re-checks the state after it locks
+ MUTEX_LOCK( &stream_.mutex );
+
+ int result = 0; // last ALSA return code; a negative value triggers SYSTEM_ERROR at the end
+ AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
+ snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles; // handle[0] = output PCM, handle[1] = input PCM
+ if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+ result = snd_pcm_drop( handle[0] ); // unlike stopStream(), the output is never drained here
+ if ( result < 0 ) {
+ errorStream_ << "RtApiAlsa::abortStream: error aborting output pcm device, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ goto unlock;
+ }
+ }
+
+ if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) { // input is handled separately only when not linked
+ result = snd_pcm_drop( handle[1] );
+ if ( result < 0 ) {
+ errorStream_ << "RtApiAlsa::abortStream: error aborting input pcm device, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ goto unlock;
+ }
+ }
+
+ unlock:
+ apiInfo->runnable = false; // fixes high CPU usage when stopped
+ MUTEX_UNLOCK( &stream_.mutex );
+
+ if ( result >= 0 ) return;
+ error( RtAudioError::SYSTEM_ERROR ); // reported only after the mutex has been released
+}
+
+void RtApiAlsa :: callbackEvent() // One iteration of the callback thread: run the user callback, then read input and write output.
+{
+ AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
+ if ( stream_.state == STREAM_STOPPED ) { // stopped: block until startStream()/closeStream() sets runnable and signals
+ MUTEX_LOCK( &stream_.mutex );
+ while ( !apiInfo->runnable )
+ pthread_cond_wait( &apiInfo->runnable_cv, &stream_.mutex );
+
+ if ( stream_.state != STREAM_RUNNING ) { // woken for something other than a start (e.g. close); let the caller loop re-check
+ MUTEX_UNLOCK( &stream_.mutex );
+ return;
+ }
+ MUTEX_UNLOCK( &stream_.mutex );
+ }
+
+ if ( stream_.state == STREAM_CLOSED ) {
+ errorText_ = "RtApiAlsa::callbackEvent(): the stream is closed ... this shouldn't happen!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ int doStopStream = 0; // user callback's return value: 1 -> stopStream() at the end, 2 -> abortStream() immediately
+ RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
+ double streamTime = getStreamTime();
+ RtAudioStreamStatus status = 0;
+ if ( stream_.mode != INPUT && apiInfo->xrun[0] == true ) { // report an output xrun recorded by a previous iteration, then clear it
+ status |= RTAUDIO_OUTPUT_UNDERFLOW;
+ apiInfo->xrun[0] = false;
+ }
+ if ( stream_.mode != OUTPUT && apiInfo->xrun[1] == true ) { // same for an input xrun
+ status |= RTAUDIO_INPUT_OVERFLOW;
+ apiInfo->xrun[1] = false;
+ }
+ doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+ stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
+
+ if ( doStopStream == 2 ) {
+ abortStream();
+ return;
+ }
+
+ MUTEX_LOCK( &stream_.mutex );
+
+ // The state might change while waiting on a mutex.
+ if ( stream_.state == STREAM_STOPPED ) goto unlock;
+
+ int result;
+ char *buffer;
+ int channels;
+ snd_pcm_t **handle;
+ snd_pcm_sframes_t frames;
+ RtAudioFormat format;
+ handle = (snd_pcm_t **) apiInfo->handles; // handle[0] = output PCM, handle[1] = input PCM
+
+ if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+ // Setup parameters.
+ if ( stream_.doConvertBuffer[1] ) { // read into the device buffer when a conversion step follows
+ buffer = stream_.deviceBuffer;
+ channels = stream_.nDeviceChannels[1];
+ format = stream_.deviceFormat[1];
+ }
+ else {
+ buffer = stream_.userBuffer[1];
+ channels = stream_.nUserChannels[1];
+ format = stream_.userFormat;
+ }
+
+ // Read samples from device in interleaved/non-interleaved format.
+ if ( stream_.deviceInterleaved[1] )
+ result = snd_pcm_readi( handle[1], buffer, stream_.bufferSize );
+ else {
+ void *bufs[channels]; // one pointer per channel, each offset by one channel's worth of bytes
+ size_t offset = stream_.bufferSize * formatBytes( format );
+ for ( int i=0; i<channels; i++ )
+ bufs[i] = (void *) (buffer + (i * offset));
+ result = snd_pcm_readn( handle[1], bufs, stream_.bufferSize );
+ }
+
+ if ( result < (int) stream_.bufferSize ) {
+ // Either an error or overrun occurred.
+ if ( result == -EPIPE ) {
+ snd_pcm_state_t state = snd_pcm_state( handle[1] );
+ if ( state == SND_PCM_STATE_XRUN ) {
+ apiInfo->xrun[1] = true; // reported to the user callback on the next iteration
+ result = snd_pcm_prepare( handle[1] );
+ if ( result < 0 ) {
+ errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after overrun, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ }
+ }
+ else {
+ errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ }
+ }
+ else {
+ errorStream_ << "RtApiAlsa::callbackEvent: audio read error, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ }
+ error( RtAudioError::WARNING );
+ goto tryOutput; // a failed read skips byte swapping/conversion but still attempts the output side
+ }
+
+ // Do byte swapping if necessary.
+ if ( stream_.doByteSwap[1] )
+ byteSwapBuffer( buffer, stream_.bufferSize * channels, format );
+
+ // Do buffer conversion if necessary.
+ if ( stream_.doConvertBuffer[1] )
+ convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+
+ // Check stream latency
+ result = snd_pcm_delay( handle[1], &frames );
+ if ( result == 0 && frames > 0 ) stream_.latency[1] = frames;
+ }
+
+ tryOutput:
+
+ if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+ // Setup parameters and do buffer conversion if necessary.
+ if ( stream_.doConvertBuffer[0] ) {
+ buffer = stream_.deviceBuffer;
+ convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+ channels = stream_.nDeviceChannels[0];
+ format = stream_.deviceFormat[0];
+ }
+ else {
+ buffer = stream_.userBuffer[0];
+ channels = stream_.nUserChannels[0];
+ format = stream_.userFormat;
+ }
+
+ // Do byte swapping if necessary.
+ if ( stream_.doByteSwap[0] )
+ byteSwapBuffer(buffer, stream_.bufferSize * channels, format);
+
+ // Write samples to device in interleaved/non-interleaved format.
+ if ( stream_.deviceInterleaved[0] )
+ result = snd_pcm_writei( handle[0], buffer, stream_.bufferSize );
+ else {
+ void *bufs[channels]; // one pointer per channel, each offset by one channel's worth of bytes
+ size_t offset = stream_.bufferSize * formatBytes( format );
+ for ( int i=0; i<channels; i++ )
+ bufs[i] = (void *) (buffer + (i * offset));
+ result = snd_pcm_writen( handle[0], bufs, stream_.bufferSize );
+ }
+
+ if ( result < (int) stream_.bufferSize ) {
+ // Either an error or underrun occurred.
+ if ( result == -EPIPE ) {
+ snd_pcm_state_t state = snd_pcm_state( handle[0] );
+ if ( state == SND_PCM_STATE_XRUN ) {
+ apiInfo->xrun[0] = true; // reported to the user callback on the next iteration
+ result = snd_pcm_prepare( handle[0] );
+ if ( result < 0 ) {
+ errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after underrun, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ }
+ else
+ errorText_ = "RtApiAlsa::callbackEvent: audio write error, underrun.";
+ }
+ else {
+ errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ }
+ }
+ else {
+ errorStream_ << "RtApiAlsa::callbackEvent: audio write error, " << snd_strerror( result ) << ".";
+ errorText_ = errorStream_.str();
+ }
+ error( RtAudioError::WARNING );
+ goto unlock;
+ }
+
+ // Check stream latency
+ result = snd_pcm_delay( handle[0], &frames );
+ if ( result == 0 && frames > 0 ) stream_.latency[0] = frames;
+ }
+
+ unlock:
+ MUTEX_UNLOCK( &stream_.mutex );
+
+ RtApi::tickStreamTime();
+ if ( doStopStream == 1 ) this->stopStream(); // deferred until the mutex is released, since stopStream() locks it itself
+}
+
+// Entry point of the ALSA callback thread. ptr is the CallbackInfo handed to
+// pthread_create() by RtApiAlsa::probeDeviceOpen(). If realtime scheduling was
+// requested, the thread switches itself to SCHED_RR at the stored priority,
+// then calls callbackEvent() repeatedly until CallbackInfo::isRunning is cleared.
+static void *alsaCallbackHandler( void *ptr )
+{
+ CallbackInfo *info = (CallbackInfo *) ptr;
+ RtApiAlsa *object = (RtApiAlsa *) info->object;
+ bool *isRunning = &info->isRunning;
+
+#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
+ // Test the flag's value. Testing its address (&info->doRealtime) is always
+ // true and would force SCHED_RR on every stream, requested or not.
+ if ( info->doRealtime ) {
+ pthread_t tID = pthread_self(); // ID of this thread
+ sched_param prio = { info->priority }; // scheduling priority of thread
+ pthread_setschedparam( tID, SCHED_RR, &prio );
+ }
+#endif
+
+ while ( *isRunning == true ) {
+ pthread_testcancel();
+ object->callbackEvent();
+ }
+
+ pthread_exit( NULL );
+}
+
+//******************** End of __LINUX_ALSA__ *********************//
+#endif
+
+#if defined(__LINUX_PULSE__)
+
+// Code written by Peter Meerwald, pmeerw@pmeerw.net
+// and Tristan Matthews.
+
+#include <pulse/error.h>
+#include <pulse/simple.h>
+#include <cstdio>
+
+static const unsigned int SUPPORTED_SAMPLERATES[] = { 8000, 16000, 22050, 32000, // sample rates the PulseAudio backend reports and accepts
+ 44100, 48000, 96000, 0}; // the trailing 0 terminates the list for the pointer-walking loops that scan it
+
+struct rtaudio_pa_format_mapping_t { // one row pairing an RtAudio sample format with a PulseAudio sample format
+ RtAudioFormat rtaudio_format; // format as requested through the RtAudio API
+ pa_sample_format_t pa_format; // PulseAudio format used for that request
+};
+
+static const rtaudio_pa_format_mapping_t supported_sampleformats[] = { // formats passed to PulseAudio without conversion
+ {RTAUDIO_SINT16, PA_SAMPLE_S16LE},
+ {RTAUDIO_SINT32, PA_SAMPLE_S32LE},
+ {RTAUDIO_FLOAT32, PA_SAMPLE_FLOAT32LE},
+ {0, PA_SAMPLE_INVALID}}; // sentinel row: the lookup loop in probeDeviceOpen() stops here
+
+// Per-stream state of the PulseAudio backend, stored in stream_.apiHandle.
+struct PulseAudioHandle {
+ pa_simple *s_play; // playback connection, 0 until an output stream is opened
+ pa_simple *s_rec; // record connection, 0 until an input stream is opened
+ pthread_t thread; // callback thread created in probeDeviceOpen()
+ pthread_cond_t runnable_cv; // signalled when 'runnable' is set
+ bool runnable; // callbackEvent() waits on runnable_cv while this is false
+
+ // Starts with no connections and a non-runnable stream; 'thread' and
+ // 'runnable_cv' are left for probeDeviceOpen() to initialize.
+ PulseAudioHandle()
+ {
+ s_play = 0;
+ s_rec = 0;
+ runnable = false;
+ }
+};
+
+RtApiPulse::~RtApiPulse()
+{
+ // Nothing to release when no stream is open.
+ if ( stream_.state == STREAM_CLOSED ) return;
+
+ closeStream();
+}
+
+unsigned int RtApiPulse::getDeviceCount( void )
+{
+ // This backend always reports exactly one device.
+ const unsigned int deviceCount = 1;
+ return deviceCount;
+}
+
+// Returns the fixed description of the single "PulseAudio" device; the device
+// index argument is ignored.
+RtAudio::DeviceInfo RtApiPulse::getDeviceInfo( unsigned int /*device*/ )
+{
+ RtAudio::DeviceInfo description;
+
+ // Identity and default-device flags.
+ description.probed = true;
+ description.name = "PulseAudio";
+ description.isDefaultOutput = true;
+ description.isDefaultInput = true;
+
+ // Channel counts reported in every direction.
+ description.outputChannels = 2;
+ description.inputChannels = 2;
+ description.duplexChannels = 2;
+
+ // Copy the zero-terminated rate table.
+ for ( unsigned int idx = 0; SUPPORTED_SAMPLERATES[idx] != 0; ++idx )
+ description.sampleRates.push_back( SUPPORTED_SAMPLERATES[idx] );
+
+ description.preferredSampleRate = 48000;
+ description.nativeFormats = RTAUDIO_SINT16 | RTAUDIO_SINT32 | RTAUDIO_FLOAT32;
+
+ return description;
+}
+
+static void *pulseaudio_callback( void * user ) // Thread entry point: runs callbackEvent() until CallbackInfo::isRunning is cleared.
+{
+ CallbackInfo *cbi = static_cast<CallbackInfo *>( user ); // 'user' is the CallbackInfo passed to pthread_create in probeDeviceOpen()
+ RtApiPulse *context = static_cast<RtApiPulse *>( cbi->object );
+ volatile bool *isRunning = &cbi->isRunning; // re-read on every iteration; closeStream() clears it from another thread
+
+ while ( *isRunning ) {
+ pthread_testcancel();
+ context->callbackEvent();
+ }
+
+ pthread_exit( NULL );
+}
+
+// Shuts the stream down: stops and joins the callback thread, frees the
+// PulseAudio connections and the handle, releases the user buffers and the
+// conversion buffer, and resets the stream to CLOSED / UNINITIALIZED.
+void RtApiPulse::closeStream( void )
+{
+ PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+ stream_.callbackInfo.isRunning = false; // makes the loop in pulseaudio_callback terminate
+ if ( pah ) {
+ MUTEX_LOCK( &stream_.mutex );
+ if ( stream_.state == STREAM_STOPPED ) {
+ // A stopped stream's thread may be waiting on runnable_cv; wake it so it can exit.
+ pah->runnable = true;
+ pthread_cond_signal( &pah->runnable_cv );
+ }
+ MUTEX_UNLOCK( &stream_.mutex );
+
+ pthread_join( pah->thread, 0 );
+ if ( pah->s_play ) {
+ pa_simple_flush( pah->s_play, NULL );
+ pa_simple_free( pah->s_play );
+ }
+ if ( pah->s_rec )
+ pa_simple_free( pah->s_rec );
+
+ pthread_cond_destroy( &pah->runnable_cv );
+ delete pah;
+ stream_.apiHandle = 0;
+ }
+
+ if ( stream_.userBuffer[0] ) {
+ free( stream_.userBuffer[0] );
+ stream_.userBuffer[0] = 0;
+ }
+ if ( stream_.userBuffer[1] ) {
+ free( stream_.userBuffer[1] );
+ stream_.userBuffer[1] = 0;
+ }
+
+ // probeDeviceOpen() allocates this buffer whenever format conversion is
+ // needed; release it here as well so closing the stream does not leak it.
+ if ( stream_.deviceBuffer ) {
+ free( stream_.deviceBuffer );
+ stream_.deviceBuffer = 0;
+ }
+
+ stream_.state = STREAM_CLOSED;
+ stream_.mode = UNINITIALIZED;
+}
+
+void RtApiPulse::callbackEvent( void ) // One iteration of the callback thread: run the user callback, write output, then read input.
+{
+ PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+ if ( stream_.state == STREAM_STOPPED ) { // stopped: block until 'runnable' is set and signalled
+ MUTEX_LOCK( &stream_.mutex );
+ while ( !pah->runnable )
+ pthread_cond_wait( &pah->runnable_cv, &stream_.mutex );
+
+ if ( stream_.state != STREAM_RUNNING ) { // woken for something other than a start (e.g. close); let the caller loop re-check
+ MUTEX_UNLOCK( &stream_.mutex );
+ return;
+ }
+ MUTEX_UNLOCK( &stream_.mutex );
+ }
+
+ if ( stream_.state == STREAM_CLOSED ) {
+ errorText_ = "RtApiPulse::callbackEvent(): the stream is closed ... "
+ "this shouldn't happen!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
+ double streamTime = getStreamTime();
+ RtAudioStreamStatus status = 0; // always passed as 0 here; no xrun flags are set in this backend
+ int doStopStream = callback( stream_.userBuffer[OUTPUT], stream_.userBuffer[INPUT],
+ stream_.bufferSize, streamTime, status,
+ stream_.callbackInfo.userData );
+
+ if ( doStopStream == 2 ) { // 2 -> abort immediately, 1 -> stop after this iteration's I/O
+ abortStream();
+ return;
+ }
+
+ MUTEX_LOCK( &stream_.mutex );
+ void *pulse_in = stream_.doConvertBuffer[INPUT] ? stream_.deviceBuffer : stream_.userBuffer[INPUT]; // read target; deviceBuffer when a conversion follows
+ void *pulse_out = stream_.doConvertBuffer[OUTPUT] ? stream_.deviceBuffer : stream_.userBuffer[OUTPUT]; // write source; deviceBuffer holds the converted samples
+
+ if ( stream_.state != STREAM_RUNNING ) // the state may have changed while waiting for the mutex
+ goto unlock;
+
+ int pa_error;
+ size_t bytes; // size of one buffer in bytes for the direction being processed
+ if (stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+ if ( stream_.doConvertBuffer[OUTPUT] ) {
+ convertBuffer( stream_.deviceBuffer,
+ stream_.userBuffer[OUTPUT],
+ stream_.convertInfo[OUTPUT] );
+ bytes = stream_.nDeviceChannels[OUTPUT] * stream_.bufferSize *
+ formatBytes( stream_.deviceFormat[OUTPUT] );
+ } else
+ bytes = stream_.nUserChannels[OUTPUT] * stream_.bufferSize *
+ formatBytes( stream_.userFormat );
+
+ if ( pa_simple_write( pah->s_play, pulse_out, bytes, &pa_error ) < 0 ) { // a failed write is only a warning
+ errorStream_ << "RtApiPulse::callbackEvent: audio write error, " <<
+ pa_strerror( pa_error ) << ".";
+ errorText_ = errorStream_.str();
+ error( RtAudioError::WARNING );
+ }
+ }
+
+ if ( stream_.mode == INPUT || stream_.mode == DUPLEX) {
+ if ( stream_.doConvertBuffer[INPUT] )
+ bytes = stream_.nDeviceChannels[INPUT] * stream_.bufferSize *
+ formatBytes( stream_.deviceFormat[INPUT] );
+ else
+ bytes = stream_.nUserChannels[INPUT] * stream_.bufferSize *
+ formatBytes( stream_.userFormat );
+
+ if ( pa_simple_read( pah->s_rec, pulse_in, bytes, &pa_error ) < 0 ) { // a failed read is only a warning; conversion below still runs
+ errorStream_ << "RtApiPulse::callbackEvent: audio read error, " <<
+ pa_strerror( pa_error ) << ".";
+ errorText_ = errorStream_.str();
+ error( RtAudioError::WARNING );
+ }
+ if ( stream_.doConvertBuffer[INPUT] ) {
+ convertBuffer( stream_.userBuffer[INPUT],
+ stream_.deviceBuffer,
+ stream_.convertInfo[INPUT] );
+ }
+ }
+
+ unlock:
+ MUTEX_UNLOCK( &stream_.mutex );
+ RtApi::tickStreamTime();
+
+ if ( doStopStream == 1 ) // deferred until the mutex is released, since stopStream() locks it itself
+ stopStream();
+}
+
+void RtApiPulse::startStream( void ) // Marks the stream running and wakes the callback thread.
+{
+ PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+ if ( stream_.state == STREAM_CLOSED ) {
+ errorText_ = "RtApiPulse::startStream(): the stream is not open!";
+ error( RtAudioError::INVALID_USE );
+ return;
+ }
+ if ( stream_.state == STREAM_RUNNING ) {
+ errorText_ = "RtApiPulse::startStream(): the stream is already running!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ MUTEX_LOCK( &stream_.mutex );
+
+ stream_.state = STREAM_RUNNING;
+
+ pah->runnable = true; // releases the wait loop in callbackEvent()
+ pthread_cond_signal( &pah->runnable_cv );
+ MUTEX_UNLOCK( &stream_.mutex );
+}
+
+// Stops a running stream, draining any queued playback data first.
+// Raises INVALID_USE if the stream is not open, WARNING if it is already
+// stopped, and SYSTEM_ERROR if draining the playback connection fails.
+void RtApiPulse::stopStream( void )
+{
+ PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+ if ( stream_.state == STREAM_CLOSED ) {
+ errorText_ = "RtApiPulse::stopStream(): the stream is not open!";
+ error( RtAudioError::INVALID_USE );
+ return;
+ }
+ if ( stream_.state == STREAM_STOPPED ) {
+ errorText_ = "RtApiPulse::stopStream(): the stream is already stopped!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ stream_.state = STREAM_STOPPED;
+ MUTEX_LOCK( &stream_.mutex );
+
+ // Clear the runnable flag so callbackEvent() blocks on runnable_cv while the
+ // stream is stopped; otherwise the callback thread spins at full CPU.
+ if ( pah ) pah->runnable = false;
+
+ if ( pah && pah->s_play ) {
+ int pa_error;
+ if ( pa_simple_drain( pah->s_play, &pa_error ) < 0 ) {
+ errorStream_ << "RtApiPulse::stopStream: error draining output device, " <<
+ pa_strerror( pa_error ) << ".";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+ }
+
+ stream_.state = STREAM_STOPPED;
+ MUTEX_UNLOCK( &stream_.mutex );
+}
+
+// Stops a running stream immediately, flushing (discarding) queued playback
+// data. Raises INVALID_USE if the stream is not open, WARNING if it is already
+// stopped, and SYSTEM_ERROR if flushing the playback connection fails.
+void RtApiPulse::abortStream( void )
+{
+ PulseAudioHandle *pah = static_cast<PulseAudioHandle*>( stream_.apiHandle );
+
+ if ( stream_.state == STREAM_CLOSED ) {
+ errorText_ = "RtApiPulse::abortStream(): the stream is not open!";
+ error( RtAudioError::INVALID_USE );
+ return;
+ }
+ if ( stream_.state == STREAM_STOPPED ) {
+ errorText_ = "RtApiPulse::abortStream(): the stream is already stopped!";
+ error( RtAudioError::WARNING );
+ return;
+ }
+
+ stream_.state = STREAM_STOPPED;
+ MUTEX_LOCK( &stream_.mutex );
+
+ // Clear the runnable flag so callbackEvent() blocks on runnable_cv while the
+ // stream is stopped; otherwise the callback thread spins at full CPU.
+ if ( pah ) pah->runnable = false;
+
+ if ( pah && pah->s_play ) {
+ int pa_error;
+ if ( pa_simple_flush( pah->s_play, &pa_error ) < 0 ) {
+ errorStream_ << "RtApiPulse::abortStream: error flushing output device, " <<
+ pa_strerror( pa_error ) << ".";
+ errorText_ = errorStream_.str();
+ MUTEX_UNLOCK( &stream_.mutex );
+ error( RtAudioError::SYSTEM_ERROR );
+ return;
+ }
+ }
+
+ stream_.state = STREAM_STOPPED;
+ MUTEX_UNLOCK( &stream_.mutex );
+}
+
+// Opens one direction (INPUT or OUTPUT) of the PulseAudio stream.
+//
+// device       must be 0 (the only device this backend exposes).
+// mode         INPUT or OUTPUT; a second call with the other mode makes the stream DUPLEX.
+// channels     1 or 2.
+// firstChannel must be 0.
+// sampleRate   must be one of SUPPORTED_SAMPLERATES.
+// format       user sample format; formats not in supported_sampleformats are
+//              converted internally to/from FLOAT32.
+// bufferSize   frames per buffer, used as given.
+// options      optional; the NONINTERLEAVED flag and streamName are honoured.
+//
+// Returns true on success. On failure returns false, in most cases with
+// errorText_ set, after releasing the buffers allocated so far.
+bool RtApiPulse::probeDeviceOpen( unsigned int device, StreamMode mode,
+ unsigned int channels, unsigned int firstChannel,
+ unsigned int sampleRate, RtAudioFormat format,
+ unsigned int *bufferSize, RtAudio::StreamOptions *options )
+{
+ PulseAudioHandle *pah = 0;
+ unsigned long bufferBytes = 0;
+ pa_sample_spec ss;
+
+ if ( device != 0 ) return false;
+ if ( mode != INPUT && mode != OUTPUT ) return false;
+ if ( channels != 1 && channels != 2 ) {
+ errorText_ = "RtApiPulse::probeDeviceOpen: unsupported number of channels.";
+ return false;
+ }
+ ss.channels = channels;
+
+ if ( firstChannel != 0 ) return false;
+
+ // Accept only sample rates listed in the zero-terminated table.
+ bool sr_found = false;
+ for ( const unsigned int *sr = SUPPORTED_SAMPLERATES; *sr; ++sr ) {
+ if ( sampleRate == *sr ) {
+ sr_found = true;
+ stream_.sampleRate = sampleRate;
+ ss.rate = sampleRate;
+ break;
+ }
+ }
+ if ( !sr_found ) {
+ errorText_ = "RtApiPulse::probeDeviceOpen: unsupported sample rate.";
+ return false;
+ }
+
+ // Look for a direct PulseAudio equivalent of the requested format.
+ bool sf_found = 0;
+ for ( const rtaudio_pa_format_mapping_t *sf = supported_sampleformats;
+ sf->rtaudio_format && sf->pa_format != PA_SAMPLE_INVALID; ++sf ) {
+ if ( format == sf->rtaudio_format ) {
+ sf_found = true;
+ stream_.userFormat = sf->rtaudio_format;
+ stream_.deviceFormat[mode] = stream_.userFormat;
+ ss.format = sf->pa_format;
+ break;
+ }
+ }
+ if ( !sf_found ) { // Use internal data format conversion.
+ stream_.userFormat = format;
+ stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+ ss.format = PA_SAMPLE_FLOAT32LE;
+ }
+
+ // Set other stream parameters.
+ if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+ else stream_.userInterleaved = true;
+ stream_.deviceInterleaved[mode] = true;
+ stream_.nBuffers = 1;
+ stream_.doByteSwap[mode] = false;
+ stream_.nUserChannels[mode] = channels;
+ stream_.nDeviceChannels[mode] = channels + firstChannel;
+ stream_.channelOffset[mode] = 0;
+ std::string streamName = "RtAudio";
+
+ // Set flags for buffer conversion.
+ stream_.doConvertBuffer[mode] = false;
+ if ( stream_.userFormat != stream_.deviceFormat[mode] )
+ stream_.doConvertBuffer[mode] = true;
+ if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
+ stream_.doConvertBuffer[mode] = true;
+
+ // Allocate necessary internal buffers.
+ bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+ stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+ if ( stream_.userBuffer[mode] == NULL ) {
+ errorText_ = "RtApiPulse::probeDeviceOpen: error allocating user buffer memory.";
+ goto error;
+ }
+ stream_.bufferSize = *bufferSize;
+
+ if ( stream_.doConvertBuffer[mode] ) {
+
+ // The device buffer is shared by both directions: when opening the input
+ // side of a duplex stream, reuse the output side's buffer if it is already
+ // large enough per frame.
+ bool makeBuffer = true;
+ bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+ if ( mode == INPUT ) {
+ if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+ unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+ if ( bufferBytes <= bytesOut ) makeBuffer = false;
+ }
+ }
+
+ if ( makeBuffer ) {
+ bufferBytes *= *bufferSize;
+ if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+ stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+ if ( stream_.deviceBuffer == NULL ) {
+ errorText_ = "RtApiPulse::probeDeviceOpen: error allocating device buffer memory.";
+ goto error;
+ }
+ }
+ }
+
+ stream_.device[mode] = device;
+
+ // Setup the buffer conversion information structure.
+ if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
+
+ // Create the API handle on the first open; a later open for the other
+ // direction reuses it.
+ if ( !stream_.apiHandle ) {
+ PulseAudioHandle *pah = new PulseAudioHandle;
+ if ( !pah ) {
+ errorText_ = "RtApiPulse::probeDeviceOpen: error allocating memory for handle.";
+ goto error;
+ }
+
+ stream_.apiHandle = pah;
+ if ( pthread_cond_init( &pah->runnable_cv, NULL ) != 0 ) {
+ errorText_ = "RtApiPulse::probeDeviceOpen: error creating condition variable.";
+ goto error;
+ }
+ }
+ pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+ int error;
+ if ( options && !options->streamName.empty() ) streamName = options->streamName;
+ switch ( mode ) {
+ case INPUT:
+ pa_buffer_attr buffer_attr;
+ buffer_attr.fragsize = bufferBytes;
+ buffer_attr.maxlength = -1;
+
+ pah->s_rec = pa_simple_new( NULL, streamName.c_str(), PA_STREAM_RECORD, NULL, "Record", &ss, NULL, &buffer_attr, &error );
+ if ( !pah->s_rec ) {
+ errorText_ = "RtApiPulse::probeDeviceOpen: error connecting input to PulseAudio server.";
+ goto error;
+ }
+ break;
+ case OUTPUT:
+ // Use the caller-supplied stream name here too, as the record connection
+ // does, instead of a hard-coded "RtAudio".
+ pah->s_play = pa_simple_new( NULL, streamName.c_str(), PA_STREAM_PLAYBACK, NULL, "Playback", &ss, NULL, NULL, &error );
+ if ( !pah->s_play ) {
+ errorText_ = "RtApiPulse::probeDeviceOpen: error connecting output to PulseAudio server.";
+ goto error;
+ }
+ break;
+ default:
+ goto error;
+ }
+
+ // First open sets the mode; opening the opposite direction makes it duplex;
+ // opening the same direction twice is an error.
+ if ( stream_.mode == UNINITIALIZED )
+ stream_.mode = mode;
+ else if ( stream_.mode == mode )
+ goto error;
+ else
+ stream_.mode = DUPLEX;
+
+ // The callback thread is created once and shared by both directions.
+ if ( !stream_.callbackInfo.isRunning ) {
+ stream_.callbackInfo.object = this;
+ stream_.callbackInfo.isRunning = true;
+ if ( pthread_create( &pah->thread, NULL, pulseaudio_callback, (void *)&stream_.callbackInfo) != 0 ) {
+ errorText_ = "RtApiPulse::probeDeviceOpen: error creating thread.";
+ goto error;
+ }
+ }
+
+ stream_.state = STREAM_STOPPED;
+ return true;
+
+ error:
+ // NOTE(review): the handle is only released when isRunning is set, and
+ // isRunning is not reset if pthread_create failed; any pa_simple connection
+ // already created is not freed here. Left as-is; confirm intended cleanup.
+ if ( pah && stream_.callbackInfo.isRunning ) {
+ pthread_cond_destroy( &pah->runnable_cv );
+ delete pah;
+ stream_.apiHandle = 0;
+ }
+
+ for ( int i=0; i<2; i++ ) {
+ if ( stream_.userBuffer[i] ) {
+ free( stream_.userBuffer[i] );
+ stream_.userBuffer[i] = 0;
+ }
+ }
+
+ if ( stream_.deviceBuffer ) {
+ free( stream_.deviceBuffer );
+ stream_.deviceBuffer = 0;
+ }
+
+ return FAILURE;
+}
+
+//******************** End of __LINUX_PULSE__ *********************//
+#endif
+
+#if defined(__LINUX_OSS__)
+
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/soundcard.h>
+#include <errno.h>
+#include <math.h>
+
+static void *ossCallbackHandler(void * ptr);
+
+// A structure to hold various information related to the OSS API
+// implementation.
+ struct OssHandle {
+   int id[2];               // device ids, indexed by stream mode
+   bool xrun[2];            // set when a write (0) or read (1) on the device fails
+   bool triggered;          // true once the duplex trigger has been issued
+   pthread_cond_t runnable; // signalled to wake the callback thread
+
+   // Start with no devices attached, no pending xruns and no trigger issued.
+   OssHandle()
+   {
+     triggered = false;
+     for ( int i=0; i<2; i++ ) {
+       id[i] = 0;
+       xrun[i] = false;
+     }
+   }
+ };
+
+ RtApiOss :: RtApiOss()
+ {
+   // No OSS-specific construction work is required.
+ }
+
+ RtApiOss :: ~RtApiOss()
+ {
+   // Tear down any stream that is still open before the object goes away.
+   if ( stream_.state == STREAM_CLOSED ) return;
+   closeStream();
+ }
+
+ // Ask the OSS mixer how many audio devices exist.  Returns 0 (after
+ // issuing a warning) when the mixer cannot be opened or queried.
+ unsigned int RtApiOss :: getDeviceCount( void )
+ {
+   const int mixer = open( "/dev/mixer", O_RDWR, 0 );
+   if ( mixer == -1 ) {
+     errorText_ = "RtApiOss::getDeviceCount: error opening '/dev/mixer'.";
+     error( RtAudioError::WARNING );
+     return 0;
+   }
+
+   // The mixer is only needed for this one query, so release it right away.
+   oss_sysinfo systemInfo;
+   const int queryStatus = ioctl( mixer, SNDCTL_SYSINFO, &systemInfo );
+   close( mixer );
+
+   if ( queryStatus == -1 ) {
+     errorText_ = "RtApiOss::getDeviceCount: error getting sysinfo, OSS version >= 4.0 is required.";
+     error( RtAudioError::WARNING );
+     return 0;
+   }
+
+   return systemInfo.numaudios;
+ }
+
+ // Probe one OSS audio device through '/dev/mixer' and report its channel
+ // counts, native data formats and supported sample rates.
+ //
+ // device: index of the device to probe; must be below the device count
+ //         reported by SNDCTL_SYSINFO.
+ //
+ // Returns a DeviceInfo whose 'probed' member is true only when at least
+ // one data format and one sample rate were recognised.  Every failure is
+ // reported through error() and yields a DeviceInfo with probed == false.
+ RtAudio::DeviceInfo RtApiOss :: getDeviceInfo( unsigned int device )
+ {
+   RtAudio::DeviceInfo info;
+   info.probed = false;
+
+   int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
+   if ( mixerfd == -1 ) {
+     errorText_ = "RtApiOss::getDeviceInfo: error opening '/dev/mixer'.";
+     error( RtAudioError::WARNING );
+     return info;
+   }
+
+   oss_sysinfo sysinfo;
+   int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
+   if ( result == -1 ) {
+     close( mixerfd );
+     errorText_ = "RtApiOss::getDeviceInfo: error getting sysinfo, OSS version >= 4.0 is required.";
+     error( RtAudioError::WARNING );
+     return info;
+   }
+
+   unsigned nDevices = sysinfo.numaudios;
+   if ( nDevices == 0 ) {
+     close( mixerfd );
+     errorText_ = "RtApiOss::getDeviceInfo: no devices found!";
+     error( RtAudioError::INVALID_USE );
+     return info;
+   }
+
+   if ( device >= nDevices ) {
+     close( mixerfd );
+     errorText_ = "RtApiOss::getDeviceInfo: device ID is invalid!";
+     error( RtAudioError::INVALID_USE );
+     return info;
+   }
+
+   oss_audioinfo ainfo;
+   ainfo.dev = device;
+   result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
+   close( mixerfd );
+   if ( result == -1 ) {
+     // ainfo was never filled in when the ioctl fails, so identify the
+     // device by its index rather than by the (uninitialized) name field.
+     errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << device << ") info.";
+     errorText_ = errorStream_.str();
+     error( RtAudioError::WARNING );
+     return info;
+   }
+
+   // Probe channels
+   if ( ainfo.caps & PCM_CAP_OUTPUT ) info.outputChannels = ainfo.max_channels;
+   if ( ainfo.caps & PCM_CAP_INPUT ) info.inputChannels = ainfo.max_channels;
+   if ( ( ainfo.caps & PCM_CAP_DUPLEX ) && info.outputChannels > 0 && info.inputChannels > 0 )
+     info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+   // Probe data formats ... do for input
+   unsigned long mask = ainfo.iformats;
+   if ( mask & AFMT_S16_LE || mask & AFMT_S16_BE )
+     info.nativeFormats |= RTAUDIO_SINT16;
+   if ( mask & AFMT_S8 )
+     info.nativeFormats |= RTAUDIO_SINT8;
+   if ( mask & AFMT_S32_LE || mask & AFMT_S32_BE )
+     info.nativeFormats |= RTAUDIO_SINT32;
+   if ( mask & AFMT_FLOAT )
+     info.nativeFormats |= RTAUDIO_FLOAT32;
+   if ( mask & AFMT_S24_LE || mask & AFMT_S24_BE )
+     info.nativeFormats |= RTAUDIO_SINT24;
+
+   // Check that we have at least one supported format
+   if ( info.nativeFormats == 0 ) {
+     errorStream_ << "RtApiOss::getDeviceInfo: device (" << ainfo.name << ") data format not supported by RtAudio.";
+     errorText_ = errorStream_.str();
+     error( RtAudioError::WARNING );
+     return info;
+   }
+
+   // Probe the supported sample rates.
+   info.sampleRates.clear();
+   if ( ainfo.nrates ) {
+     // The device lists discrete rates: keep those that RtAudio knows about.
+     for ( unsigned int i=0; i<ainfo.nrates; i++ ) {
+       for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
+         if ( ainfo.rates[i] == SAMPLE_RATES[k] ) {
+           info.sampleRates.push_back( SAMPLE_RATES[k] );
+
+           // Prefer the highest supported rate that does not exceed 48 kHz.
+           if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
+             info.preferredSampleRate = SAMPLE_RATES[k];
+
+           break;
+         }
+       }
+     }
+   }
+   else {
+     // Check min and max rate values;
+     for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
+       if ( ainfo.min_rate <= (int) SAMPLE_RATES[k] && ainfo.max_rate >= (int) SAMPLE_RATES[k] ) {
+         info.sampleRates.push_back( SAMPLE_RATES[k] );
+
+         if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
+           info.preferredSampleRate = SAMPLE_RATES[k];
+       }
+     }
+   }
+
+   if ( info.sampleRates.size() == 0 ) {
+     errorStream_ << "RtApiOss::getDeviceInfo: no supported sample rates found for device (" << ainfo.name << ").";
+     errorText_ = errorStream_.str();
+     error( RtAudioError::WARNING );
+   }
+   else {
+     info.probed = true;
+     info.name = ainfo.name;
+   }
+
+   return info;
+ }
+
+
+bool RtApiOss :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+ unsigned int firstChannel, unsigned int sampleRate,
+ RtAudioFormat format, unsigned int *bufferSize,
+ RtAudio::StreamOptions *options )
+{
+ int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
+ if ( mixerfd == -1 ) {
+ errorText_ = "RtApiOss::probeDeviceOpen: error opening '/dev/mixer'.";
+ return FAILURE;
+ }
+
+ oss_sysinfo sysinfo;
+ int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
+ if ( result == -1 ) {
+ close( mixerfd );
+ errorText_ = "RtApiOss::probeDeviceOpen: error getting sysinfo, OSS version >= 4.0 is required.";
+ return FAILURE;
+ }
+
+ unsigned nDevices = sysinfo.numaudios;
+ if ( nDevices == 0 ) {
+ // This should not happen because a check is made before this function is called.
+ close( mixerfd );
+ errorText_ = "RtApiOss::probeDeviceOpen: no devices found!";
+ return FAILURE;
+ }
+
+ if ( device >= nDevices ) {
+ // This should not happen because a check is made before this function is called.
+ close( mixerfd );
+ errorText_ = "RtApiOss::probeDeviceOpen: device ID is invalid!";
+ return FAILURE;
+ }
+
+ oss_audioinfo ainfo;
+ ainfo.dev = device;
+ result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
+ close( mixerfd );
+ if ( result == -1 ) {
+ errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << ainfo.name << ") info.";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Check if device supports input or output
+ if ( ( mode == OUTPUT && !( ainfo.caps & PCM_CAP_OUTPUT ) ) ||
+ ( mode == INPUT && !( ainfo.caps & PCM_CAP_INPUT ) ) ) {
+ if ( mode == OUTPUT )
+ errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support output.";
+ else
+ errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support input.";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ int flags = 0;
+ OssHandle *handle = (OssHandle *) stream_.apiHandle;
+ if ( mode == OUTPUT )
+ flags |= O_WRONLY;
+ else { // mode == INPUT
+ if (stream_.mode == OUTPUT && stream_.device[0] == device) {
+ // We just set the same device for playback ... close and reopen for duplex (OSS only).
+ close( handle->id[0] );
+ handle->id[0] = 0;
+ if ( !( ainfo.caps & PCM_CAP_DUPLEX ) ) {
+ errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support duplex mode.";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ // Check that the number previously set channels is the same.
+ if ( stream_.nUserChannels[0] != channels ) {
+ errorStream_ << "RtApiOss::probeDeviceOpen: input/output channels must be equal for OSS duplex device (" << ainfo.name << ").";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ flags |= O_RDWR;
+ }
+ else
+ flags |= O_RDONLY;
+ }
+
+ // Set exclusive access if specified.
+ if ( options && options->flags & RTAUDIO_HOG_DEVICE ) flags |= O_EXCL;
+
+ // Try to open the device.
+ int fd;
+ fd = open( ainfo.devnode, flags, 0 );
+ if ( fd == -1 ) {
+ if ( errno == EBUSY )
+ errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") is busy.";
+ else
+ errorStream_ << "RtApiOss::probeDeviceOpen: error opening device (" << ainfo.name << ").";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // For duplex operation, specifically set this mode (this doesn't seem to work).
+ /*
+ if ( flags | O_RDWR ) {
+ result = ioctl( fd, SNDCTL_DSP_SETDUPLEX, NULL );
+ if ( result == -1) {
+ errorStream_ << "RtApiOss::probeDeviceOpen: error setting duplex mode for device (" << ainfo.name << ").";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ }
+ */
+
+ // Check the device channel support.
+ stream_.nUserChannels[mode] = channels;
+ if ( ainfo.max_channels < (int)(channels + firstChannel) ) {
+ close( fd );
+ errorStream_ << "RtApiOss::probeDeviceOpen: the device (" << ainfo.name << ") does not support requested channel parameters.";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Set the number of channels.
+ int deviceChannels = channels + firstChannel;
+ result = ioctl( fd, SNDCTL_DSP_CHANNELS, &deviceChannels );
+ if ( result == -1 || deviceChannels < (int)(channels + firstChannel) ) {
+ close( fd );
+ errorStream_ << "RtApiOss::probeDeviceOpen: error setting channel parameters on device (" << ainfo.name << ").";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ stream_.nDeviceChannels[mode] = deviceChannels;
+
+ // Get the data format mask
+ int mask;
+ result = ioctl( fd, SNDCTL_DSP_GETFMTS, &mask );
+ if ( result == -1 ) {
+ close( fd );
+ errorStream_ << "RtApiOss::probeDeviceOpen: error getting device (" << ainfo.name << ") data formats.";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Determine how to set the device format.
+ stream_.userFormat = format;
+ int deviceFormat = -1;
+ stream_.doByteSwap[mode] = false;
+ if ( format == RTAUDIO_SINT8 ) {
+ if ( mask & AFMT_S8 ) {
+ deviceFormat = AFMT_S8;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+ }
+ }
+ else if ( format == RTAUDIO_SINT16 ) {
+ if ( mask & AFMT_S16_NE ) {
+ deviceFormat = AFMT_S16_NE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+ }
+ else if ( mask & AFMT_S16_OE ) {
+ deviceFormat = AFMT_S16_OE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+ stream_.doByteSwap[mode] = true;
+ }
+ }
+ else if ( format == RTAUDIO_SINT24 ) {
+ if ( mask & AFMT_S24_NE ) {
+ deviceFormat = AFMT_S24_NE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+ }
+ else if ( mask & AFMT_S24_OE ) {
+ deviceFormat = AFMT_S24_OE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+ stream_.doByteSwap[mode] = true;
+ }
+ }
+ else if ( format == RTAUDIO_SINT32 ) {
+ if ( mask & AFMT_S32_NE ) {
+ deviceFormat = AFMT_S32_NE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+ }
+ else if ( mask & AFMT_S32_OE ) {
+ deviceFormat = AFMT_S32_OE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+ stream_.doByteSwap[mode] = true;
+ }
+ }
+
+ if ( deviceFormat == -1 ) {
+ // The user requested format is not natively supported by the device.
+ if ( mask & AFMT_S16_NE ) {
+ deviceFormat = AFMT_S16_NE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+ }
+ else if ( mask & AFMT_S32_NE ) {
+ deviceFormat = AFMT_S32_NE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+ }
+ else if ( mask & AFMT_S24_NE ) {
+ deviceFormat = AFMT_S24_NE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+ }
+ else if ( mask & AFMT_S16_OE ) {
+ deviceFormat = AFMT_S16_OE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+ stream_.doByteSwap[mode] = true;
+ }
+ else if ( mask & AFMT_S32_OE ) {
+ deviceFormat = AFMT_S32_OE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+ stream_.doByteSwap[mode] = true;
+ }
+ else if ( mask & AFMT_S24_OE ) {
+ deviceFormat = AFMT_S24_OE;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+ stream_.doByteSwap[mode] = true;
+ }
+ else if ( mask & AFMT_S8) {
+ deviceFormat = AFMT_S8;
+ stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+ }
+ }
+
+ if ( stream_.deviceFormat[mode] == 0 ) {
+ // This really shouldn't happen ...
+ close( fd );
+ errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") data format not supported by RtAudio.";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Set the data format.
+ int temp = deviceFormat;
+ result = ioctl( fd, SNDCTL_DSP_SETFMT, &deviceFormat );
+ if ( result == -1 || deviceFormat != temp ) {
+ close( fd );
+ errorStream_ << "RtApiOss::probeDeviceOpen: error setting data format on device (" << ainfo.name << ").";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Attempt to set the buffer size. According to OSS, the minimum
+ // number of buffers is two. The supposed minimum buffer size is 16
+ // bytes, so that will be our lower bound. The argument to this
+ // call is in the form 0xMMMMSSSS (hex), where the buffer size (in
+ // bytes) is given as 2^SSSS and the number of buffers as 2^MMMM.
+ // We'll check the actual value used near the end of the setup
+ // procedure.
+ int ossBufferBytes = *bufferSize * formatBytes( stream_.deviceFormat[mode] ) * deviceChannels;
+ if ( ossBufferBytes < 16 ) ossBufferBytes = 16;
+ int buffers = 0;
+ if ( options ) buffers = options->numberOfBuffers;
+ if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) buffers = 2;
+ if ( buffers < 2 ) buffers = 3;
+ temp = ((int) buffers << 16) + (int)( log10( (double)ossBufferBytes ) / log10( 2.0 ) );
+ result = ioctl( fd, SNDCTL_DSP_SETFRAGMENT, &temp );
+ if ( result == -1 ) {
+ close( fd );
+ errorStream_ << "RtApiOss::probeDeviceOpen: error setting buffer size on device (" << ainfo.name << ").";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ stream_.nBuffers = buffers;
+
+ // Save buffer size (in sample frames).
+ *bufferSize = ossBufferBytes / ( formatBytes(stream_.deviceFormat[mode]) * deviceChannels );
+ stream_.bufferSize = *bufferSize;
+
+ // Set the sample rate.
+ int srate = sampleRate;
+ result = ioctl( fd, SNDCTL_DSP_SPEED, &srate );
+ if ( result == -1 ) {
+ close( fd );
+ errorStream_ << "RtApiOss::probeDeviceOpen: error setting sample rate (" << sampleRate << ") on device (" << ainfo.name << ").";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+
+ // Verify the sample rate setup worked.
+ if ( abs( srate - sampleRate ) > 100 ) {
+ close( fd );
+ errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support sample rate (" << sampleRate << ").";
+ errorText_ = errorStream_.str();
+ return FAILURE;
+ }
+ stream_.sampleRate = sampleRate;
+
+ if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device) {
+ // We're doing duplex setup here.
+ stream_.deviceFormat[0] = stream_.deviceFormat[1];
+ stream_.nDeviceChannels[0] = deviceChannels;
+ }
+
+ // Set interleaving parameters.
+ stream_.userInterleaved = true;
+ stream_.deviceInterleaved[mode] = true;
+ if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
+ stream_.userInterleaved = false;
+
+ // Set flags for buffer conversion
+ stream_.doConvertBuffer[mode] = false;
+ if ( stream_.userFormat != stream_.deviceFormat[mode] )
+ stream_.doConvertBuffer[mode] = true;
+ if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
+ stream_.doConvertBuffer[mode] = true;
+ if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+ stream_.nUserChannels[mode] > 1 )
+ stream_.doConvertBuffer[mode] = true;
+
+ // Allocate the stream handles if necessary and then save.
+ if ( stream_.apiHandle == 0 ) {
+ try {
+ handle = new OssHandle;
+ }
+ catch ( std::bad_alloc& ) {
+ errorText_ = "RtApiOss::probeDeviceOpen: error allocating OssHandle memory.";
+ goto error;
+ }
+
+ if ( pthread_cond_init( &handle->runnable, NULL ) ) {
+ errorText_ = "RtApiOss::probeDeviceOpen: error initializing pthread condition variable.";
+ goto error;
+ }
+
+ stream_.apiHandle = (void *) handle;
+ }
+ else {
+ handle = (OssHandle *) stream_.apiHandle;
+ }
+ handle->id[mode] = fd;
+
+ // Allocate necessary internal buffers.
+ unsigned long bufferBytes;
+ bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+ stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+ if ( stream_.userBuffer[mode] == NULL ) {
+ errorText_ = "RtApiOss::probeDeviceOpen: error allocating user buffer memory.";
+ goto error;
+ }
+
+ if ( stream_.doConvertBuffer[mode] ) {
+
+ bool makeBuffer = true;
+ bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+ if ( mode == INPUT ) {
+ if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+ unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+ if ( bufferBytes <= bytesOut ) makeBuffer = false;
+ }
+ }
+
+ if ( makeBuffer ) {
+ bufferBytes *= *bufferSize;
+ if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+ stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+ if ( stream_.deviceBuffer == NULL ) {
+ errorText_ = "RtApiOss::probeDeviceOpen: error allocating device buffer memory.";
+ goto error;
+ }
+ }
+ }
+
+ stream_.device[mode] = device;
+ stream_.state = STREAM_STOPPED;
+
+ // Setup the buffer conversion information structure.
+ if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
+
+ // Setup thread if necessary.
+ if ( stream_.mode == OUTPUT && mode == INPUT ) {
+ // We had already set up an output stream.
+ stream_.mode = DUPLEX;
+ if ( stream_.device[0] == device ) handle->id[0] = fd;
+ }
+ else {
+ stream_.mode = mode;
+
+ // Setup callback thread.
+ stream_.callbackInfo.object = (void *) this;
+
+ // Set the thread attributes for joinable and realtime scheduling
+ // priority. The higher priority will only take affect if the
+ // program is run as root or suid.
+ pthread_attr_t attr;
+ pthread_attr_init( &attr );
+ pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
+#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
+ if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
+ struct sched_param param;
+ int priority = options->priority;
+ int min = sched_get_priority_min( SCHED_RR );
+ int max = sched_get_priority_max( SCHED_RR );
+ if ( priority < min ) priority = min;
+ else if ( priority > max ) priority = max;
+ param.sched_priority = priority;
+ pthread_attr_setschedparam( &attr, &param );
+ pthread_attr_setschedpolicy( &attr, SCHED_RR );
+ }
+ else
+ pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
+#else
+ pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
+#endif
+
+ stream_.callbackInfo.isRunning = true;
+ result = pthread_create( &stream_.callbackInfo.thread, &attr, ossCallbackHandler, &stream_.callbackInfo );
+ pthread_attr_destroy( &attr );
+ if ( result ) {
+ stream_.callbackInfo.isRunning = false;
+ errorText_ = "RtApiOss::error creating callback thread!";
+ goto error;
+ }
+ }
+
+ return SUCCESS;
+
+ error:
+ if ( handle ) {
+ pthread_cond_destroy( &handle->runnable );
+ if ( handle->id[0] ) close( handle->id[0] );
+ if ( handle->id[1] ) close( handle->id[1] );
+ delete handle;
+ stream_.apiHandle = 0;
+ }
+
+ for ( int i=0; i<2; i++ ) {
+ if ( stream_.userBuffer[i] ) {
+ free( stream_.userBuffer[i] );
+ stream_.userBuffer[i] = 0;
+ }
+ }
+
+ if ( stream_.deviceBuffer ) {
+ free( stream_.deviceBuffer );
+ stream_.deviceBuffer = 0;
+ }
+
+ return FAILURE;
+}
+
+ // Stop the callback thread, halt and close the OSS device(s) and release
+ // every buffer owned by the stream.  Issues a warning and returns if no
+ // stream is open.
+ void RtApiOss :: closeStream()
+ {
+   if ( stream_.state == STREAM_CLOSED ) {
+     errorText_ = "RtApiOss::closeStream(): no open stream to close!";
+     error( RtAudioError::WARNING );
+     return;
+   }
+
+   OssHandle *handle = (OssHandle *) stream_.apiHandle;
+
+   // Ask the callback thread to exit; if it is parked waiting for the
+   // stream to start, wake it so it can notice the request.
+   stream_.callbackInfo.isRunning = false;
+   MUTEX_LOCK( &stream_.mutex );
+   if ( handle && stream_.state == STREAM_STOPPED )
+     pthread_cond_signal( &handle->runnable );
+   MUTEX_UNLOCK( &stream_.mutex );
+   pthread_join( stream_.callbackInfo.thread, NULL );
+
+   if ( handle && stream_.state == STREAM_RUNNING ) {
+     if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
+       ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
+     else
+       ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
+     stream_.state = STREAM_STOPPED;
+   }
+
+   if ( handle ) {
+     pthread_cond_destroy( &handle->runnable );
+     if ( handle->id[0] ) close( handle->id[0] );
+     // A duplex stream on a single device stores the same descriptor in
+     // both slots; close it only once.
+     if ( handle->id[1] && handle->id[1] != handle->id[0] ) close( handle->id[1] );
+     delete handle;
+     stream_.apiHandle = 0;
+   }
+
+   for ( int i=0; i<2; i++ ) {
+     if ( stream_.userBuffer[i] ) {
+       free( stream_.userBuffer[i] );
+       stream_.userBuffer[i] = 0;
+     }
+   }
+
+   if ( stream_.deviceBuffer ) {
+     free( stream_.deviceBuffer );
+     stream_.deviceBuffer = 0;
+   }
+
+   stream_.mode = UNINITIALIZED;
+   stream_.state = STREAM_CLOSED;
+ }
+
+ // Mark the stream as running and wake the callback thread.
+ void RtApiOss :: startStream()
+ {
+   verifyStream();
+   if ( stream_.state == STREAM_RUNNING ) {
+     errorText_ = "RtApiOss::startStream(): the stream is already running!";
+     error( RtAudioError::WARNING );
+     return;
+   }
+
+   // Only the state flag changes here: OSS starts by itself as soon as
+   // it is fed samples.
+   MUTEX_LOCK( &stream_.mutex );
+   stream_.state = STREAM_RUNNING;
+   MUTEX_UNLOCK( &stream_.mutex );
+
+   // Release the callback thread waiting on the 'runnable' condition.
+   pthread_cond_signal( &static_cast<OssHandle *>( stream_.apiHandle )->runnable );
+ }
+
+ // Stop a running stream.  The output side is flushed with silence before
+ // the device is halted.  A failing halt ioctl is reported as a
+ // SYSTEM_ERROR after the mutex has been released; the stream state is set
+ // to STREAM_STOPPED either way.
+ void RtApiOss :: stopStream()
+ {
+   verifyStream();
+   if ( stream_.state == STREAM_STOPPED ) {
+     errorText_ = "RtApiOss::stopStream(): the stream is already stopped!";
+     error( RtAudioError::WARNING );
+     return;
+   }
+
+   MUTEX_LOCK( &stream_.mutex );
+
+   // The state might change while waiting on a mutex.
+   if ( stream_.state == STREAM_STOPPED ) {
+     MUTEX_UNLOCK( &stream_.mutex );
+     return;
+   }
+
+   int result = 0;
+   OssHandle *handle = (OssHandle *) stream_.apiHandle;
+   if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+     // Flush the output with zeros a few times.
+     char *buffer;
+     int samples;
+     RtAudioFormat format;
+
+     if ( stream_.doConvertBuffer[0] ) {
+       buffer = stream_.deviceBuffer;
+       samples = stream_.bufferSize * stream_.nDeviceChannels[0];
+       format = stream_.deviceFormat[0];
+     }
+     else {
+       buffer = stream_.userBuffer[0];
+       samples = stream_.bufferSize * stream_.nUserChannels[0];
+       format = stream_.userFormat;
+     }
+
+     memset( buffer, 0, samples * formatBytes(format) );
+     for ( unsigned int i=0; i<stream_.nBuffers+1; i++ ) {
+       result = write( handle->id[0], buffer, samples * formatBytes(format) );
+       if ( result == -1 ) {
+         errorText_ = "RtApiOss::stopStream: audio write error.";
+         error( RtAudioError::WARNING );
+       }
+     }
+
+     result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
+     if ( result == -1 ) {
+       errorStream_ << "RtApiOss::stopStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
+       errorText_ = errorStream_.str();
+       goto unlock;
+     }
+     handle->triggered = false;
+   }
+
+   // The input side needs its own halt unless it shares the output descriptor.
+   if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
+     result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
+     if ( result == -1 ) {
+       // Report the input device (index 1), not the output device.
+       errorStream_ << "RtApiOss::stopStream: system error stopping input callback procedure on device (" << stream_.device[1] << ").";
+       errorText_ = errorStream_.str();
+       goto unlock;
+     }
+   }
+
+  unlock:
+   stream_.state = STREAM_STOPPED;
+   MUTEX_UNLOCK( &stream_.mutex );
+
+   if ( result != -1 ) return;
+   error( RtAudioError::SYSTEM_ERROR );
+ }
+
+ // Stop a running stream immediately, without flushing the output.  A
+ // failing halt ioctl is reported as a SYSTEM_ERROR after the mutex has
+ // been released; the stream state is set to STREAM_STOPPED either way.
+ void RtApiOss :: abortStream()
+ {
+   verifyStream();
+   if ( stream_.state == STREAM_STOPPED ) {
+     errorText_ = "RtApiOss::abortStream(): the stream is already stopped!";
+     error( RtAudioError::WARNING );
+     return;
+   }
+
+   MUTEX_LOCK( &stream_.mutex );
+
+   // The state might change while waiting on a mutex.
+   if ( stream_.state == STREAM_STOPPED ) {
+     MUTEX_UNLOCK( &stream_.mutex );
+     return;
+   }
+
+   int result = 0;
+   OssHandle *handle = (OssHandle *) stream_.apiHandle;
+   if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+     result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
+     if ( result == -1 ) {
+       errorStream_ << "RtApiOss::abortStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
+       errorText_ = errorStream_.str();
+       goto unlock;
+     }
+     handle->triggered = false;
+   }
+
+   // The input side needs its own halt unless it shares the output descriptor.
+   if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
+     result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
+     if ( result == -1 ) {
+       // Report the input device (index 1), not the output device.
+       errorStream_ << "RtApiOss::abortStream: system error stopping input callback procedure on device (" << stream_.device[1] << ").";
+       errorText_ = errorStream_.str();
+       goto unlock;
+     }
+   }
+
+  unlock:
+   stream_.state = STREAM_STOPPED;
+   MUTEX_UNLOCK( &stream_.mutex );
+
+   if ( result != -1 ) return;
+   error( RtAudioError::SYSTEM_ERROR );
+ }
+
+ // One iteration of the callback thread: wait until the stream runs, call
+ // the user callback, then write the output buffer to and/or read the
+ // input buffer from the device.
+ void RtApiOss :: callbackEvent()
+ {
+   OssHandle *handle = (OssHandle *) stream_.apiHandle;
+
+   // While stopped, park on the 'runnable' condition.  If we are woken
+   // for any reason other than the stream starting, give control back to
+   // the thread loop.
+   if ( stream_.state == STREAM_STOPPED ) {
+     MUTEX_LOCK( &stream_.mutex );
+     pthread_cond_wait( &handle->runnable, &stream_.mutex );
+     const bool nowRunning = ( stream_.state == STREAM_RUNNING );
+     MUTEX_UNLOCK( &stream_.mutex );
+     if ( !nowRunning ) return;
+   }
+
+   if ( stream_.state == STREAM_CLOSED ) {
+     errorText_ = "RtApiOss::callbackEvent(): the stream is closed ... this shouldn't happen!";
+     error( RtAudioError::WARNING );
+     return;
+   }
+
+   // Collect (and clear) any xrun flags left by the previous iteration.
+   RtAudioCallback userCallback = (RtAudioCallback) stream_.callbackInfo.callback;
+   double streamTime = getStreamTime();
+   RtAudioStreamStatus status = 0;
+   if ( stream_.mode != INPUT && handle->xrun[0] ) {
+     status |= RTAUDIO_OUTPUT_UNDERFLOW;
+     handle->xrun[0] = false;
+   }
+   if ( stream_.mode != OUTPUT && handle->xrun[1] ) {
+     status |= RTAUDIO_INPUT_OVERFLOW;
+     handle->xrun[1] = false;
+   }
+
+   // Invoke user callback to get fresh output data.
+   const int doStopStream = userCallback( stream_.userBuffer[0], stream_.userBuffer[1],
+                                          stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
+   if ( doStopStream == 2 ) {
+     this->abortStream();
+     return;
+   }
+
+   MUTEX_LOCK( &stream_.mutex );
+
+   // The state might change while waiting on a mutex.
+   if ( stream_.state != STREAM_STOPPED ) {
+
+     int ioResult;
+     char *ioBuffer;
+     int nSamples;
+     RtAudioFormat ioFormat;
+
+     if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+       // Pick the buffer handed to the device, converting first if needed.
+       if ( stream_.doConvertBuffer[0] ) {
+         ioBuffer = stream_.deviceBuffer;
+         convertBuffer( ioBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+         nSamples = stream_.bufferSize * stream_.nDeviceChannels[0];
+         ioFormat = stream_.deviceFormat[0];
+       }
+       else {
+         ioBuffer = stream_.userBuffer[0];
+         nSamples = stream_.bufferSize * stream_.nUserChannels[0];
+         ioFormat = stream_.userFormat;
+       }
+
+       if ( stream_.doByteSwap[0] )
+         byteSwapBuffer( ioBuffer, nSamples, ioFormat );
+
+       // On the first duplex write, clear the trigger bits, write the
+       // first buffer, then enable input and output together.
+       const bool firstDuplexWrite = ( stream_.mode == DUPLEX && handle->triggered == false );
+       int trig = 0;
+       if ( firstDuplexWrite )
+         ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
+
+       // Write samples to device.
+       ioResult = write( handle->id[0], ioBuffer, nSamples * formatBytes(ioFormat) );
+
+       if ( firstDuplexWrite ) {
+         trig = PCM_ENABLE_INPUT|PCM_ENABLE_OUTPUT;
+         ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
+         handle->triggered = true;
+       }
+
+       if ( ioResult == -1 ) {
+         // Treated as an underrun; there is no specific way to tell.
+         // Processing continues with the input section.
+         handle->xrun[0] = true;
+         errorText_ = "RtApiOss::callbackEvent: audio write error.";
+         error( RtAudioError::WARNING );
+       }
+     }
+
+     if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+       // Pick the buffer the device fills.
+       if ( stream_.doConvertBuffer[1] ) {
+         ioBuffer = stream_.deviceBuffer;
+         nSamples = stream_.bufferSize * stream_.nDeviceChannels[1];
+         ioFormat = stream_.deviceFormat[1];
+       }
+       else {
+         ioBuffer = stream_.userBuffer[1];
+         nSamples = stream_.bufferSize * stream_.nUserChannels[1];
+         ioFormat = stream_.userFormat;
+       }
+
+       // Read samples from device.
+       ioResult = read( handle->id[1], ioBuffer, nSamples * formatBytes(ioFormat) );
+
+       if ( ioResult == -1 ) {
+         // Treated as an overrun; there is no specific way to tell.
+         // The captured data is left unprocessed.
+         handle->xrun[1] = true;
+         errorText_ = "RtApiOss::callbackEvent: audio read error.";
+         error( RtAudioError::WARNING );
+       }
+       else {
+         if ( stream_.doByteSwap[1] )
+           byteSwapBuffer( ioBuffer, nSamples, ioFormat );
+
+         if ( stream_.doConvertBuffer[1] )
+           convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+       }
+     }
+   }
+
+   MUTEX_UNLOCK( &stream_.mutex );
+
+   RtApi::tickStreamTime();
+   if ( doStopStream == 1 ) this->stopStream();
+ }
+
+ // Thread entry point: keep dispatching callbackEvent() on the owning
+ // RtApiOss object until the isRunning flag in CallbackInfo is cleared.
+ static void *ossCallbackHandler( void *ptr )
+ {
+   CallbackInfo *callbackInfo = static_cast<CallbackInfo *>( ptr );
+   RtApiOss *api = static_cast<RtApiOss *>( callbackInfo->object );
+   bool *keepGoing = &callbackInfo->isRunning;
+
+   for ( ; *keepGoing == true; ) {
+     pthread_testcancel();
+     api->callbackEvent();
+   }
+
+   pthread_exit( NULL );
+ }
+
+//******************** End of __LINUX_OSS__ *********************//
+#endif
+
+
+// *************************************************** //
+//
+// Protected common (OS-independent) RtAudio methods.
+//
+// *************************************************** //
+
+// This method can be modified to control the behavior of error
+// message printing.
+void RtApi :: error( RtAudioError::Type type )
+{
+ errorStream_.str(""); // clear the ostringstream
+
+ RtAudioErrorCallback errorCallback = (RtAudioErrorCallback) stream_.callbackInfo.errorCallback;
+ if ( errorCallback ) {
+ // abortStream() can generate new error messages. Ignore them. Just keep original one.
+
+ if ( firstErrorOccurred_ )
+ return;
+
+ firstErrorOccurred_ = true;
+ const std::string errorMessage = errorText_;
+
+ if ( type != RtAudioError::WARNING && stream_.state != STREAM_STOPPED) {
+ stream_.callbackInfo.isRunning = false; // exit from the thread
+ abortStream();
+ }
+
+ errorCallback( type, errorMessage );
+ firstErrorOccurred_ = false;
+ return;
+ }
+
+ if ( type == RtAudioError::WARNING && showWarnings_ == true )
+ std::cerr << '\n' << errorText_ << "\n\n";
+ else if ( type != RtAudioError::WARNING )
+ throw( RtAudioError( errorText_, type ) );
+}
+
+void RtApi :: verifyStream()
+{
+ if ( stream_.state == STREAM_CLOSED ) {
+ errorText_ = "RtApi:: a stream is not open!";
+ error( RtAudioError::INVALID_USE );
+ }
+}
+
+ // Reset every member of stream_ to its closed / empty default.
+ void RtApi :: clearStreamInfo()
+ {
+   // Stream-wide state.
+   stream_.state = STREAM_CLOSED;
+   stream_.mode = UNINITIALIZED;
+   stream_.streamTime = 0.0;
+   stream_.sampleRate = 0;
+   stream_.bufferSize = 0;
+   stream_.nBuffers = 0;
+   stream_.userFormat = 0;
+   stream_.userInterleaved = true;
+   stream_.deviceBuffer = 0;
+   stream_.apiHandle = 0;
+
+   // Callback bookkeeping.
+   stream_.callbackInfo.isRunning = false;
+   stream_.callbackInfo.callback = 0;
+   stream_.callbackInfo.errorCallback = 0;
+   stream_.callbackInfo.userData = 0;
+
+   // Per-direction state (index 0 and index 1).
+   for ( int dir=0; dir<2; dir++ ) {
+     stream_.device[dir] = 11111;
+     stream_.latency[dir] = 0;
+     stream_.userBuffer[dir] = 0;
+     stream_.channelOffset[dir] = 0;
+     stream_.nUserChannels[dir] = 0;
+     stream_.nDeviceChannels[dir] = 0;
+     stream_.deviceFormat[dir] = 0;
+     stream_.deviceInterleaved[dir] = true;
+     stream_.doByteSwap[dir] = false;
+     stream_.doConvertBuffer[dir] = false;
+
+     stream_.convertInfo[dir].channels = 0;
+     stream_.convertInfo[dir].inFormat = 0;
+     stream_.convertInfo[dir].outFormat = 0;
+     stream_.convertInfo[dir].inJump = 0;
+     stream_.convertInfo[dir].outJump = 0;
+     stream_.convertInfo[dir].inOffset.clear();
+     stream_.convertInfo[dir].outOffset.clear();
+   }
+ }
+
+unsigned int RtApi :: formatBytes( RtAudioFormat format )
+{
+  // Returns the size in bytes of one sample of the given format. An
+  // unrecognised format produces a warning through error() and returns 0.
+  if ( format == RTAUDIO_SINT16 ) return 2;
+  if ( format == RTAUDIO_SINT32 ) return 4;
+  if ( format == RTAUDIO_FLOAT32 ) return 4;
+  if ( format == RTAUDIO_FLOAT64 ) return 8;
+  if ( format == RTAUDIO_SINT24 ) return 3;
+  if ( format == RTAUDIO_SINT8 ) return 1;
+
+  errorText_ = "RtApi::formatBytes: undefined format.";
+  error( RtAudioError::WARNING );
+  return 0;
+}
+
+void RtApi :: setConvertInfo( StreamMode mode, unsigned int firstChannel )
+{
+  // Fills stream_.convertInfo[mode] with the formats, per-frame jumps and
+  // per-channel offsets that convertBuffer() uses for this direction.
+  // firstChannel shifts the device-side offsets so that conversion starts
+  // at that device channel.
+  ConvertInfo &conv = stream_.convertInfo[mode];
+
+  if ( mode == INPUT ) { // device buffer -> user buffer
+    conv.inJump = stream_.nDeviceChannels[1];
+    conv.outJump = stream_.nUserChannels[1];
+    conv.inFormat = stream_.deviceFormat[1];
+    conv.outFormat = stream_.userFormat;
+  }
+  else { // user buffer -> device buffer
+    conv.inJump = stream_.nUserChannels[0];
+    conv.outJump = stream_.nDeviceChannels[0];
+    conv.inFormat = stream_.userFormat;
+    conv.outFormat = stream_.deviceFormat[0];
+  }
+
+  // Only the channels present on both sides are converted.
+  conv.channels = ( conv.inJump < conv.outJump ) ? conv.inJump : conv.outJump;
+
+  // Set up the interleave/deinterleave offsets.
+  const bool layoutsDiffer = ( stream_.deviceInterleaved[mode] != stream_.userInterleaved );
+  if ( layoutsDiffer ) {
+    const bool sourceIsPlanar = ( mode == OUTPUT && stream_.deviceInterleaved[mode] ) ||
+                                ( mode == INPUT && stream_.userInterleaved );
+    if ( sourceIsPlanar ) {
+      // Non-interleaved input, interleaved output.
+      for ( int ch=0; ch<conv.channels; ch++ ) {
+        conv.inOffset.push_back( ch * stream_.bufferSize );
+        conv.outOffset.push_back( ch );
+        conv.inJump = 1;
+      }
+    }
+    else {
+      // Interleaved input, non-interleaved output.
+      for ( int ch=0; ch<conv.channels; ch++ ) {
+        conv.inOffset.push_back( ch );
+        conv.outOffset.push_back( ch * stream_.bufferSize );
+        conv.outJump = 1;
+      }
+    }
+  }
+  else if ( stream_.userInterleaved ) { // both sides interleaved
+    for ( int ch=0; ch<conv.channels; ch++ ) {
+      conv.inOffset.push_back( ch );
+      conv.outOffset.push_back( ch );
+    }
+  }
+  else { // both sides non-interleaved
+    for ( int ch=0; ch<conv.channels; ch++ ) {
+      conv.inOffset.push_back( ch * stream_.bufferSize );
+      conv.outOffset.push_back( ch * stream_.bufferSize );
+      conv.inJump = 1;
+      conv.outJump = 1;
+    }
+  }
+
+  // Add channel offset. The device side is the output for OUTPUT mode and
+  // the input otherwise.
+  if ( firstChannel > 0 ) {
+    if ( mode == OUTPUT ) {
+      if ( stream_.deviceInterleaved[mode] ) {
+        for ( int ch=0; ch<conv.channels; ch++ )
+          conv.outOffset[ch] += firstChannel;
+      }
+      else {
+        for ( int ch=0; ch<conv.channels; ch++ )
+          conv.outOffset[ch] += ( firstChannel * stream_.bufferSize );
+      }
+    }
+    else {
+      if ( stream_.deviceInterleaved[mode] ) {
+        for ( int ch=0; ch<conv.channels; ch++ )
+          conv.inOffset[ch] += firstChannel;
+      }
+      else {
+        for ( int ch=0; ch<conv.channels; ch++ )
+          conv.inOffset[ch] += ( firstChannel * stream_.bufferSize );
+      }
+    }
+  }
+}
+
+void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info )
+{
+  // This function does format conversion, input/output channel compensation, and
+  // data interleaving/deinterleaving. 24-bit integers are assumed to occupy
+  // the lower three bytes of a 32-bit integer.
+  //
+  // outBuffer : destination samples, written in info.outFormat.
+  // inBuffer  : source samples, read as info.inFormat.
+  // info      : channel count, per-frame jumps and per-channel offsets, as
+  //             prepared by setConvertInfo().
+  //
+  // stream_.bufferSize frames are processed. The code is organised as one
+  // section per output format, each holding an if / else-if chain over the
+  // input format. A combination not listed leaves outBuffer untouched.
+
+  // Clear our device buffer when in/out duplex device channels are different
+  if ( outBuffer == stream_.deviceBuffer && stream_.mode == DUPLEX &&
+       ( stream_.nDeviceChannels[0] < stream_.nDeviceChannels[1] ) )
+    memset( outBuffer, 0, stream_.bufferSize * info.outJump * formatBytes( info.outFormat ) );
+
+  int j;
+  if (info.outFormat == RTAUDIO_FLOAT64) {
+    Float64 scale;
+    Float64 *out = (Float64 *)outBuffer;
+
+    // Integer -> float: add 0.5, then scale by 1 / (max + 0.5).
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      scale = 1.0 / 127.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      scale = 1.0 / 32767.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      scale = 1.0 / 8388607.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) (in[info.inOffset[j]].asInt());
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      scale = 1.0 / 2147483647.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      // Channel compensation and/or (de)interleaving only.
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_FLOAT32) {
+    Float32 scale;
+    Float32 *out = (Float32 *)outBuffer;
+
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      scale = (Float32) ( 1.0 / 127.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      scale = (Float32) ( 1.0 / 32767.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      scale = (Float32) ( 1.0 / 8388607.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) (in[info.inOffset[j]].asInt());
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      scale = (Float32) ( 1.0 / 2147483647.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      // Channel compensation and/or (de)interleaving only.
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT32) {
+    Int32 *out = (Int32 *)outBuffer;
+    // Narrower integers are widened by shifting into the high-order bits.
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
+          out[info.outOffset[j]] <<= 24;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
+          out[info.outOffset[j]] <<= 16;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]].asInt();
+          out[info.outOffset[j]] <<= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      // Channel compensation and/or (de)interleaving only.
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT24) {
+    Int24 *out = (Int24 *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 16);
+          //out[info.outOffset[j]] <<= 16;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 8);
+          //out[info.outOffset[j]] <<= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      // Channel compensation and/or (de)interleaving only.
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] >> 8);
+          //out[info.outOffset[j]] >>= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT16) {
+    Int16 *out = (Int16 *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) in[info.inOffset[j]];
+          out[info.outOffset[j]] <<= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      // Channel compensation and/or (de)interleaving only.
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]].asInt() >> 8);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) ((in[info.inOffset[j]] >> 16) & 0x0000ffff);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT8) {
+    signed char *out = (signed char *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      // Channel compensation and/or (de)interleaving only.
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    // Chained with "else if" like every other section, so a matched input
+    // format is never followed by further format tests.
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 8) & 0x00ff);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]].asInt() >> 16);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 24) & 0x000000ff);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+}
+
+//static inline uint16_t bswap_16(uint16_t x) { return (x>>8) | (x<<8); }
+//static inline uint32_t bswap_32(uint32_t x) { return (bswap_16(x&0xffff)<<16) | (bswap_16(x>>16)); }
+//static inline uint64_t bswap_64(uint64_t x) { return (((unsigned long long)bswap_32(x&0xffffffffull))<<32) | (bswap_32(x>>32)); }
+
+void RtApi :: byteSwapBuffer( char *buffer, unsigned int samples, RtAudioFormat format )
+{
+  // Reverses, in place, the byte order of each sample in `buffer`.
+  //
+  // buffer  : start of the sample data.
+  // samples : number of samples (not bytes) to process.
+  // format  : sample format; it selects the sample width. Formats not listed
+  //           below are left untouched.
+  //
+  // The "register" storage class was dropped from the two locals: it is
+  // deprecated since C++11 and no longer valid in C++17.
+  char val;
+  char *ptr;
+
+  ptr = buffer;
+  if ( format == RTAUDIO_SINT16 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 2nd bytes.
+      val = *(ptr);
+      *(ptr) = *(ptr+1);
+      *(ptr+1) = val;
+
+      // Increment 2 bytes.
+      ptr += 2;
+    }
+  }
+  else if ( format == RTAUDIO_SINT32 ||
+            format == RTAUDIO_FLOAT32 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 4th bytes.
+      val = *(ptr);
+      *(ptr) = *(ptr+3);
+      *(ptr+3) = val;
+
+      // Swap 2nd and 3rd bytes.
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+1);
+      *(ptr+1) = val;
+
+      // Increment 3 more bytes.
+      ptr += 3;
+    }
+  }
+  else if ( format == RTAUDIO_SINT24 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 3rd bytes; the middle byte stays in place.
+      val = *(ptr);
+      *(ptr) = *(ptr+2);
+      *(ptr+2) = val;
+
+      // Increment 3 bytes: ptr was not moved during the swap and a 24-bit
+      // sample is 3 bytes wide (see formatBytes). Advancing by only 2 would
+      // start the next swap inside the current sample.
+      ptr += 3;
+    }
+  }
+  else if ( format == RTAUDIO_FLOAT64 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 8th bytes
+      val = *(ptr);
+      *(ptr) = *(ptr+7);
+      *(ptr+7) = val;
+
+      // Swap 2nd and 7th bytes
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+5);
+      *(ptr+5) = val;
+
+      // Swap 3rd and 6th bytes
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+3);
+      *(ptr+3) = val;
+
+      // Swap 4th and 5th bytes
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+1);
+      *(ptr+1) = val;
+
+      // Increment 5 more bytes.
+      ptr += 5;
+    }
+  }
+}
+
+ // Indentation settings for Vim and Emacs
+ //
+ // Local Variables:
+ // c-basic-offset: 2
+ // indent-tabs-mode: nil
+ // End:
+ //
+ // vim: et sts=2 sw=2
+
+#endif
diff --git a/drivers/speex/audio_stream_speex.cpp b/drivers/speex/audio_stream_speex.cpp
index 2cffb17049..1bb4952cc8 100644
--- a/drivers/speex/audio_stream_speex.cpp
+++ b/drivers/speex/audio_stream_speex.cpp
@@ -15,14 +15,15 @@ static _FORCE_INLINE_ uint16_t le_short(uint16_t s)
}
-void AudioStreamSpeex::update() {
+int AudioStreamPlaybackSpeex::mix(int16_t* p_buffer,int p_frames) {
+
+
- _THREAD_SAFE_METHOD_;
//printf("update, loops %i, read ofs %i\n", (int)loops, read_ofs);
//printf("playing %i, paused %i\n", (int)playing, (int)paused);
- if (!active || !playing || paused || !data.size())
- return;
+ if (!active || !playing || !data.size())
+ return 0;
/*
if (read_ofs >= data.size()) {
@@ -35,12 +36,13 @@ void AudioStreamSpeex::update() {
};
*/
- int todo = get_todo();
+ int todo = p_frames;
if (todo < page_size) {
- return;
+ return 0;
};
- int eos = 0;
+ int eos = 0;
+ bool reloaded=false;
while (todo > page_size) {
@@ -92,7 +94,7 @@ void AudioStreamSpeex::update() {
for (int j=0;j!=nframes;j++)
{
- int16_t* out = get_write_buffer();
+ int16_t* out = p_buffer;
int ret;
/*Decode frame*/
@@ -120,7 +122,7 @@ void AudioStreamSpeex::update() {
/*Convert to short and save to output file*/
- for (int i=0;i<frame_size*get_channel_count();i++) {
+ for (int i=0;i<frame_size*stream_channels;i++) {
out[i]=le_short(out[i]);
}
@@ -149,7 +151,7 @@ void AudioStreamSpeex::update() {
}
- write(new_frame_size);
+ p_buffer+=new_frame_size*stream_channels;
todo-=new_frame_size;
}
}
@@ -175,6 +177,7 @@ void AudioStreamSpeex::update() {
if (loops) {
reload();
++loop_count;
+ //break;
} else {
playing=false;
unload();
@@ -183,18 +186,22 @@ void AudioStreamSpeex::update() {
}
};
};
+
+ return p_frames-todo;
};
-void AudioStreamSpeex::unload() {
+void AudioStreamPlaybackSpeex::unload() {
+
- _THREAD_SAFE_METHOD_
if (!active) return;
speex_bits_destroy(&bits);
if (st)
speex_decoder_destroy(st);
+
+ ogg_sync_clear(&oy);
active = false;
//data.resize(0);
st = NULL;
@@ -204,7 +211,7 @@ void AudioStreamSpeex::unload() {
loop_count = 0;
}
-void *AudioStreamSpeex::process_header(ogg_packet *op, int *frame_size, int *rate, int *nframes, int *channels, int *extra_headers) {
+void *AudioStreamPlaybackSpeex::process_header(ogg_packet *op, int *frame_size, int *rate, int *nframes, int *channels, int *extra_headers) {
void *st;
SpeexHeader *header;
@@ -276,9 +283,9 @@ void *AudioStreamSpeex::process_header(ogg_packet *op, int *frame_size, int *rat
-void AudioStreamSpeex::reload() {
+void AudioStreamPlaybackSpeex::reload() {
+
- _THREAD_SAFE_METHOD_
if (active)
unload();
@@ -359,8 +366,10 @@ void AudioStreamSpeex::reload() {
};
page_size = nframes * frame_size;
+ stream_srate=rate;
+ stream_channels=channels;
+ stream_minbuff_size=page_size;
- _setup(channels, rate,page_size);
} else if (packet_count==1)
{
@@ -374,23 +383,23 @@ void AudioStreamSpeex::reload() {
} while (packet_count <= extra_headers);
- active = true;
+ active=true;
}
-void AudioStreamSpeex::_bind_methods() {
+void AudioStreamPlaybackSpeex::_bind_methods() {
- ObjectTypeDB::bind_method(_MD("set_file","file"),&AudioStreamSpeex::set_file);
- ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamSpeex::get_file);
+ //ObjectTypeDB::bind_method(_MD("set_file","file"),&AudioStreamPlaybackSpeex::set_file);
+// ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamPlaybackSpeex::get_file);
- ObjectTypeDB::bind_method(_MD("_set_bundled"),&AudioStreamSpeex::_set_bundled);
- ObjectTypeDB::bind_method(_MD("_get_bundled"),&AudioStreamSpeex::_get_bundled);
+ ObjectTypeDB::bind_method(_MD("_set_bundled"),&AudioStreamPlaybackSpeex::_set_bundled);
+ ObjectTypeDB::bind_method(_MD("_get_bundled"),&AudioStreamPlaybackSpeex::_get_bundled);
ADD_PROPERTY( PropertyInfo(Variant::DICTIONARY,"_bundled",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_BUNDLE),_SCS("_set_bundled"),_SCS("_get_bundled"));
- ADD_PROPERTY( PropertyInfo(Variant::STRING,"file",PROPERTY_HINT_FILE,"*.spx"),_SCS("set_file"),_SCS("get_file"));
+ //ADD_PROPERTY( PropertyInfo(Variant::STRING,"file",PROPERTY_HINT_FILE,"*.spx"),_SCS("set_file"),_SCS("get_file"));
};
-void AudioStreamSpeex::_set_bundled(const Dictionary& dict) {
+void AudioStreamPlaybackSpeex::_set_bundled(const Dictionary& dict) {
ERR_FAIL_COND( !dict.has("filename"));
ERR_FAIL_COND( !dict.has("data"));
@@ -399,7 +408,7 @@ void AudioStreamSpeex::_set_bundled(const Dictionary& dict) {
data = dict["data"];
};
-Dictionary AudioStreamSpeex::_get_bundled() const {
+Dictionary AudioStreamPlaybackSpeex::_get_bundled() const {
Dictionary d;
d["filename"] = filename;
@@ -408,43 +417,17 @@ Dictionary AudioStreamSpeex::_get_bundled() const {
};
-String AudioStreamSpeex::get_file() const {
-
- return filename;
-};
-
-void AudioStreamSpeex::set_file(const String& p_file){
-
- if (filename == p_file)
- return;
-
- if (active) {
- unload();
- }
-
- if (p_file == "") {
- data.resize(0);
- return;
- };
-
- Error err;
- FileAccess* file = FileAccess::open(p_file, FileAccess::READ,&err);
- if (err != OK) {
- data.resize(0);
- };
- ERR_FAIL_COND(err != OK);
- filename = p_file;
- data.resize(file->get_len());
- int read = file->get_buffer(&data[0], data.size());
- memdelete(file);
+void AudioStreamPlaybackSpeex::set_data(const Vector<uint8_t>& p_data) {
+ data=p_data;
reload();
}
-void AudioStreamSpeex::play() {
- _THREAD_SAFE_METHOD_
+void AudioStreamPlaybackSpeex::play(float p_from_pos) {
+
+
reload();
if (!active)
@@ -452,82 +435,103 @@ void AudioStreamSpeex::play() {
playing = true;
}
-void AudioStreamSpeex::stop(){
+void AudioStreamPlaybackSpeex::stop(){
+
- _THREAD_SAFE_METHOD_
unload();
playing = false;
- _clear();
-}
-bool AudioStreamSpeex::is_playing() const{
- return _is_ready() && (playing || (get_total() - get_todo() -1 > 0));
}
+bool AudioStreamPlaybackSpeex::is_playing() const{
-void AudioStreamSpeex::set_paused(bool p_paused){
-
- playing = !p_paused;
- paused = p_paused;
+ return playing;
}
-bool AudioStreamSpeex::is_paused(bool p_paused) const{
- return paused;
-}
-void AudioStreamSpeex::set_loop(bool p_enable){
+void AudioStreamPlaybackSpeex::set_loop(bool p_enable){
loops = p_enable;
}
-bool AudioStreamSpeex::has_loop() const{
+bool AudioStreamPlaybackSpeex::has_loop() const{
return loops;
}
-float AudioStreamSpeex::get_length() const{
+float AudioStreamPlaybackSpeex::get_length() const{
return 0;
}
-String AudioStreamSpeex::get_stream_name() const{
+String AudioStreamPlaybackSpeex::get_stream_name() const{
return "";
}
-int AudioStreamSpeex::get_loop_count() const{
+int AudioStreamPlaybackSpeex::get_loop_count() const{
return 0;
}
-float AudioStreamSpeex::get_pos() const{
+float AudioStreamPlaybackSpeex::get_pos() const{
return 0;
}
-void AudioStreamSpeex::seek_pos(float p_time){
+void AudioStreamPlaybackSpeex::seek_pos(float p_time){
};
-bool AudioStreamSpeex::_can_mix() const {
- //return playing;
- return data.size() != 0;
-};
+AudioStreamPlaybackSpeex::AudioStreamPlaybackSpeex() {
+
+ active=false;
+ st = NULL;
+ stream_channels=1;
+ stream_srate=1;
+ stream_minbuff_size=1;
+ playing=false;
-AudioStream::UpdateMode AudioStreamSpeex::get_update_mode() const {
- return UPDATE_THREAD;
}
-AudioStreamSpeex::AudioStreamSpeex() {
+AudioStreamPlaybackSpeex::~AudioStreamPlaybackSpeex() {
- active=false;
- st = NULL;
+ unload();
}
-AudioStreamSpeex::~AudioStreamSpeex() {
- unload();
+
+
+
+////////////////////////////////////////
+
+
+
+void AudioStreamSpeex::set_file(const String& p_file) {
+
+	// Remembers p_file as the stream's source path and loads the file's
+	// contents into `data`. An empty path, or a file that cannot be opened,
+	// leaves `data` empty. Calling again with the current path is a no-op.
+	if (this->file == p_file)
+		return;
+
+	this->file=p_file;
+
+	if (p_file == "") {
+		data.resize(0);
+		return;
+	};
+
+	Error err;
+	// Named "f" so the local handle does not shadow the member `file`.
+	FileAccess* f = FileAccess::open(p_file, FileAccess::READ,&err);
+	if (err != OK) {
+		data.resize(0);
+	};
+	ERR_FAIL_COND(err != OK);
+
+	data.resize(f->get_len());
+	// NOTE(review): the byte count returned by get_buffer() is not checked, so
+	// a short read would go unnoticed; a zero-length file also reaches
+	// &data[0] - confirm both are acceptable.
+	f->get_buffer(&data[0], data.size());
+	memdelete(f);
+
}
RES ResourceFormatLoaderAudioStreamSpeex::load(const String &p_path, const String& p_original_path, Error *r_error) {
diff --git a/drivers/speex/audio_stream_speex.h b/drivers/speex/audio_stream_speex.h
index f9e0fce666..f0617b302f 100644
--- a/drivers/speex/audio_stream_speex.h
+++ b/drivers/speex/audio_stream_speex.h
@@ -1,7 +1,7 @@
#ifndef AUDIO_STREAM_SPEEX_H
#define AUDIO_STREAM_SPEEX_H
-#include "scene/resources/audio_stream_resampled.h"
+#include "scene/resources/audio_stream.h"
#include "speex/speex.h"
#include "os/file_access.h"
#include "io/resource_loader.h"
@@ -14,10 +14,10 @@
#include <ogg/ogg.h>
-class AudioStreamSpeex : public AudioStreamResampled {
+class AudioStreamPlaybackSpeex : public AudioStreamPlayback {
+
+ OBJ_TYPE(AudioStreamPlaybackSpeex, AudioStreamPlayback);
- OBJ_TYPE(AudioStreamSpeex, AudioStreamResampled);
- _THREAD_SAFE_CLASS_
void *st;
SpeexBits bits;
@@ -29,7 +29,6 @@ class AudioStreamSpeex : public AudioStreamResampled {
bool loops;
int page_size;
bool playing;
- bool paused;
bool packets_available;
void unload();
@@ -45,6 +44,9 @@ class AudioStreamSpeex : public AudioStreamResampled {
ogg_int64_t page_granule, last_granule;
int skip_samples, page_nb_packets;
+ int stream_channels;
+ int stream_srate;
+ int stream_minbuff_size;
void* process_header(ogg_packet *op, int *frame_size, int *rate, int *nframes, int *channels, int *extra_headers);
@@ -52,7 +54,7 @@ class AudioStreamSpeex : public AudioStreamResampled {
protected:
- virtual bool _can_mix() const;
+ //virtual bool _can_mix() const;
Dictionary _get_bundled() const;
void _set_bundled(const Dictionary& dict);
@@ -60,16 +62,12 @@ protected:
public:
- void set_file(const String& p_file);
- String get_file() const;
+ void set_data(const Vector<uint8_t>& p_data);
- virtual void play();
+ virtual void play(float p_from_pos=0);
virtual void stop();
virtual bool is_playing() const;
- virtual void set_paused(bool p_paused);
- virtual bool is_paused(bool p_paused) const;
-
virtual void set_loop(bool p_enable);
virtual bool has_loop() const;
@@ -82,13 +80,39 @@ public:
virtual float get_pos() const;
virtual void seek_pos(float p_time);
- virtual UpdateMode get_update_mode() const;
- virtual void update();
+ virtual int get_channels() const { return stream_channels; }
+ virtual int get_mix_rate() const { return stream_srate; }
+
+ virtual int get_minimum_buffer_size() const { return stream_minbuff_size; }
+ virtual int mix(int16_t* p_bufer,int p_frames);
+
+ virtual void set_loop_restart_time(float p_time) { } //no loop restart, ignore
+
+ AudioStreamPlaybackSpeex();
+ ~AudioStreamPlaybackSpeex();
+};
+
+
+
+class AudioStreamSpeex : public AudioStream {
+
+ OBJ_TYPE(AudioStreamSpeex,AudioStream);
+
+ Vector<uint8_t> data;
+ String file;
+public:
+
+ Ref<AudioStreamPlayback> instance_playback() {
+ Ref<AudioStreamPlaybackSpeex> pb = memnew( AudioStreamPlaybackSpeex );
+ pb->set_data(data);
+ return pb;
+ }
+
+ void set_file(const String& p_file);
- AudioStreamSpeex();
- ~AudioStreamSpeex();
};
+
class ResourceFormatLoaderAudioStreamSpeex : public ResourceFormatLoader {
public:
virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL);
diff --git a/drivers/speex/config.h b/drivers/speex/config.h
index d31382702c..8c48e3b99d 100644
--- a/drivers/speex/config.h
+++ b/drivers/speex/config.h
@@ -1,52 +1,52 @@
-/*
- Copyright (C) 2003 Commonwealth Scientific and Industrial Research
- Organisation (CSIRO) Australia
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- - Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- - Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- - Neither the name of CSIRO Australia nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ORGANISATION OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef CONFIG_H
-#define CONFIG_H
-
-/* An inline macro is required for use of the inline keyword as not all C compilers support */
-/* inline. It is officially C99 and C++ only */
-
-
-/* Use only fixed point arithmetic */
-
-//#ifdef _MSC_VER
-//#define inline _inline
-//#endif
-
-#define FIXED_POINT 1
-
-#ifdef _MSC_VER
-#define inline __inline
-#endif
-
-#endif /* ! CONFIG_H */
+/*
+ Copyright (C) 2003 Commonwealth Scientific and Industrial Research
+ Organisation (CSIRO) Australia
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of CSIRO Australia nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ORGANISATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CONFIG_H
+#define CONFIG_H
+
+/* An inline macro is required for use of the inline keyword as not all C compilers support */
+/* inline. It is officially C99 and C++ only */
+
+
+/* Use only fixed point arithmetic */
+
+//#ifdef _MSC_VER
+//#define inline _inline
+//#endif
+
+#define FIXED_POINT 1
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+#endif /* ! CONFIG_H */
diff --git a/drivers/speex/lsp.h b/drivers/speex/lsp.h
index 648652fb9e..b55bd42f2c 100644
--- a/drivers/speex/lsp.h
+++ b/drivers/speex/lsp.h
@@ -1,64 +1,64 @@
-/*---------------------------------------------------------------------------*\
-Original Copyright
- FILE........: AK2LSPD.H
- TYPE........: Turbo C header file
- COMPANY.....: Voicetronix
- AUTHOR......: James Whitehall
- DATE CREATED: 21/11/95
-
-Modified by Jean-Marc Valin
-
- This file contains functions for converting Linear Prediction
- Coefficients (LPC) to Line Spectral Pair (LSP) and back. Note that the
- LSP coefficients are not in radians format but in the x domain of the
- unit circle.
-
-\*---------------------------------------------------------------------------*/
-/**
- @file lsp.h
- @brief Line Spectral Pair (LSP) functions.
-*/
-/* Speex License:
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- - Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- - Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- - Neither the name of the Xiph.org Foundation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef __AK2LSPD__
-#define __AK2LSPD__
-
-#include "arch.h"
-
-int lpc_to_lsp (spx_coef_t *a, int lpcrdr, spx_lsp_t *freq, int nb, spx_word16_t delta, char *stack);
-void lsp_to_lpc(spx_lsp_t *freq, spx_coef_t *ak, int lpcrdr, char *stack);
-
-/*Added by JMV*/
-void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin);
-
-void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes);
-
-#endif /* __AK2LSPD__ */
+/*---------------------------------------------------------------------------*\
+Original Copyright
+ FILE........: AK2LSPD.H
+ TYPE........: Turbo C header file
+ COMPANY.....: Voicetronix
+ AUTHOR......: James Whitehall
+ DATE CREATED: 21/11/95
+
+Modified by Jean-Marc Valin
+
+ This file contains functions for converting Linear Prediction
+ Coefficients (LPC) to Line Spectral Pair (LSP) and back. Note that the
+ LSP coefficients are not in radians format but in the x domain of the
+ unit circle.
+
+\*---------------------------------------------------------------------------*/
+/**
+ @file lsp.h
+ @brief Line Spectral Pair (LSP) functions.
+*/
+/* Speex License:
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __AK2LSPD__
+#define __AK2LSPD__
+
+#include "arch.h"
+
+int lpc_to_lsp (spx_coef_t *a, int lpcrdr, spx_lsp_t *freq, int nb, spx_word16_t delta, char *stack);
+void lsp_to_lpc(spx_lsp_t *freq, spx_coef_t *ak, int lpcrdr, char *stack);
+
+/*Added by JMV*/
+void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin);
+
+void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes);
+
+#endif /* __AK2LSPD__ */
diff --git a/drivers/speex/speex_bind.cpp b/drivers/speex/speex_bind.cpp
index 6e9eb638a2..d15bb3da8c 100644
--- a/drivers/speex/speex_bind.cpp
+++ b/drivers/speex/speex_bind.cpp
@@ -1,64 +1,64 @@
-
-#include "memory.h"
-#include "speex_bind.h"
-#include
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void *speex_alloc (int size) {
-
- uint8_t * mem = (uint8_t*)memalloc(size);
- for(int i=0;i<size;i++)
- mem[i]=0;
- return mem;
-}
-
-void *speex_alloc_scratch (int size) {
-
- return memalloc(size);
-}
-
-void *speex_realloc (void *ptr, int size) {
-
- return memrealloc(ptr,size);
-}
-
-void speex_free (void *ptr) {
-
- memfree(ptr);
-}
-
-void speex_free_scratch (void *ptr) {
-
- memfree(ptr);
-}
-
-void _speex_fatal(const char *str, const char *file, int line) {
-
- _err_print_error("SPEEX ERROR",p_file,p_line,str);
-}
-
-void speex_warning(const char *str) {
-
- _err_print_error("SPEEX WARNING","",0,str);
-}
-
-void speex_warning_int(const char *str, int val) {
-
- _err_print_error("SPEEX WARNING INT","Value",val,str);
-}
-
-void speex_notify(const char *str) {
-
- print_line(str);
-}
-
-void _speex_putc(int ch, void *file) {
-
- // will not putc, no.
-}
-
-#ifdef __cplusplus
-}
-#endif
+
+#include "memory.h"
+#include "speex_bind.h"
+#include
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *speex_alloc (int size) {
+
+ uint8_t * mem = (uint8_t*)memalloc(size);
+ for(int i=0;i<size;i++)
+ mem[i]=0;
+ return mem;
+}
+
+void *speex_alloc_scratch (int size) {
+
+ return memalloc(size);
+}
+
+void *speex_realloc (void *ptr, int size) {
+
+ return memrealloc(ptr,size);
+}
+
+void speex_free (void *ptr) {
+
+ memfree(ptr);
+}
+
+void speex_free_scratch (void *ptr) {
+
+ memfree(ptr);
+}
+
+void _speex_fatal(const char *str, const char *file, int line) {
+
+ _err_print_error("SPEEX ERROR",p_file,p_line,str);
+}
+
+void speex_warning(const char *str) {
+
+ _err_print_error("SPEEX WARNING","",0,str);
+}
+
+void speex_warning_int(const char *str, int val) {
+
+ _err_print_error("SPEEX WARNING INT","Value",val,str);
+}
+
+void speex_notify(const char *str) {
+
+ print_line(str);
+}
+
+void _speex_putc(int ch, void *file) {
+
+ // will not putc, no.
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/drivers/speex/speex_bind.h b/drivers/speex/speex_bind.h
index e842960d3c..c928430a33 100644
--- a/drivers/speex/speex_bind.h
+++ b/drivers/speex/speex_bind.h
@@ -1,48 +1,48 @@
-#ifndef SPEEX_BIND_H
-#define SPEEX_BIND_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
-#define OVERRIDE_SPEEX_ALLOC
-#define OVERRIDE_SPEEX_ALLOC_SCRATCH
-#define OVERRIDE_SPEEX_REALLOC
-#define OVERRIDE_SPEEX_FREE
-#define OVERRIDE_SPEEX_FREE_SCRATCH
-#define OVERRIDE_SPEEX_FATAL
-#define OVERRIDE_SPEEX_WARNING
-#define OVERRIDE_SPEEX_WARNING_INT
-#define OVERRIDE_SPEEX_NOTIFY
-#define OVERRIDE_SPEEX_PUTC
-
-void *speex_alloc (int size);
-void *speex_alloc_scratch (int size);
-void *speex_realloc (void *ptr, int size);
-void speex_free (void *ptr);
-void speex_free_scratch (void *ptr);
-void _speex_fatal(const char *str, const char *file, int line);
-void speex_warning(const char *str);
-void speex_warning_int(const char *str, int val);
-void speex_notify(const char *str);
-void _speex_putc(int ch, void *file);
-
-
-*/
-#define RELEASE
-#define SPEEX_PI 3.14159265358979323846
-
-#ifdef _MSC_VER
-#define SPEEX_INLINE __inline
-#else
-#define SPEEX_INLINE inline
-#endif
-
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // SPEEX_BIND_H
+#ifndef SPEEX_BIND_H
+#define SPEEX_BIND_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+#define OVERRIDE_SPEEX_ALLOC
+#define OVERRIDE_SPEEX_ALLOC_SCRATCH
+#define OVERRIDE_SPEEX_REALLOC
+#define OVERRIDE_SPEEX_FREE
+#define OVERRIDE_SPEEX_FREE_SCRATCH
+#define OVERRIDE_SPEEX_FATAL
+#define OVERRIDE_SPEEX_WARNING
+#define OVERRIDE_SPEEX_WARNING_INT
+#define OVERRIDE_SPEEX_NOTIFY
+#define OVERRIDE_SPEEX_PUTC
+
+void *speex_alloc (int size);
+void *speex_alloc_scratch (int size);
+void *speex_realloc (void *ptr, int size);
+void speex_free (void *ptr);
+void speex_free_scratch (void *ptr);
+void _speex_fatal(const char *str, const char *file, int line);
+void speex_warning(const char *str);
+void speex_warning_int(const char *str, int val);
+void speex_notify(const char *str);
+void _speex_putc(int ch, void *file);
+
+
+*/
+#define RELEASE
+#define SPEEX_PI 3.14159265358979323846
+
+#ifdef _MSC_VER
+#define SPEEX_INLINE __inline
+#else
+#define SPEEX_INLINE inline
+#endif
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // SPEEX_BIND_H
diff --git a/drivers/theora/video_stream_theora.cpp b/drivers/theora/video_stream_theora.cpp
index 214185cf88..bea49e34b7 100644
--- a/drivers/theora/video_stream_theora.cpp
+++ b/drivers/theora/video_stream_theora.cpp
@@ -1,16 +1,12 @@
#ifdef THEORA_ENABLED
-#if 0
+
#include "video_stream_theora.h"
#include "os/os.h"
#include "yuv2rgb.h"
+#include "globals.h"
-AudioStream::UpdateMode VideoStreamTheora::get_update_mode() const {
-
- return UPDATE_IDLE;
-};
-
-int VideoStreamTheora:: buffer_data() {
+int VideoStreamPlaybackTheora:: buffer_data() {
char *buffer=ogg_sync_buffer(&oy,4096);
int bytes=file->get_buffer((uint8_t*)buffer, 4096);
@@ -18,33 +14,13 @@ int VideoStreamTheora:: buffer_data() {
return(bytes);
}
-int VideoStreamTheora::queue_page(ogg_page *page){
+int VideoStreamPlaybackTheora::queue_page(ogg_page *page){
if(theora_p)ogg_stream_pagein(&to,page);
if(vorbis_p)ogg_stream_pagein(&vo,page);
return 0;
}
-Image VideoStreamTheora::peek_frame() const {
-
- if (frames_pending == 0)
- return Image();
- return Image(size.x, size.y, 0, format, frame_data);
-};
-
-Image VideoStreamTheora::pop_frame() {
-
- Image ret = peek_frame();
- frames_pending = 0;
-
- return ret;
-};
-
-int VideoStreamTheora::get_pending_frame_count() const {
-
- return frames_pending;
-};
-
-void VideoStreamTheora::video_write(void){
+void VideoStreamPlaybackTheora::video_write(void){
th_ycbcr_buffer yuv;
int y_offset, uv_offset;
th_decode_ycbcr_out(td,yuv);
@@ -78,25 +54,31 @@ void VideoStreamTheora::video_write(void){
int pitch = 4;
frame_data.resize(size.x * size.y * pitch);
- DVector<uint8_t>::Write w = frame_data.write();
- char* dst = (char*)w.ptr();
+ {
+ DVector<uint8_t>::Write w = frame_data.write();
+ char* dst = (char*)w.ptr();
- uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2);
+ uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2);
- if (px_fmt == TH_PF_444) {
+ if (px_fmt == TH_PF_444) {
- yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
+ yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
- } else if (px_fmt == TH_PF_422) {
+ } else if (px_fmt == TH_PF_422) {
- yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
+ yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
- } else if (px_fmt == TH_PF_420) {
+ } else if (px_fmt == TH_PF_420) {
- yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
- };
+ yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
+ };
- format = Image::FORMAT_RGBA;
+ format = Image::FORMAT_RGBA;
+ }
+
+ Image img(size.x,size.y,0,Image::FORMAT_RGBA,frame_data); //zero copy image creation
+
+ texture->set_data(img); //zero copy send to visual server
/*
@@ -194,7 +176,7 @@ void VideoStreamTheora::video_write(void){
frames_pending = 1;
}
-void VideoStreamTheora::clear() {
+void VideoStreamPlaybackTheora::clear() {
if (file_name == "")
return;
@@ -218,7 +200,7 @@ void VideoStreamTheora::clear() {
}
ogg_sync_clear(&oy);
- file_name = "";
+ //file_name = "";
theora_p = 0;
vorbis_p = 0;
@@ -229,7 +211,7 @@ void VideoStreamTheora::clear() {
playing = false;
};
-void VideoStreamTheora::set_file(const String& p_file) {
+void VideoStreamPlaybackTheora::set_file(const String& p_file) {
ogg_packet op;
th_setup_info *ts = NULL;
@@ -241,7 +223,7 @@ void VideoStreamTheora::set_file(const String& p_file) {
file = FileAccess::open(p_file, FileAccess::READ);
ERR_FAIL_COND(!file);
- audio_frames_wrote = 0;
+
ogg_sync_init(&oy);
@@ -256,6 +238,8 @@ void VideoStreamTheora::set_file(const String& p_file) {
/* Ogg file open; parse the headers */
/* Only interested in Vorbis/Theora streams */
int stateflag = 0;
+
+ int audio_track_skip=audio_track;
while(!stateflag){
int ret=buffer_data();
if(ret==0)break;
@@ -282,8 +266,14 @@ void VideoStreamTheora::set_file(const String& p_file) {
theora_p=1;
}else if(!vorbis_p && vorbis_synthesis_headerin(&vi,&vc,&op)>=0){
/* it is vorbis */
- copymem(&vo,&test,sizeof(test));
- vorbis_p=1;
+ if (audio_track_skip) {
+ vorbis_info_clear(&vi);
+ vorbis_comment_clear(&vc);
+ audio_track_skip--;
+ } else {
+ copymem(&vo,&test,sizeof(test));
+ vorbis_p=1;
+ }
}else{
/* whatever it is, we don't care about it */
ogg_stream_clear(&test);
@@ -386,6 +376,8 @@ void VideoStreamTheora::set_file(const String& p_file) {
size.x = w;
size.y = h;
+ texture->create(w,h,Image::FORMAT_RGBA,Texture::FLAG_FILTER|Texture::FLAG_VIDEO_SURFACE);
+
}else{
/* tear down the partial theora setup */
th_info_clear(&ti);
@@ -399,7 +391,7 @@ void VideoStreamTheora::set_file(const String& p_file) {
vorbis_block_init(&vd,&vb);
fprintf(stderr,"Ogg logical stream %lx is Vorbis %d channel %ld Hz audio.\n",
vo.serialno,vi.channels,vi.rate);
- _setup(vi.channels, vi.rate);
+ //_setup(vi.channels, vi.rate);
}else{
/* tear down the partial vorbis setup */
vorbis_info_clear(&vi);
@@ -411,227 +403,299 @@ void VideoStreamTheora::set_file(const String& p_file) {
time=0;
};
-float VideoStreamTheora::get_time() const {
+float VideoStreamPlaybackTheora::get_time() const {
//print_line("total: "+itos(get_total())+" todo: "+itos(get_todo()));
//return MAX(0,time-((get_total())/(float)vi.rate));
- return time-((get_total())/(float)vi.rate);
+ return time-AudioServer::get_singleton()->get_output_delay()-delay_compensation;//-((get_total())/(float)vi.rate);
};
-void VideoStreamTheora::update() {
+Ref<Texture> VideoStreamPlaybackTheora::get_texture() {
+
+ return texture;
+}
+
+void VideoStreamPlaybackTheora::update(float p_delta) {
if (!playing) {
//printf("not playing\n");
return;
};
- double ctime =AudioServer::get_singleton()->get_mix_time();
+ //double ctime =AudioServer::get_singleton()->get_mix_time();
- if (last_update_time) {
- double delta = (ctime-last_update_time);
- time+=delta;
- //print_line("delta: "+rtos(delta));
- }
- last_update_time=ctime;
+ //print_line("play "+rtos(p_delta));
+ time+=p_delta;
+ if (videobuf_time>get_time())
+ return; //no new frames need to be produced
- int audio_todo = get_todo();
- ogg_packet op;
- int audio_pending = 0;
+ bool frame_done=false;
+ while (!frame_done) {
+ //a frame needs to be produced
- while (vorbis_p && audio_todo) {
- int ret;
- float **pcm;
-
- /* if there's pending, decoded audio, grab it */
- if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) {
-
- audio_pending = ret;
- int16_t* out = get_write_buffer();
- int count = 0;
- int to_read = MIN(ret, audio_todo);
- for (int i=0; i<to_read; i++) {
-
- for(int j=0;j<vi.channels;j++){
- int val=Math::fast_ftoi(pcm[j][i]*32767.f);
- if(val>32767)val=32767;
- if(val<-32768)val=-32768;
- out[count++] = val;
- };
- };
- int tr = vorbis_synthesis_read(&vd, to_read);
- audio_todo -= to_read;
- audio_frames_wrote += to_read;
- write(to_read);
- audio_pending -= to_read;
- if (audio_todo==0)
- buffering=false;
+ ogg_packet op;
+ bool audio_pending = false;
- } else {
+ while (vorbis_p) {
+ int ret;
+ float **pcm;
+
+ bool buffer_full=false;
+
+ /* if there's pending, decoded audio, grab it */
+ if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) {
+
+
+
+ const int AUXBUF_LEN=4096;
+ int to_read = ret;
+ int16_t aux_buffer[AUXBUF_LEN];
+
+ while(to_read) {
+
+ int m = MIN(AUXBUF_LEN/vi.channels,to_read);
+
+ int count = 0;
+
+ for(int j=0;j<m;j++){
+ for(int i=0;i<vi.channels;i++){
+
+ int val=Math::fast_ftoi(pcm[i][j]*32767.f);
+ if(val>32767)val=32767;
+ if(val<-32768)val=-32768;
+ aux_buffer[count++] = val;
+ }
+ }
+
+ if (mix_callback) {
+ int mixed = mix_callback(mix_udata,aux_buffer,m);
+ to_read-=mixed;
+ if (mixed!=m) { //could mix no more
+ buffer_full=true;
+ break;
+ }
+ } else {
+ to_read-=m; //just pretend we sent the audio
+ }
+
- /* no pending audio; is there a pending packet to decode? */
- if (ogg_stream_packetout(&vo,&op)>0){
- if(vorbis_synthesis(&vb,&op)==0) { /* test for success! */
- vorbis_synthesis_blockin(&vd,&vb);
}
- } else { /* we need more data; break out to suck in another page */
- //printf("need moar data\n");
+
+
+ int tr = vorbis_synthesis_read(&vd, ret-to_read);
+
+ audio_pending=true;
+
+
+ } else {
+
+ /* no pending audio; is there a pending packet to decode? */
+ if (ogg_stream_packetout(&vo,&op)>0){
+ if(vorbis_synthesis(&vb,&op)==0) { /* test for success! */
+ vorbis_synthesis_blockin(&vd,&vb);
+ }
+ } else { /* we need more data; break out to suck in another page */
+ //printf("need moar data\n");
+ break;
+ };
+ }
+
+ if (buffer_full)
break;
- };
}
- }
- while(theora_p && !videobuf_ready){
- /* theora is one in, one out... */
- if(ogg_stream_packetout(&to,&op)>0){
+ while(theora_p && !frame_done){
+ /* theora is one in, one out... */
+ if(ogg_stream_packetout(&to,&op)>0){
- if(pp_inc){
- pp_level+=pp_inc;
- th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,
- sizeof(pp_level));
- pp_inc=0;
- }
- /*HACK: This should be set after a seek or a gap, but we might not have
- a granulepos for the first packet (we only have them for the last
- packet on a page), so we just set it as often as we get it.
- To do this right, we should back-track from the last packet on the
- page and compute the correct granulepos for the first packet after
- a seek or a gap.*/
- if(op.granulepos>=0){
- th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos,
- sizeof(op.granulepos));
- }
- ogg_int64_t videobuf_granulepos;
- if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){
- videobuf_time=th_granule_time(td,videobuf_granulepos);
- //printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
-
- /* is it already too old to be useful? This is only actually
- useful cosmetically after a SIGSTOP. Note that we have to
- decode the frame even if we don't show it (for now) due to
- keyframing. Soon enough libtheora will be able to deal
- with non-keyframe seeks. */
-
- if(videobuf_time>=get_time())
- videobuf_ready=1;
- else{
- /*If we are too slow, reduce the pp level.*/
- pp_inc=pp_level>0?-1:0;
+ if(pp_inc){
+ pp_level+=pp_inc;
+ th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,
+ sizeof(pp_level));
+ pp_inc=0;
+ }
+ /*HACK: This should be set after a seek or a gap, but we might not have
+ a granulepos for the first packet (we only have them for the last
+ packet on a page), so we just set it as often as we get it.
+ To do this right, we should back-track from the last packet on the
+ page and compute the correct granulepos for the first packet after
+ a seek or a gap.*/
+ if(op.granulepos>=0){
+ th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos,
+ sizeof(op.granulepos));
+ }
+ ogg_int64_t videobuf_granulepos;
+ if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){
+ videobuf_time=th_granule_time(td,videobuf_granulepos);
+
+ //printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
+
+ /* is it already too old to be useful? This is only actually
+ useful cosmetically after a SIGSTOP. Note that we have to
+ decode the frame even if we don't show it (for now) due to
+ keyframing. Soon enough libtheora will be able to deal
+ with non-keyframe seeks. */
+
+ if(videobuf_time>=get_time())
+ frame_done=true;
+ else{
+ /*If we are too slow, reduce the pp level.*/
+ pp_inc=pp_level>0?-1:0;
+ }
}
- }
-
- } else
- break;
- }
- if (/*!videobuf_ready && */ audio_pending == 0 && file->eof_reached()) {
- printf("video done, stopping\n");
- stop();
- return;
- };
+ } else
+ break;
+ }
- if (!videobuf_ready || audio_todo > 0){
- /* no data yet for somebody. Grab another page */
+ if (file && /*!videobuf_ready && */ file->eof_reached()) {
+ printf("video done, stopping\n");
+ stop();
+ return;
+ };
+ #if 0
+ if (!videobuf_ready || audio_todo > 0){
+ /* no data yet for somebody. Grab another page */
- buffer_data();
- while(ogg_sync_pageout(&oy,&og)>0){
- queue_page(&og);
+ buffer_data();
+ while(ogg_sync_pageout(&oy,&og)>0){
+ queue_page(&og);
+ }
}
- }
+ #else
+ if (!frame_done){
+ //what's the point of waiting for audio to grab a page?
- /* If playback has begun, top audio buffer off immediately. */
- //if(stateflag) audio_write_nonblocking();
+ buffer_data();
+ while(ogg_sync_pageout(&oy,&og)>0){
+ queue_page(&og);
+ }
+ }
+ #endif
+ /* If playback has begun, top audio buffer off immediately. */
+ //if(stateflag) audio_write_nonblocking();
- /* are we at or past time for this video frame? */
- if(videobuf_ready && videobuf_time<=get_time()){
+ /* are we at or past time for this video frame? */
+ if(videobuf_ready && videobuf_time<=get_time()){
- video_write();
- videobuf_ready=0;
- } else {
- //printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
- }
+ //video_write();
+ //videobuf_ready=0;
+ } else {
+ //printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
+ }
- float tdiff=videobuf_time-get_time();
- /*If we have lots of extra time, increase the post-processing level.*/
- if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){
- pp_inc=pp_level<pp_level_max?1:0;
- }
- else if(tdiff<ti.fps_denominator*0.05/ti.fps_numerator){
- pp_inc=pp_level>0?-1:0;
+ float tdiff=videobuf_time-get_time();
+ /*If we have lots of extra time, increase the post-processing level.*/
+ if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){
+ pp_inc=pp_level<pp_level_max?1:0;
+ }
+ else if(tdiff<ti.fps_denominator*0.05/ti.fps_numerator){
+ pp_inc=pp_level>0?-1:0;
+ }
}
-};
-bool VideoStreamTheora::_can_mix() const {
+ video_write();
- return !buffering;
};
-void VideoStreamTheora::play() {
+
+void VideoStreamPlaybackTheora::play() {
if (!playing)
- last_update_time=0;
+ time=0;
playing = true;
+ delay_compensation=Globals::get_singleton()->get("audio/video_delay_compensation_ms");
+ delay_compensation/=1000.0;
+
};
-void VideoStreamTheora::stop() {
+void VideoStreamPlaybackTheora::stop() {
+ if (playing) {
+ clear();
+ set_file(file_name); //reset
+ }
playing = false;
- last_update_time=0;
+ time=0;
};
-bool VideoStreamTheora::is_playing() const {
+bool VideoStreamPlaybackTheora::is_playing() const {
return playing;
};
-void VideoStreamTheora::set_paused(bool p_paused) {
+void VideoStreamPlaybackTheora::set_paused(bool p_paused) {
playing = !p_paused;
};
-bool VideoStreamTheora::is_paused(bool p_paused) const {
+bool VideoStreamPlaybackTheora::is_paused(bool p_paused) const {
return playing;
};
-void VideoStreamTheora::set_loop(bool p_enable) {
+void VideoStreamPlaybackTheora::set_loop(bool p_enable) {
};
-bool VideoStreamTheora::has_loop() const {
+bool VideoStreamPlaybackTheora::has_loop() const {
return false;
};
-float VideoStreamTheora::get_length() const {
+float VideoStreamPlaybackTheora::get_length() const {
return 0;
};
-String VideoStreamTheora::get_stream_name() const {
+String VideoStreamPlaybackTheora::get_stream_name() const {
return "";
};
-int VideoStreamTheora::get_loop_count() const {
+int VideoStreamPlaybackTheora::get_loop_count() const {
return 0;
};
-float VideoStreamTheora::get_pos() const {
+float VideoStreamPlaybackTheora::get_pos() const {
return get_time();
};
-void VideoStreamTheora::seek_pos(float p_time) {
+void VideoStreamPlaybackTheora::seek_pos(float p_time) {
// no
};
-VideoStreamTheora::VideoStreamTheora() {
+void VideoStreamPlaybackTheora::set_mix_callback(AudioMixCallback p_callback,void *p_userdata) {
+
+ mix_callback=p_callback;
+ mix_udata=p_userdata;
+}
+
+int VideoStreamPlaybackTheora::get_channels() const{
+
+ return vi.channels;
+}
+
+void VideoStreamPlaybackTheora::set_audio_track(int p_idx) {
+
+ audio_track=p_idx;
+}
+
+int VideoStreamPlaybackTheora::get_mix_rate() const{
+
+ return vi.rate;
+}
+
+
+
+VideoStreamPlaybackTheora::VideoStreamPlaybackTheora() {
file = NULL;
theora_p = 0;
@@ -640,11 +704,16 @@ VideoStreamTheora::VideoStreamTheora() {
playing = false;
frames_pending = 0;
videobuf_time = 0;
- last_update_time =0;
+
buffering=false;
+ texture = Ref<ImageTexture>( memnew(ImageTexture ));
+ mix_callback=NULL;
+ mix_udata=NULL;
+ audio_track=0;
+ delay_compensation=0;
};
-VideoStreamTheora::~VideoStreamTheora() {
+VideoStreamPlaybackTheora::~VideoStreamPlaybackTheora() {
clear();
@@ -653,10 +722,16 @@ VideoStreamTheora::~VideoStreamTheora() {
};
-RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path) {
+RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path, Error *r_error) {
+ if (r_error)
+ *r_error=ERR_FILE_CANT_OPEN;
VideoStreamTheora *stream = memnew(VideoStreamTheora);
stream->set_file(p_path);
+
+ if (r_error)
+ *r_error=OK;
+
return Ref<VideoStreamTheora>(stream);
}
@@ -666,16 +741,16 @@ void ResourceFormatLoaderVideoStreamTheora::get_recognized_extensions(List<Strin
p_extensions->push_back("ogv");
}
bool ResourceFormatLoaderVideoStreamTheora::handles_type(const String& p_type) const {
- return (p_type=="AudioStream" || p_type=="VideoStreamTheora");
+ return (p_type=="VideoStream" || p_type=="VideoStreamTheora");
}
String ResourceFormatLoaderVideoStreamTheora::get_resource_type(const String &p_path) const {
String exl=p_path.extension().to_lower();
if (exl=="ogm" || exl=="ogv")
- return "AudioStreamTheora";
+ return "VideoStreamTheora";
return "";
}
#endif
-#endif
+
diff --git a/drivers/theora/video_stream_theora.h b/drivers/theora/video_stream_theora.h
index 12aac731fc..95c7fe88f6 100644
--- a/drivers/theora/video_stream_theora.h
+++ b/drivers/theora/video_stream_theora.h
@@ -10,9 +10,9 @@
#include "io/resource_loader.h"
#include "scene/resources/video_stream.h"
-class VideoStreamTheora : public VideoStream {
+class VideoStreamPlaybackTheora : public VideoStreamPlayback {
- OBJ_TYPE(VideoStreamTheora, VideoStream);
+ OBJ_TYPE(VideoStreamPlaybackTheora, VideoStreamPlayback);
enum {
MAX_FRAMES = 4,
@@ -58,16 +58,19 @@ class VideoStreamTheora : public VideoStream {
double last_update_time;
double time;
+ double delay_compensation;
-protected:
+ Ref<ImageTexture> texture;
- virtual UpdateMode get_update_mode() const;
- virtual void update();
+ AudioMixCallback mix_callback;
+ void* mix_udata;
- void clear();
+ int audio_track;
- virtual bool _can_mix() const;
+protected:
+ void clear();
+
public:
virtual void play();
@@ -92,12 +95,43 @@ public:
void set_file(const String& p_file);
- int get_pending_frame_count() const;
- Image pop_frame();
- Image peek_frame() const;
+ virtual Ref<Texture> get_texture();
+ virtual void update(float p_delta);
+
+ virtual void set_mix_callback(AudioMixCallback p_callback,void *p_userdata);
+ virtual int get_channels() const;
+ virtual int get_mix_rate() const;
+
+ virtual void set_audio_track(int p_idx);
+
+ VideoStreamPlaybackTheora();
+ ~VideoStreamPlaybackTheora();
+};
+
+
+
+class VideoStreamTheora : public VideoStream {
+
+ OBJ_TYPE(VideoStreamTheora,VideoStream);
+
+ String file;
+ int audio_track;
+
+
+public:
+
+ Ref<VideoStreamPlayback> instance_playback() {
+ Ref<VideoStreamPlaybackTheora> pb = memnew( VideoStreamPlaybackTheora );
+ pb->set_audio_track(audio_track);
+ pb->set_file(file);
+ return pb;
+ }
+
+ void set_file(const String& p_file) { file=p_file; }
+ void set_audio_track(int p_track) { audio_track=p_track; }
+
+ VideoStreamTheora() { audio_track=0; }
- VideoStreamTheora();
- ~VideoStreamTheora();
};
class ResourceFormatLoaderVideoStreamTheora : public ResourceFormatLoader {
diff --git a/drivers/theoraplayer/SCsub b/drivers/theoraplayer/SCsub
deleted file mode 100644
index 09fb13d8e9..0000000000
--- a/drivers/theoraplayer/SCsub
+++ /dev/null
@@ -1,106 +0,0 @@
-Import("env")
-
-import string
-
-sources = string.split("""
-src/TheoraVideoClip.cpp
-src/FFmpeg/TheoraVideoClip_FFmpeg.cpp
-src/TheoraAsync.cpp
-src/TheoraAudioInterface.cpp
-src/TheoraException.cpp
-src/TheoraWorkerThread.cpp
-src/TheoraVideoManager.cpp
-src/TheoraTimer.cpp
-src/TheoraUtil.cpp
-src/TheoraDataSource.cpp
-src/TheoraAudioPacketQueue.cpp
-src/TheoraFrameQueue.cpp
-src/Theora/TheoraVideoClip_Theora.cpp
-src/YUV/yuv_util.c
-src/YUV/libyuv/src/row_any.cc
-src/YUV/libyuv/src/compare_common.cc
-src/YUV/libyuv/src/scale_neon.cc
-src/YUV/libyuv/src/planar_functions.cc
-src/YUV/libyuv/src/compare.cc
-src/YUV/libyuv/src/scale_mips.cc
-src/YUV/libyuv/src/scale_posix.cc
-src/YUV/libyuv/src/row_posix.cc
-src/YUV/libyuv/src/row_win.cc
-src/YUV/libyuv/src/compare_neon.cc
-src/YUV/libyuv/src/convert_from_argb.cc
-src/YUV/libyuv/src/mjpeg_validate.cc
-src/YUV/libyuv/src/convert_from.cc
-src/YUV/libyuv/src/rotate_neon.cc
-src/YUV/libyuv/src/row_neon.cc
-src/YUV/libyuv/src/rotate_mips.cc
-src/YUV/libyuv/src/compare_posix.cc
-src/YUV/libyuv/src/row_mips.cc
-src/YUV/libyuv/src/scale.cc
-src/YUV/libyuv/src/scale_argb.cc
-src/YUV/libyuv/src/mjpeg_decoder.cc
-src/YUV/libyuv/src/scale_win.cc
-src/YUV/libyuv/src/scale_common.cc
-src/YUV/libyuv/src/scale_argb_neon.cc
-src/YUV/libyuv/src/row_common.cc
-src/YUV/libyuv/src/convert.cc
-src/YUV/libyuv/src/format_conversion.cc
-src/YUV/libyuv/src/rotate_argb.cc
-src/YUV/libyuv/src/rotate.cc
-src/YUV/libyuv/src/convert_argb.cc
-src/YUV/libyuv/src/cpu_id.cc
-src/YUV/libyuv/src/video_common.cc
-src/YUV/libyuv/src/convert_to_argb.cc
-src/YUV/libyuv/src/compare_win.cc
-src/YUV/libyuv/src/convert_to_i420.cc
-src/YUV/libyuv/src/convert_jpeg.cc
-src/YUV/libyuv/yuv_libyuv.c
-src/YUV/android/cpu-features.c
-src/YUV/C/yuv420_grey_c.c
-src/YUV/C/yuv420_yuv_c.c
-src/YUV/C/yuv420_rgb_c.c
-src/TheoraVideoFrame.cpp
-""")
-
-env_theora = env.Clone()
-
-if env["platform"] == "iphone":
- sources.append("src/AVFoundation/TheoraVideoClip_AVFoundation.mm")
- env.Append(LINKFLAGS=['-framework', 'CoreVideo', '-framework', 'CoreMedia', '-framework', 'AVFoundation'])
- if env["target"] == "release":
- env_theora.Append(CPPFLAGS=["-D_IOS", "-D__ARM_NEON__", "-fstrict-aliasing", "-fmessage-length=210", "-fdiagnostics-show-note-include-stack", "-fmacro-backtrace-limit=0", "-fcolor-diagnostics", "-Wno-trigraphs", "-fpascal-strings", "-fvisibility=hidden", "-fvisibility-inlines-hidden"])
-
-env_theora.Append(CPPFLAGS=["-D_LIB", "-D__THEORA"]) # removed -D_YUV_C
-env_theora.Append(CPPFLAGS=["-D_YUV_LIBYUV"])
-#env_theora.Append(CPPFLAGS=["-D_YUV_C"])
-
-if env["platform"] == "iphone":
- env_theora.Append(CPPFLAGS=["-D__AVFOUNDATION"])
-else:
- pass
- #env_theora.Append(CPPFLAGS=["-D__FFMPEG"])
-
-if env["platform"] == "android":
- env_theora.Append(CPPFLAGS=["-D_ANDROID"])
-
-if env["platform"] == "winrt":
- env_theora.Append(CPPFLAGS=["-D_WINRT"])
-
-env_theora.Append(CPPPATH=["#drivers/theoraplayer/include/theoraplayer", "#drivers/theoraplayer/src/YUV", "#drivers/theoraplayer/src/YUV/libyuv/include", "#drivers/theoraplayer/src/Theora", "#drivers/theoraplayer/src/AVFoundation"])
-
-objs = []
-
-env_theora.add_source_files(objs, ["video_stream_theoraplayer.cpp"])
-
-if env['use_theoraplayer_binary'] == "yes":
- if env["platform"] == "iphone":
- env.Append(LIBPATH=['#drivers/theoraplayer/lib/ios'])
- env.Append(LIBS=['theoraplayer', 'ogg', 'theora', 'tremor'])
- if env["platform"] == "windows":
- env.Append(LIBPATH=['#drivers/theoraplayer/lib/windows'])
- env.Append(LINKFLAGS=['libtheoraplayer_static.lib', 'libogg.lib', 'libtheora.lib', 'libvorbis.lib'])
-else:
- env_theora.add_source_files(objs, sources)
-
-env.drivers_sources += objs
-
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraAsync.h b/drivers/theoraplayer/include/theoraplayer/TheoraAsync.h
deleted file mode 100644
index 7f1b49b9af..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraAsync.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraAsync_h
-#define _TheoraAsync_h
-
-#ifndef _WIN32
-#include <pthread.h>
-#endif
-
-/// @note Based on hltypes::Thread
-class TheoraMutex
-{
-public:
- TheoraMutex();
- ~TheoraMutex();
- void lock();
- void unlock();
-
-protected:
- void* mHandle;
-
-};
-
-/// @note Based on hltypes::Thread
-class TheoraThread
-{
- TheoraMutex mRunningMutex;
-public:
- TheoraThread();
- virtual ~TheoraThread();
- void start();
- void stop();
- void resume();
- void pause();
- bool isRunning();
- virtual void execute() = 0;
- void join();
-
-protected:
- void* mId;
- volatile bool mRunning;
-
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h b/drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h
deleted file mode 100644
index aa03293806..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraAudioInterface_h
-#define _TheoraAudioInterface_h
-
-#include "TheoraExport.h"
-
-class TheoraVideoClip;
-
-
-/**
- This is the class that serves as an interface between the library's audio
- output and the audio playback library of your choice.
- The class gets mono or stereo PCM data in in floating point data
- */
-class TheoraPlayerExport TheoraAudioInterface
-{
-public:
- //! PCM frequency, usualy 44100 Hz
- int mFreq;
- //! Mono or stereo
- int mNumChannels;
- //! Pointer to the parent TheoraVideoClip object
- TheoraVideoClip* mClip;
-
- TheoraAudioInterface(TheoraVideoClip* owner, int nChannels, int freq);
- virtual ~TheoraAudioInterface();
- //! A function that the TheoraVideoClip object calls once more audio packets are decoded
- /*!
- \param data contains one or two channels of float PCM data in the range [-1,1]
- \param nSamples contains the number of samples that the data parameter contains in each channel
- */
- virtual void insertData(float* data, int nSamples)=0;
-};
-
-class TheoraPlayerExport TheoraAudioInterfaceFactory
-{
-public:
- //! VideoManager calls this when creating a new TheoraVideoClip object
- virtual TheoraAudioInterface* createInstance(TheoraVideoClip* owner, int nChannels, int freq) = 0;
-};
-
-
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h b/drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h
deleted file mode 100644
index e0d17516e6..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraAudioPacketQueue_h
-#define _TheoraAudioPacketQueue_h
-
-#include "TheoraExport.h"
-
-class TheoraAudioInterface;
-/**
- This is an internal structure which TheoraVideoClip_Theora uses to store audio packets
- */
-struct TheoraAudioPacket
-{
- float* pcm;
- int numSamples; //! size in number of float samples (stereo has twice the number of samples)
- TheoraAudioPacket* next; // pointer to the next audio packet, to implement a linked list
-};
-
-/**
- This is a Mutex object, used in thread syncronization.
- */
-class TheoraPlayerExport TheoraAudioPacketQueue
-{
-protected:
- unsigned int mAudioFrequency, mNumAudioChannels;
- TheoraAudioPacket* mTheoraAudioPacketQueue;
- void _addAudioPacket(float* data, int numSamples);
-public:
- TheoraAudioPacketQueue();
- ~TheoraAudioPacketQueue();
-
- float getAudioPacketQueueLength();
- void addAudioPacket(float** buffer, int numSamples, float gain);
- void addAudioPacket(float* buffer, int numSamples, float gain);
- TheoraAudioPacket* popAudioPacket();
- void destroyAudioPacket(TheoraAudioPacket* p);
- void destroyAllAudioPackets();
-
- void flushAudioPackets(TheoraAudioInterface* audioInterface);
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h b/drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h
deleted file mode 100644
index b7427e97d3..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraDataSource_h
-#define _TheoraDataSource_h
-
-#include <stdio.h>
-#include <string>
-#include "TheoraExport.h"
-
-/**
- This is a simple class that provides abstracted data feeding. You can use the
- TheoraFileDataSource for regular file playback or you can implement your own
- internet streaming solution, or a class that uses encrypted datafiles etc.
- The sky is the limit
-*/
-class TheoraPlayerExport TheoraDataSource
-{
-public:
-
- virtual ~TheoraDataSource();
- /**
- Reads nBytes bytes from data source and returns number of read bytes.
- if function returns less bytes then nBytes, the system assumes EOF is reached.
- */
- virtual int read(void* output,int nBytes)=0;
- //! returns a string representation of the DataSource, eg 'File: source.ogg'
- virtual std::string repr()=0;
- //! position the source pointer to byte_index from the start of the source
- virtual void seek(unsigned long byte_index)=0;
- //! return the size of the stream in bytes
- virtual unsigned long size()=0;
- //! return the current position of the source pointer
- virtual unsigned long tell()=0;
-};
-
-
-/**
- provides standard file IO
-*/
-class TheoraPlayerExport TheoraFileDataSource : public TheoraDataSource
-{
- FILE* mFilePtr;
- std::string mFilename;
- unsigned long mSize;
-
- void openFile();
-public:
- TheoraFileDataSource(std::string filename);
- ~TheoraFileDataSource();
-
- int read(void* output,int nBytes);
- void seek(unsigned long byte_index);
- std::string repr() { return mFilename; }
- unsigned long size();
- unsigned long tell();
-
- std::string getFilename() { return mFilename; }
-};
-
-/**
- Pre-loads the entire file and streams from memory.
- Very useful if you're continuously displaying a video and want to avoid disk reads.
- Not very practical for large files.
-*/
-class TheoraPlayerExport TheoraMemoryFileDataSource : public TheoraDataSource
-{
- std::string mFilename;
- unsigned long mSize, mReadPointer;
- unsigned char* mData;
-public:
- TheoraMemoryFileDataSource(unsigned char* data, long size, const std::string& filename = "memory");
- TheoraMemoryFileDataSource(std::string filename);
- ~TheoraMemoryFileDataSource();
-
- int read(void* output,int nBytes);
- void seek(unsigned long byte_index);
- std::string repr() { return "MEM:"+mFilename; }
- unsigned long size();
- unsigned long tell();
- std::string getFilename() { return mFilename; }
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraException.h b/drivers/theoraplayer/include/theoraplayer/TheoraException.h
deleted file mode 100644
index f79368fa1e..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraException.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef EXCEPTION_H
-#define EXCEPTION_H
-
-#include <string>
-#include "TheoraExport.h"
-
-class TheoraPlayerExport _TheoraGenericException
-{
-public:
- std::string mErrText,mFile,mType;
- int mLineNumber;
-
- _TheoraGenericException(const std::string& errorText, std::string type = "",std::string file = "", int line = 0);
- virtual ~_TheoraGenericException() {}
-
- virtual std::string repr();
-
- void writeOutput();
-
- virtual const std::string& getErrorText() { return mErrText; }
-
- const std::string getType(){ return mType; }
-};
-
-#define TheoraGenericException(msg) _TheoraGenericException(msg, "TheoraGenericException", __FILE__, __LINE__)
-
-
-#define exception_cls(name) class name : public _TheoraGenericException \
-{ \
-public: \
- name(const std::string& errorText,std::string type = "",std::string file = "",int line = 0) : \
- _TheoraGenericException(errorText, type, file, line){} \
-}
-
-exception_cls(_KeyException);
-
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraExport.h b/drivers/theoraplayer/include/theoraplayer/TheoraExport.h
deleted file mode 100644
index cf16d1004c..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraExport.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _theoraVideoExport_h
-#define _theoraVideoExport_h
-
- #ifdef _LIB
- #define TheoraPlayerExport
- #define TheoraPlayerFnExport
- #else
- #ifdef _WIN32
- #ifdef THEORAVIDEO_EXPORTS
- #define TheoraPlayerExport __declspec(dllexport)
- #define TheoraPlayerFnExport __declspec(dllexport)
- #else
- #define TheoraPlayerExport __declspec(dllimport)
- #define TheoraPlayerFnExport __declspec(dllimport)
- #endif
- #else
- #define TheoraPlayerExport __attribute__ ((visibility("default")))
- #define TheoraPlayerFnExport __attribute__ ((visibility("default")))
- #endif
- #endif
- #ifndef DEPRECATED_ATTRIBUTE
- #ifdef _MSC_VER
- #define DEPRECATED_ATTRIBUTE __declspec(deprecated("function is deprecated"))
- #else
- #define DEPRECATED_ATTRIBUTE __attribute__((deprecated))
- #endif
- #endif
-
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h b/drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h
deleted file mode 100644
index fd985bb65a..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#ifndef _TheoraFrameQueue_h
-#define _TheoraFrameQueue_h
-
-#include "TheoraAsync.h"
-#include <list>
-#include "TheoraExport.h"
-
-class TheoraVideoFrame;
-class TheoraVideoClip;
-
-/**
- This class handles the frame queue. contains frames and handles their alloctation/deallocation
- it is designed to be thread-safe
-*/
-class TheoraPlayerExport TheoraFrameQueue
-{
-protected:
- std::list<TheoraVideoFrame*> mQueue;
- TheoraVideoClip* mParent;
- TheoraMutex mMutex;
-
- //! implementation function that returns a TheoraVideoFrame instance
- TheoraVideoFrame* createFrameInstance(TheoraVideoClip* clip);
-public:
- TheoraFrameQueue(TheoraVideoClip* parent);
- ~TheoraFrameQueue();
-
- /**
- \brief Returns the first available frame in the queue or NULL if no frames are available.
-
- This function DOES NOT remove the frame from the queue, you have to do it manually
- when you want to mark the frame as used by calling the pop() function.
- */
- TheoraVideoFrame* getFirstAvailableFrame();
- //! non-mutex version
- TheoraVideoFrame* _getFirstAvailableFrame();
-
- //! return the number of used (not ready) frames
- int getUsedCount();
-
- //! return the number of ready frames
- int getReadyCount();
- //! non-mutex version
- int _getReadyCount();
-
- /**
- \brief remove the first N available frame from the queue.
-
- Use this every time you display a frame so you can get the next one when the time comes.
- This function marks the frame on the front of the queue as unused and it's memory then
- get's used again in the decoding process.
- If you don't call this, the frame queue will fill up with precached frames up to the
- specified amount in the TheoraVideoManager class and you won't be able to advance the video.
- */
- void pop(int n = 1);
-
- //! This is an internal _pop function. use externally only in combination with lock() / unlock() calls
- void _pop(int n);
-
- //! frees all decoded frames for reuse (does not destroy memory, just marks them as free)
- void clear();
- //! Called by WorkerThreads when they need to unload frame data, do not call directly!
- TheoraVideoFrame* requestEmptyFrame();
-
- /**
- \brief set's the size of the frame queue.
-
- Beware, currently stored ready frames will be lost upon this call
- */
- void setSize(int n);
- //! return the size of the queue
- int getSize();
-
- //! return whether all frames in the queue are ready for display
- bool isFull();
-
- //! lock the queue's mutex manually
- void lock();
- //! unlock the queue's mutex manually
- void unlock();
-
- //! returns the internal frame queue. Warning: Always lock / unlock queue's mutex before accessing frames directly!
- std::list<TheoraVideoFrame*>& _getFrameQueue();
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h b/drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h
deleted file mode 100644
index 73d853cd03..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraPixelTransform_h
-#define _TheoraPixelTransform_h
-
-struct TheoraPixelTransform
-{
- unsigned char *raw, *y, *u, *v, *out;
- unsigned int w, h, rawStride, yStride, uStride, vStride;
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h b/drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h
deleted file mode 100644
index 8c5f2c735c..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraPlayer_h
-#define _TheoraPlayer_h
-
-#include "TheoraVideoManager.h"
-#include "TheoraVideoClip.h"
-#include "TheoraVideoFrame.h"
-
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraTimer.h b/drivers/theoraplayer/include/theoraplayer/TheoraTimer.h
deleted file mode 100644
index 14fdbf47fc..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraTimer.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#ifndef _TheoraTimer_h
-#define _TheoraTimer_h
-
-#include "TheoraExport.h"
-
-/**
- This is a Timer object, it is used to control the playback of a TheoraVideoClip.
-
- You can inherit this class and make a timer that eg. plays twice as fast,
- or playbacks an audio track and uses it's time offset for syncronizing Video etc.
- */
-class TheoraPlayerExport TheoraTimer
-{
-protected:
- //! Current time in seconds
- float mTime,mSpeed;
- //! Is the timer paused or not
- bool mPaused;
-public:
- TheoraTimer();
- virtual ~TheoraTimer();
-
- virtual float getTime();
- /**
- \brief advance the time.
-
- If you're using another synronization system, eg. an audio track,
- then you can ignore this call or use it to perform other updates.
-
- NOTE: this is called by TheoraVideoManager from the main thread
- */
- virtual void update(float timeDelta);
-
- virtual void pause();
- virtual void play();
- virtual bool isPaused();
- virtual void stop();
- /**
- \brief set's playback speed
-
- 1.0 is the default. The speed factor multiplies time advance, thus
- setting the value higher will increase playback speed etc.
-
- NOTE: depending on Timer implementation, it may not support setting the speed
-
- */
- virtual void setSpeed(float speed);
- //! return the update speed 1.0 is the default
- virtual float getSpeed();
-
- /**
- \brief change the current time.
-
- if you're using another syncronization mechanism, make sure to adjust
- the time offset there
- */
- virtual void seek(float time);
-};
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraUtil.h b/drivers/theoraplayer/include/theoraplayer/TheoraUtil.h
deleted file mode 100644
index f168971ac7..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraUtil.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraUtil_h
-#define _TheoraUtil_h
-
-#include <string>
-#include <vector>
-
-#ifndef THEORAUTIL_NOMACROS
-
-#define foreach(type,lst) for (std::vector<type>::iterator it=lst.begin();it != lst.end(); ++it)
-#define foreach_l(type,lst) for (std::list<type>::iterator it=lst.begin();it != lst.end(); ++it)
-#define foreach_r(type,lst) for (std::vector<type>::reverse_iterator it=lst.rbegin();it != lst.rend(); ++it)
-#define foreach_in_map(type,lst) for (std::map<std::string,type>::iterator it=lst.begin();it != lst.end(); ++it)
-
-#endif
-
-#define th_writelog(x) TheoraVideoManager::getSingleton().logMessage(x)
-
-
-std::string str(int i);
-std::string strf(float i);
-void _psleep(int milliseconds);
-int _nextPow2(int x);
-
-#endif \ No newline at end of file
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h b/drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h
deleted file mode 100644
index fe71cf8566..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h
+++ /dev/null
@@ -1,282 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#ifndef _TheoraVideoClip_h
-#define _TheoraVideoClip_h
-
-#include <string>
-#include "TheoraExport.h"
-
-// forward class declarations
-class TheoraMutex;
-class TheoraFrameQueue;
-class TheoraTimer;
-class TheoraAudioInterface;
-class TheoraWorkerThread;
-class TheoraDataSource;
-class TheoraVideoFrame;
-
-/**
- format of the TheoraVideoFrame pixels. Affects decoding time
- */
-enum TheoraOutputMode
-{
- // A = full alpha (255), order of letters represents the byte order for a pixel
- // A means the image is treated as if it contains an alpha channel, while X formats
- // just mean that RGB frame is transformed to a 4 byte format
- TH_UNDEFINED = 0,
- TH_RGB = 1,
- TH_RGBA = 2,
- TH_RGBX = 3,
- TH_ARGB = 4,
- TH_XRGB = 5,
- TH_BGR = 6,
- TH_BGRA = 7,
- TH_BGRX = 8,
- TH_ABGR = 9,
- TH_XBGR = 10,
- TH_GREY = 11,
- TH_GREY3 = 12,
- TH_GREY3A = 13,
- TH_GREY3X = 14,
- TH_AGREY3 = 15,
- TH_XGREY3 = 16,
- TH_YUV = 17,
- TH_YUVA = 18,
- TH_YUVX = 19,
- TH_AYUV = 20,
- TH_XYUV = 21
-};
-
-/**
- This object contains all data related to video playback, eg. the open source file,
- the frame queue etc.
-*/
-class TheoraPlayerExport TheoraVideoClip
-{
- friend class TheoraWorkerThread;
- friend class TheoraVideoFrame;
- friend class TheoraVideoManager;
-protected:
- TheoraFrameQueue* mFrameQueue;
- TheoraAudioInterface* mAudioInterface;
- TheoraDataSource* mStream;
-
- TheoraTimer *mTimer, *mDefaultTimer;
-
- TheoraWorkerThread* mAssignedWorkerThread;
-
- bool mUseAlpha;
-
- bool mWaitingForCache;
-
- // benchmark vars
- int mNumDroppedFrames, mNumDisplayedFrames, mNumPrecachedFrames;
-
- int mThreadAccessCount; //! counter used by TheoraVideoManager to schedule workload
-
- int mSeekFrame; //! stores desired seek position as a frame number. next worker thread will do the seeking and reset this var to -1
- float mDuration, mFrameDuration, mFPS;
- float mPriority; //! User assigned priority. Default value is 1
- std::string mName;
- int mWidth, mHeight, mStride;
- int mNumFrames;
- int audio_track;
-
- int mSubFrameWidth, mSubFrameHeight, mSubFrameOffsetX, mSubFrameOffsetY;
- float mAudioGain; //! multiplier for audio samples. between 0 and 1
-
- TheoraOutputMode mOutputMode, mRequestedOutputMode;
- bool mFirstFrameDisplayed;
- bool mAutoRestart;
- bool mEndOfFile, mRestarted;
- int mIteration, mPlaybackIteration; //! used to ensure smooth playback of looping videos
-
- TheoraMutex* mAudioMutex; //! syncs audio decoding and extraction
- TheoraMutex* mThreadAccessMutex;
-
- /**
- * Get the priority of a video clip. based on a forumula that includes user
- * priority factor, whether the video is paused or not, how many precached
- * frames it has etc.
- * This function is used in TheoraVideoManager to efficiently distribute job
- * assignments among worker threads
- * @return priority number of this video clip
- */
- int calculatePriority();
- void readTheoraVorbisHeaders();
- virtual void doSeek() = 0; //! called by WorkerThread to seek to mSeekFrame
- virtual bool _readData() = 0;
- bool isBusy();
-
- /**
- * decodes audio from the vorbis stream and stores it in audio packets
- * This is an internal function of TheoraVideoClip, called regularly if playing an
- * audio enabled video clip.
- * @return last decoded timestamp (if found in decoded packet's granule position)
- */
- virtual float decodeAudio() = 0;
-
- int _getNumReadyFrames();
- void resetFrameQueue();
- int discardOutdatedFrames(float absTime);
- float getAbsPlaybackTime();
- virtual void load(TheoraDataSource* source) = 0;
-
- virtual void _restart() = 0; // resets the decoder and stream but leaves the frame queue intact
-public:
- TheoraVideoClip(TheoraDataSource* data_source,
- TheoraOutputMode output_mode,
- int nPrecachedFrames,
- bool usePower2Stride);
- virtual ~TheoraVideoClip();
-
- std::string getName();
- //! Returns the string name of the decoder backend (eg. Theora, AVFoundation)
- virtual std::string getDecoderName() = 0;
-
- //! benchmark function
- int getNumDisplayedFrames() { return mNumDisplayedFrames; }
- //! benchmark function
- int getNumDroppedFrames() { return mNumDroppedFrames; }
-
- //! return width in pixels of the video clip
- int getWidth();
- //! return height in pixels of the video clip
- int getHeight();
-
- //! Width of the actual picture inside a video frame (depending on implementation, this may be equal to mWidth or differ within a codec block size (usually 16))
- int getSubFrameWidth();
- //! Height of the actual picture inside a video frame (depending on implementation, this may be equal to mHeight or differ within a codec block size (usually 16))
- int getSubFrameHeight();
- //! X Offset of the actual picture inside a video frame (depending on implementation, this may be 0 or within a codec block size (usually 16))
- int getSubFrameOffsetX();
- //! Y Offset of the actual picture inside a video frame (depending on implementation, this may be 0 or differ within a codec block size (usually 16))
- int getSubFrameOffsetY();
- /**
- \brief return stride in pixels
-
- If you've specified usePower2Stride when creating the TheoraVideoClip object
- then this value will be the next power of two size compared to width,
- eg: w=376, stride=512.
-
- Otherwise, stride will be equal to width
- */
- int getStride() { return mStride; }
-
- //! retur the timer objet associated with this object
- TheoraTimer* getTimer();
- //! replace the timer object with a new one
- void setTimer(TheoraTimer* timer);
-
- //! used by TheoraWorkerThread, do not call directly
- virtual bool decodeNextFrame() = 0;
-
- //! advance time. TheoraVideoManager calls this
- void update(float timeDelta);
- /**
- \brief update timer to the display time of the next frame
-
- useful if you want to grab frames instead of regular display
- \return time advanced. 0 if no frames are ready
- */
- float updateToNextFrame();
-
-
- TheoraFrameQueue* getFrameQueue();
-
- /**
- \brief pop the frame from the front of the FrameQueue
-
- see TheoraFrameQueue::pop() for more details
- */
- void popFrame();
-
- /**
- \brief Returns the first available frame in the queue or NULL if no frames are available.
-
- see TheoraFrameQueue::getFirstAvailableFrame() for more details
- */
- TheoraVideoFrame* getNextFrame();
- /**
- check if there is enough audio data decoded to submit to the audio interface
-
- TheoraWorkerThread calls this
- */
- virtual void decodedAudioCheck() = 0;
-
- void setAudioInterface(TheoraAudioInterface* iface);
- TheoraAudioInterface* getAudioInterface();
-
- /**
- \brief resize the frame queues
-
- Warning: this call discards ready frames in the frame queue
- */
- void setNumPrecachedFrames(int n);
- //! returns the size of the frame queue
- int getNumPrecachedFrames();
- //! returns the number of ready frames in the frame queue
- int getNumReadyFrames();
-
- //! if you want to adjust the audio gain. range [0,1]
- void setAudioGain(float gain);
- float getAudioGain();
-
- //! if you want the video to automatically and smoothly restart when the last frame is reached
- void setAutoRestart(bool value);
- bool getAutoRestart() { return mAutoRestart; }
-
-
- void set_audio_track(int p_track) { audio_track=p_track; }
-
- /**
- TODO: user priority. Useful only when more than one video is being decoded
- */
- void setPriority(float priority);
- float getPriority();
-
- //! Used by TheoraVideoManager to schedule work
- float getPriorityIndex();
-
- //! get the current time index from the timer object
- float getTimePosition();
- //! get the duration of the movie in seconds
- float getDuration();
- //! return the clips' frame rate, warning, fps can be a non integer number!
- float getFPS();
- //! get the number of frames in this movie
- int getNumFrames() { return mNumFrames; }
-
- //! return the current output mode for this video object
- TheoraOutputMode getOutputMode();
- /**
- set a new output mode
-
- Warning: this discards the frame queue. ready frames will be lost.
- */
- void setOutputMode(TheoraOutputMode mode);
-
- bool isDone();
- void play();
- void pause();
- void restart();
- bool isPaused();
- void stop();
- void setPlaybackSpeed(float speed);
- float getPlaybackSpeed();
- //! seek to a given time position
- void seek(float time);
- //! seek to a given frame number
- void seekToFrame(int frame);
- //! wait max_time for the clip to cache a given percentage of frames, factor in range [0,1]
- void waitForCache(float desired_cache_factor = 0.5f, float max_wait_time = 1.0f);
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h b/drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h
deleted file mode 100644
index 5d27f54d1c..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraVideoFrame_h
-#define _TheoraVideoFrame_h
-
-#include "TheoraExport.h"
-#include "TheoraVideoClip.h"
-
-struct TheoraPixelTransform;
-/**
-
-*/
-class TheoraPlayerExport TheoraVideoFrame
-{
-protected:
- TheoraVideoClip* mParent;
- unsigned char* mBuffer;
- unsigned long mFrameNumber;
-public:
- //! global time in seconds this frame should be displayed on
- float mTimeToDisplay;
- //! whether the frame is ready for display or not
- bool mReady;
- //! indicates the frame is being used by TheoraWorkerThread instance
- bool mInUse;
- //! used to keep track of linear time in looping videos
- int mIteration;
-
- int mBpp;
-
- TheoraVideoFrame(TheoraVideoClip* parent);
- virtual ~TheoraVideoFrame();
-
- //! internal function, do not use directly
- void _setFrameNumber(unsigned long number) { mFrameNumber = number; }
- //! returns the frame number of this frame in the theora stream
- unsigned long getFrameNumber() { return mFrameNumber; }
-
- void clear();
-
- int getWidth();
- int getStride();
- int getHeight();
-
- unsigned char* getBuffer();
-
- //! Called by TheoraVideoClip to decode a source buffer onto itself
- virtual void decode(struct TheoraPixelTransform* t);
-};
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h b/drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h
deleted file mode 100644
index d94c51b4d4..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#ifndef _TheoraVideoManager_h
-#define _TheoraVideoManager_h
-
-#include <vector>
-#include <list>
-#include <string>
-#include "TheoraExport.h"
-#include "TheoraVideoClip.h"
-#ifdef _WIN32
-#pragma warning( disable: 4251 ) // MSVC++
-#endif
-// forward class declarations
-class TheoraWorkerThread;
-class TheoraMutex;
-class TheoraDataSource;
-class TheoraAudioInterfaceFactory;
-/**
- This is the main singleton class that handles all playback/sync operations
-*/
-class TheoraPlayerExport TheoraVideoManager
-{
-protected:
- friend class TheoraWorkerThread;
- typedef std::vector<TheoraVideoClip*> ClipList;
- typedef std::vector<TheoraWorkerThread*> ThreadList;
-
- //! stores pointers to worker threads which are decoding video and audio
- ThreadList mWorkerThreads;
- //! stores pointers to created video clips
- ClipList mClips;
-
- //! stores pointer to clips that were docoded in the past in order to achieve fair scheduling
- std::list<TheoraVideoClip*> mWorkLog;
-
- int mDefaultNumPrecachedFrames;
-
- TheoraMutex* mWorkMutex;
- TheoraAudioInterfaceFactory* mAudioFactory;
-
- void createWorkerThreads(int n);
- void destroyWorkerThreads();
-
- float calcClipWorkTime(TheoraVideoClip* clip);
-
- /**
- * Called by TheoraWorkerThread to request a TheoraVideoClip instance to work on decoding
- */
- TheoraVideoClip* requestWork(TheoraWorkerThread* caller);
-public:
- TheoraVideoManager(int num_worker_threads=1);
- virtual ~TheoraVideoManager();
-
- //! get the global reference to the manager instance
- static TheoraVideoManager& getSingleton();
- //! get the global pointer to the manager instance
- static TheoraVideoManager* getSingletonPtr();
-
- //! search registered clips by name
- TheoraVideoClip* getVideoClipByName(std::string name);
-
- TheoraVideoClip* createVideoClip(std::string filename,TheoraOutputMode output_mode=TH_RGB,int numPrecachedOverride=0,bool usePower2Stride=0, int p_track=0);
- TheoraVideoClip* createVideoClip(TheoraDataSource* data_source,TheoraOutputMode output_mode=TH_RGB,int numPrecachedOverride=0,bool usePower2Stride=0, int p_audio_track=0);
-
- void update(float timeDelta);
-
- void destroyVideoClip(TheoraVideoClip* clip);
-
- void setAudioInterfaceFactory(TheoraAudioInterfaceFactory* factory);
- TheoraAudioInterfaceFactory* getAudioInterfaceFactory();
-
- int getNumWorkerThreads();
- void setNumWorkerThreads(int n);
-
- void setDefaultNumPrecachedFrames(int n) { mDefaultNumPrecachedFrames=n; }
- int getDefaultNumPrecachedFrames() { return mDefaultNumPrecachedFrames; }
-
- //! used by libtheoraplayer functions
- void logMessage(std::string msg);
-
- /**
- \brief you can set your own log function to recieve theora's log calls
-
- This way you can integrate libtheoraplayer's log messages in your own
- logging system, prefix them, mute them or whatever you want
- */
- static void setLogFunction(void (*fn)(std::string));
-
- //! get nicely formated version string
- std::string getVersionString();
- /**
- \brief get version numbers
-
- if c is negative, it means it's a release candidate -c
- */
- void getVersion(int* a,int* b,int* c);
-
- //! returns the supported decoders (eg. Theora, AVFoundation...)
- std::vector<std::string> getSupportedDecoders();
-};
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h b/drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h
deleted file mode 100644
index 2299acedbd..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraWorkerThread_h
-#define _TheoraWorkerThread_h
-
-#include "TheoraAsync.h"
-
-class TheoraVideoClip;
-
-/**
- This is the worker thread, requests work from TheoraVideoManager
- and decodes assigned TheoraVideoClip objects
-*/
-class TheoraWorkerThread : public TheoraThread
-{
- TheoraVideoClip* mClip;
-public:
- TheoraWorkerThread();
- ~TheoraWorkerThread();
-
- TheoraVideoClip* getAssignedClip() { return mClip; }
-
- //! Main Thread Body - do not call directly!
- void execute();
-};
-#endif
diff --git a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h b/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h
deleted file mode 100644
index abd898aa01..0000000000
--- a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#if defined(__AVFOUNDATION) && !defined(_TheoraVideoClip_AVFoundation_h)
-#define _TheoraVideoClip_AVFoundation_h
-
-#include "TheoraAudioPacketQueue.h"
-#include "TheoraVideoClip.h"
-
-#ifndef AVFOUNDATION_CLASSES_DEFINED
-class AVAssetReader;
-class AVAssetReaderTrackOutput;
-#endif
-
-class TheoraVideoClip_AVFoundation : public TheoraVideoClip, public TheoraAudioPacketQueue
-{
-protected:
- bool mLoaded;
- int mFrameNumber;
- AVAssetReader* mReader;
- AVAssetReaderTrackOutput *mOutput, *mAudioOutput;
- unsigned int mReadAudioSamples;
-
- void unload();
- void doSeek();
-public:
- TheoraVideoClip_AVFoundation(TheoraDataSource* data_source,
- TheoraOutputMode output_mode,
- int nPrecachedFrames,
- bool usePower2Stride);
- ~TheoraVideoClip_AVFoundation();
-
- bool _readData();
- bool decodeNextFrame();
- void _restart();
- void load(TheoraDataSource* source);
- float decodeAudio();
- void decodedAudioCheck();
- std::string getDecoderName() { return "AVFoundation"; }
-};
-
-#endif
diff --git a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm b/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm
deleted file mode 100644
index 1b5cf0ab13..0000000000
--- a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm
+++ /dev/null
@@ -1,457 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef __AVFOUNDATION
-#define AVFOUNDATION_CLASSES_DEFINED
-#import <AVFoundation/AVFoundation.h>
-#include "TheoraAudioInterface.h"
-#include "TheoraDataSource.h"
-#include "TheoraException.h"
-#include "TheoraTimer.h"
-#include "TheoraUtil.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraVideoManager.h"
-#include "TheoraVideoClip_AVFoundation.h"
-#include "TheoraPixelTransform.h"
-
-#ifdef _AVFOUNDATION_BGRX
-// a fast function developed to use kernel byte swapping calls to optimize alpha decoding.
-// In AVFoundation, BGRX mode conversion is prefered to YUV conversion because apple's YUV
-// conversion on iOS seems to run faster than libtheoraplayer's implementation
-// This may change in the future with more optimizations to libtheoraplayers's YUV conversion
-// code, making this function obsolete.
-static void bgrx2rgba(unsigned char* dest, int w, int h, struct TheoraPixelTransform* t)
-{
- unsigned register int a;
- unsigned int *dst = (unsigned int*) dest, *dstEnd;
- unsigned char* src = t->raw;
- int y, x, ax;
-
- for (y = 0; y < h; ++y, src += t->rawStride)
- {
- for (x = 0, ax = w * 4, dstEnd = dst + w; dst != dstEnd; x += 4, ax += 4, ++dst)
- {
- // use the full alpha range here because the Y channel has already been converted
- // to RGB and that's in [0, 255] range.
- a = src[ax];
- *dst = (OSReadSwapInt32(src, x) >> 8) | (a << 24);
- }
- }
-}
-#endif
-
-static CVPlanarPixelBufferInfo_YCbCrPlanar getYUVStruct(void* src)
-{
- CVPlanarPixelBufferInfo_YCbCrPlanar* bigEndianYuv = (CVPlanarPixelBufferInfo_YCbCrPlanar*) src;
- CVPlanarPixelBufferInfo_YCbCrPlanar yuv;
- yuv.componentInfoY.offset = OSSwapInt32(bigEndianYuv->componentInfoY.offset);
- yuv.componentInfoY.rowBytes = OSSwapInt32(bigEndianYuv->componentInfoY.rowBytes);
- yuv.componentInfoCb.offset = OSSwapInt32(bigEndianYuv->componentInfoCb.offset);
- yuv.componentInfoCb.rowBytes = OSSwapInt32(bigEndianYuv->componentInfoCb.rowBytes);
- yuv.componentInfoCr.offset = OSSwapInt32(bigEndianYuv->componentInfoCr.offset);
- yuv.componentInfoCr.rowBytes = OSSwapInt32(bigEndianYuv->componentInfoCr.rowBytes);
- return yuv;
-}
-
-TheoraVideoClip_AVFoundation::TheoraVideoClip_AVFoundation(TheoraDataSource* data_source,
- TheoraOutputMode output_mode,
- int nPrecachedFrames,
- bool usePower2Stride):
- TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride),
- TheoraAudioPacketQueue()
-{
- mLoaded = 0;
- mReader = NULL;
- mOutput = mAudioOutput = NULL;
- mReadAudioSamples = mAudioFrequency = mNumAudioChannels = 0;
-}
-
-TheoraVideoClip_AVFoundation::~TheoraVideoClip_AVFoundation()
-{
- unload();
-}
-
-void TheoraVideoClip_AVFoundation::unload()
-{
- if (mOutput != NULL || mAudioOutput != NULL || mReader != NULL)
- {
- NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
-
- if (mOutput != NULL)
- {
- [mOutput release];
- mOutput = NULL;
- }
-
- if (mAudioOutput)
- {
- [mAudioOutput release];
- mAudioOutput = NULL;
- }
-
- if (mReader != NULL)
- {
- [mReader release];
- mReader = NULL;
- }
-
- [pool release];
- }
-}
-
-bool TheoraVideoClip_AVFoundation::_readData()
-{
- return 1;
-}
-
-bool TheoraVideoClip_AVFoundation::decodeNextFrame()
-{
- if (mReader == NULL || mEndOfFile) return 0;
- AVAssetReaderStatus status = [mReader status];
- if (status == AVAssetReaderStatusFailed)
- {
- // This can happen on iOS when you suspend the app... Only happens on the device, iOS simulator seems to work fine.
- th_writelog("AVAssetReader reading failed, restarting...");
-
- mSeekFrame = mTimer->getTime() * mFPS;
- // just in case
- if (mSeekFrame < 0) mSeekFrame = 0;
- if (mSeekFrame > mDuration * mFPS - 1) mSeekFrame = mDuration * mFPS - 1;
- _restart();
- status = [mReader status];
- if (status == AVAssetReaderStatusFailed)
- {
- th_writelog("AVAssetReader restart failed!");
- return 0;
- }
- th_writelog("AVAssetReader restart succeeded!");
- }
-
- TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame();
- if (!frame) return 0;
-
- CMSampleBufferRef sampleBuffer = NULL;
- NSAutoreleasePool* pool = NULL;
- CMTime presentationTime;
-
- if (mAudioInterface) decodeAudio();
-
- if (status == AVAssetReaderStatusReading)
- {
- pool = [[NSAutoreleasePool alloc] init];
-
- while ((sampleBuffer = [mOutput copyNextSampleBuffer]))
- {
- presentationTime = CMSampleBufferGetOutputPresentationTimeStamp(sampleBuffer);
- frame->mTimeToDisplay = (float) CMTimeGetSeconds(presentationTime);
- frame->mIteration = mIteration;
- frame->_setFrameNumber(mFrameNumber);
- ++mFrameNumber;
- if (frame->mTimeToDisplay < mTimer->getTime() && !mRestarted && mFrameNumber % 16 != 0)
- {
- // %16 operation is here to prevent a playback halt during video playback if the decoder can't keep up with demand.
-#ifdef _DEBUG
- th_writelog(mName + ": pre-dropped frame " + str(mFrameNumber - 1));
-#endif
- ++mNumDisplayedFrames;
- ++mNumDroppedFrames;
- CMSampleBufferInvalidate(sampleBuffer);
- CFRelease(sampleBuffer);
- sampleBuffer = NULL;
- continue; // drop frame
- }
-
- CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
- CVPixelBufferLockBaseAddress(imageBuffer, 0);
- void *baseAddress = CVPixelBufferGetBaseAddress(imageBuffer);
-
- mStride = CVPixelBufferGetBytesPerRow(imageBuffer);
- size_t width = CVPixelBufferGetWidth(imageBuffer);
- size_t height = CVPixelBufferGetHeight(imageBuffer);
-
- TheoraPixelTransform t;
- memset(&t, 0, sizeof(TheoraPixelTransform));
-#ifdef _AVFOUNDATION_BGRX
- if (mOutputMode == TH_BGRX || mOutputMode == TH_RGBA)
- {
- t.raw = (unsigned char*) baseAddress;
- t.rawStride = mStride;
- }
- else
-#endif
- {
- CVPlanarPixelBufferInfo_YCbCrPlanar yuv = getYUVStruct(baseAddress);
-
- t.y = (unsigned char*) baseAddress + yuv.componentInfoY.offset; t.yStride = yuv.componentInfoY.rowBytes;
- t.u = (unsigned char*) baseAddress + yuv.componentInfoCb.offset; t.uStride = yuv.componentInfoCb.rowBytes;
- t.v = (unsigned char*) baseAddress + yuv.componentInfoCr.offset; t.vStride = yuv.componentInfoCr.rowBytes;
- }
-#ifdef _AVFOUNDATION_BGRX
- if (mOutputMode == TH_RGBA)
- {
- for (int i = 0; i < 1000; ++i)
- bgrx2rgba(frame->getBuffer(), mWidth / 2, mHeight, &t);
- frame->mReady = true;
- }
- else
-#endif
- frame->decode(&t);
-
- CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
- CMSampleBufferInvalidate(sampleBuffer);
- CFRelease(sampleBuffer);
-
- break; // TODO - should this really be a while loop instead of an if block?
- }
- }
- if (pool) [pool release];
-
- if (!frame->mReady) // in case the frame wasn't used
- {
- frame->mInUse = 0;
- }
-
- if (sampleBuffer == NULL && mReader.status == AVAssetReaderStatusCompleted) // other cases could be app suspended
- {
- if (mAutoRestart)
- {
- ++mIteration;
- _restart();
- }
- else
- {
- unload();
- mEndOfFile = true;
- }
- return 0;
- }
-
-
- return 1;
-}
-
-void TheoraVideoClip_AVFoundation::_restart()
-{
- mEndOfFile = false;
- unload();
- load(mStream);
- mRestarted = true;
-}
-
-void TheoraVideoClip_AVFoundation::load(TheoraDataSource* source)
-{
- mStream = source;
- mFrameNumber = 0;
- mEndOfFile = false;
- TheoraFileDataSource* fileDataSource = dynamic_cast<TheoraFileDataSource*>(source);
- std::string filename;
- if (fileDataSource != NULL) filename = fileDataSource->getFilename();
- else
- {
- TheoraMemoryFileDataSource* memoryDataSource = dynamic_cast<TheoraMemoryFileDataSource*>(source);
- if (memoryDataSource != NULL) filename = memoryDataSource->getFilename();
- else throw TheoraGenericException("Unable to load MP4 file");
- }
-
- NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
- NSString* path = [NSString stringWithUTF8String:filename.c_str()];
- NSError* err;
- NSURL *url = [NSURL fileURLWithPath:path];
- AVAsset* asset = [[AVURLAsset alloc] initWithURL:url options:nil];
- mReader = [[AVAssetReader alloc] initWithAsset:asset error:&err];
- NSArray* tracks = [asset tracksWithMediaType:AVMediaTypeVideo];
- if ([tracks count] == 0)
- throw TheoraGenericException("Unable to open video file: " + filename);
- AVAssetTrack *videoTrack = [tracks objectAtIndex:0];
-
- NSArray* audioTracks = [asset tracksWithMediaType:AVMediaTypeAudio];
- if (audio_track >= audioTracks.count)
- audio_track = 0;
- AVAssetTrack *audioTrack = audioTracks.count > 0 ? [audioTracks objectAtIndex:audio_track] : NULL;
- printf("*********** using audio track %i\n", audio_track);
-
-#ifdef _AVFOUNDATION_BGRX
- bool yuv_output = (mOutputMode != TH_BGRX && mOutputMode != TH_RGBA);
-#else
- bool yuv_output = true;
-#endif
-
- NSDictionary *videoOptions = [NSDictionary dictionaryWithObjectsAndKeys:[NSNumber numberWithInt:(yuv_output) ? kCVPixelFormatType_420YpCbCr8Planar : kCVPixelFormatType_32BGRA], kCVPixelBufferPixelFormatTypeKey, nil];
-
- mOutput = [[AVAssetReaderTrackOutput alloc] initWithTrack:videoTrack outputSettings:videoOptions];
- [mReader addOutput:mOutput];
- if ([mOutput respondsToSelector:@selector(setAlwaysCopiesSampleData:)]) // Not supported on iOS versions older than 5.0
- mOutput.alwaysCopiesSampleData = NO;
-
- mFPS = videoTrack.nominalFrameRate;
- mWidth = mSubFrameWidth = mStride = videoTrack.naturalSize.width;
- mHeight = mSubFrameHeight = videoTrack.naturalSize.height;
- mFrameDuration = 1.0f / mFPS;
- mDuration = (float) CMTimeGetSeconds(asset.duration);
- if (mFrameQueue == NULL)
- {
- mFrameQueue = new TheoraFrameQueue(this);
- mFrameQueue->setSize(mNumPrecachedFrames);
- }
-
- if (mSeekFrame != -1)
- {
- mFrameNumber = mSeekFrame;
- [mReader setTimeRange: CMTimeRangeMake(CMTimeMakeWithSeconds(mSeekFrame / mFPS, 1), kCMTimePositiveInfinity)];
- }
- if (audioTrack)
- {
- TheoraAudioInterfaceFactory* audio_factory = TheoraVideoManager::getSingleton().getAudioInterfaceFactory();
- if (audio_factory)
- {
- NSDictionary *audioOptions = [NSDictionary dictionaryWithObjectsAndKeys:
- [NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey,
- [NSNumber numberWithBool:NO], AVLinearPCMIsNonInterleaved,
- [NSNumber numberWithBool:NO], AVLinearPCMIsBigEndianKey,
- [NSNumber numberWithBool:YES], AVLinearPCMIsFloatKey,
- [NSNumber numberWithInt:32], AVLinearPCMBitDepthKey,
- nil];
-
- mAudioOutput = [[AVAssetReaderTrackOutput alloc] initWithTrack:audioTrack outputSettings:audioOptions];
- [mReader addOutput:mAudioOutput];
- if ([mAudioOutput respondsToSelector:@selector(setAlwaysCopiesSampleData:)]) // Not supported on iOS versions older than 5.0
- mAudioOutput.alwaysCopiesSampleData = NO;
-
- NSArray* desclst = audioTrack.formatDescriptions;
- CMAudioFormatDescriptionRef desc = (CMAudioFormatDescriptionRef) [desclst objectAtIndex:0];
- const AudioStreamBasicDescription* audioDesc = CMAudioFormatDescriptionGetStreamBasicDescription(desc);
- mAudioFrequency = (unsigned int) audioDesc->mSampleRate;
- mNumAudioChannels = audioDesc->mChannelsPerFrame;
-
- if (mSeekFrame != -1)
- {
- mReadAudioSamples = mFrameNumber * (mAudioFrequency * mNumAudioChannels) / mFPS;
- }
- else mReadAudioSamples = 0;
-
- if (mAudioInterface == NULL)
- setAudioInterface(audio_factory->createInstance(this, mNumAudioChannels, mAudioFrequency));
- }
- }
-
-#ifdef _DEBUG
- else if (!mLoaded)
- {
- th_writelog("-----\nwidth: " + str(mWidth) + ", height: " + str(mHeight) + ", fps: " + str((int) getFPS()));
- th_writelog("duration: " + strf(mDuration) + " seconds\n-----");
- }
-#endif
- [mReader startReading];
- [pool release];
- mLoaded = true;
-}
-
-void TheoraVideoClip_AVFoundation::decodedAudioCheck()
-{
- if (!mAudioInterface || mTimer->isPaused()) return;
-
- mAudioMutex->lock();
- flushAudioPackets(mAudioInterface);
- mAudioMutex->unlock();
-}
-
-float TheoraVideoClip_AVFoundation::decodeAudio()
-{
- if (mRestarted) return -1;
-
- if (mReader == NULL || mEndOfFile) return 0;
- AVAssetReaderStatus status = [mReader status];
-
- if (mAudioOutput)
- {
- CMSampleBufferRef sampleBuffer = NULL;
- NSAutoreleasePool* pool = NULL;
- bool mutexLocked = 0;
-
- float factor = 1.0f / (mAudioFrequency * mNumAudioChannels);
- float videoTime = (float) mFrameNumber / mFPS;
- float min = mFrameQueue->getSize() / mFPS + 1.0f;
-
- if (status == AVAssetReaderStatusReading)
- {
- pool = [[NSAutoreleasePool alloc] init];
-
- // always buffer up of audio ahead of the frames
- while (mReadAudioSamples * factor - videoTime < min)
- {
- if ((sampleBuffer = [mAudioOutput copyNextSampleBuffer]))
- {
- AudioBufferList audioBufferList;
-
- CMBlockBufferRef blockBuffer = NULL;
- CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(sampleBuffer, NULL, &audioBufferList, sizeof(audioBufferList), NULL, NULL, 0, &blockBuffer);
-
- for (int y = 0; y < audioBufferList.mNumberBuffers; ++y)
- {
- AudioBuffer audioBuffer = audioBufferList.mBuffers[y];
- float *frame = (float*) audioBuffer.mData;
-
- if (!mutexLocked)
- {
- mAudioMutex->lock();
- mutexLocked = 1;
- }
- addAudioPacket(frame, audioBuffer.mDataByteSize / (mNumAudioChannels * sizeof(float)), mAudioGain);
-
- mReadAudioSamples += audioBuffer.mDataByteSize / (sizeof(float));
- }
-
- CFRelease(blockBuffer);
- CMSampleBufferInvalidate(sampleBuffer);
- CFRelease(sampleBuffer);
- }
- else
- {
- [mAudioOutput release];
- mAudioOutput = nil;
- break;
- }
- }
- [pool release];
- }
- if (mutexLocked) mAudioMutex->unlock();
- }
-
- return -1;
-}
-
-void TheoraVideoClip_AVFoundation::doSeek()
-{
-#if _DEBUG
- th_writelog(mName + " [seek]: seeking to frame " + str(mSeekFrame));
-#endif
- int frame;
- float time = mSeekFrame / getFPS();
- mTimer->seek(time);
- bool paused = mTimer->isPaused();
- if (!paused) mTimer->pause(); // pause until seeking is done
-
- mEndOfFile = false;
- mRestarted = false;
-
- resetFrameQueue();
- unload();
- load(mStream);
-
- if (mAudioInterface)
- {
- mAudioMutex->lock();
- destroyAllAudioPackets();
- mAudioMutex->unlock();
- }
-
- if (!paused) mTimer->play();
- mSeekFrame = -1;
-}
-#endif
diff --git a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp b/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp
deleted file mode 100644
index fa3fd43a47..0000000000
--- a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp
+++ /dev/null
@@ -1,439 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef __FFMPEG
-#include "TheoraAudioInterface.h"
-#include "TheoraDataSource.h"
-#include "TheoraException.h"
-#include "TheoraTimer.h"
-#include "TheoraUtil.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraVideoManager.h"
-#include "TheoraVideoClip_FFmpeg.h"
-#include "TheoraPixelTransform.h"
-
-#define READ_BUFFER_SIZE 4096
-
-#ifdef __cplusplus
-#define __STDC_CONSTANT_MACROS
-#ifdef _STDINT_H
-#undef _STDINT_H
-#endif
-# include <stdint.h>
-#endif
-
-#define _FFMPEG_DEBUG
-
-extern "C"
-{
-#include <libavcodec/avcodec.h>
-#include <libavformat/avformat.h>
-#include "libavutil/avassert.h"
-}
-
-static bool ffmpegInitialised = 0;
-
-static int readFunction(void* data, uint8_t* buf, int buf_size)
-{
-#ifdef _FFMPEG_DEBUG
- th_writelog("reading " + str(buf_size) + " bytes");
-#endif
-
- TheoraDataSource* src = (TheoraDataSource*) data;
- return src->read(buf, buf_size);
-}
-
-static int64_t seekFunction(void* data, int64_t offset, int whence)
-{
-#ifdef _FFMPEG_DEBUG
- th_writelog("seeking: offset = " + str((long) offset) + ", whence = " + str(whence));
-#endif
-
- TheoraDataSource* src = (TheoraDataSource*) data;
- if (whence == AVSEEK_SIZE)
- return src->size();
- else if (whence == SEEK_SET)
- src->seek((long) offset);
- else if (whence == SEEK_END)
- src->seek(src->size() - (long) offset);
- return src->tell();
-}
-
-static void avlog_theoraplayer(void* p, int level, const char* fmt, va_list vargs)
-{
- th_writelog(fmt);
- static char logstr[2048];
- vsprintf(logstr, fmt, vargs);
- th_writelog("ffmpeg: " + std::string(logstr));
-}
-
-
-std::string text;
-
-static void _log(const char* s)
-{
- text += s;
-// th_writelog(text);
-// text = "";
-}
-
-static void _log(const char c)
-{
- char s[2] = {c, 0};
- _log(s);
-}
-
-static const AVCodec *next_codec_for_id(enum AVCodecID id, const AVCodec *prev,
- int encoder)
-{
- while ((prev = av_codec_next(prev))) {
- if (prev->id == id &&
- (encoder ? av_codec_is_encoder(prev) : av_codec_is_decoder(prev)))
- return prev;
- }
- return NULL;
-}
-
-static int compare_codec_desc(const void *a, const void *b)
-{
- const AVCodecDescriptor **da = (const AVCodecDescriptor **) a;
- const AVCodecDescriptor **db = (const AVCodecDescriptor **) b;
-
- return (*da)->type != (*db)->type ? (*da)->type - (*db)->type :
- strcmp((*da)->name, (*db)->name);
-}
-
-static unsigned get_codecs_sorted(const AVCodecDescriptor ***rcodecs)
-{
- const AVCodecDescriptor *desc = NULL;
- const AVCodecDescriptor **codecs;
- unsigned nb_codecs = 0, i = 0;
-
- while ((desc = avcodec_descriptor_next(desc)))
- ++nb_codecs;
- if (!(codecs = (const AVCodecDescriptor**) av_calloc(nb_codecs, sizeof(*codecs)))) {
- av_log(NULL, AV_LOG_ERROR, "Out of memory\n");
- exit(1);
- }
- desc = NULL;
- while ((desc = avcodec_descriptor_next(desc)))
- codecs[i++] = desc;
- av_assert0(i == nb_codecs);
- qsort(codecs, nb_codecs, sizeof(*codecs), compare_codec_desc);
- *rcodecs = codecs;
- return nb_codecs;
-}
-
-static char get_media_type_char(enum AVMediaType type)
-{
- switch (type) {
- case AVMEDIA_TYPE_VIDEO: return 'V';
- case AVMEDIA_TYPE_AUDIO: return 'A';
- case AVMEDIA_TYPE_DATA: return 'D';
- case AVMEDIA_TYPE_SUBTITLE: return 'S';
- case AVMEDIA_TYPE_ATTACHMENT:return 'T';
- default: return '?';
- }
-}
-
-static void print_codecs_for_id(enum AVCodecID id, int encoder)
-{
- const AVCodec *codec = NULL;
-
- _log(encoder ? "encoders" : "decoders");
-
- while ((codec = next_codec_for_id(id, codec, encoder)))
- _log(codec->name);
-
- _log(")");
-}
-
-int show_codecs(void *optctx, const char *opt, const char *arg)
-{
- const AVCodecDescriptor **codecs;
- unsigned i, nb_codecs = get_codecs_sorted(&codecs);
-
- char tmp[1024];
- th_writelog("Codecs:\n"
- " D..... = Decoding supported\n"
- " .E.... = Encoding supported\n"
- " ..V... = Video codec\n"
- " ..A... = Audio codec\n"
- " ..S... = Subtitle codec\n"
- " ...I.. = Intra frame-only codec\n"
- " ....L. = Lossy compression\n"
- " .....S = Lossless compression\n"
- " -------\n");
- for (i = 0; i < nb_codecs; ++i) {
- const AVCodecDescriptor *desc = codecs[i];
- const AVCodec *codec = NULL;
-
- _log(" ");
- _log(avcodec_find_decoder(desc->id) ? "D" : ".");
- _log(avcodec_find_encoder(desc->id) ? "E" : ".");
-
- _log(get_media_type_char(desc->type));
- _log((desc->props & AV_CODEC_PROP_INTRA_ONLY) ? "I" : ".");
- _log((desc->props & AV_CODEC_PROP_LOSSY) ? "L" : ".");
- _log((desc->props & AV_CODEC_PROP_LOSSLESS) ? "S" : ".");
-
-
- sprintf(tmp, " %-20s %s", desc->name, desc->long_name ? desc->long_name : "");
-
- _log(tmp);
- /* print decoders/encoders when there's more than one or their
- * names are different from codec name */
- while ((codec = next_codec_for_id(desc->id, codec, 0))) {
- if (strcmp(codec->name, desc->name)) {
- print_codecs_for_id(desc->id, 0);
- break;
- }
- }
- codec = NULL;
- while ((codec = next_codec_for_id(desc->id, codec, 1))) {
- if (strcmp(codec->name, desc->name)) {
- print_codecs_for_id(desc->id, 1);
- break;
- }
- }
- _log("\n");
- }
- av_free(codecs);
-
- av_log(0, 0, "%s", text.c_str());
- return 0;
-}
-
-TheoraVideoClip_FFmpeg::TheoraVideoClip_FFmpeg(TheoraDataSource* data_source,
- TheoraOutputMode output_mode,
- int nPrecachedFrames,
- bool usePower2Stride):
- TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride),
- TheoraAudioPacketQueue()
-{
- mFormatContext = NULL;
- mCodecContext = NULL;
- mCodec = NULL;
- mFrame = NULL;
- mVideoStreamIndex = -1;
-}
-
-TheoraVideoClip_FFmpeg::~TheoraVideoClip_FFmpeg()
-{
- unload();
-}
-
-void TheoraVideoClip_FFmpeg::load(TheoraDataSource* source)
-{
- mVideoStreamIndex = -1;
- mFrameNumber = 0;
- AVDictionary* optionsDict = NULL;
-
- if (!ffmpegInitialised)
- {
-#ifdef _FFMPEG_DEBUG
- th_writelog("Initializing ffmpeg");
-#endif
- th_writelog("avcodec version: " + str(avcodec_version()));
- av_register_all();
- av_log_set_level(AV_LOG_DEBUG);
- av_log_set_callback(avlog_theoraplayer);
- ffmpegInitialised = 1;
- //show_codecs(0, 0, 0);
- }
-
- mInputBuffer = (unsigned char*) av_malloc(READ_BUFFER_SIZE);
- mAvioContext = avio_alloc_context(mInputBuffer, READ_BUFFER_SIZE, 0, source, &readFunction, NULL, &seekFunction);
-
-#ifdef _FFMPEG_DEBUG
- th_writelog(mName + ": avio context created");
-#endif
-
- mFormatContext = avformat_alloc_context();
-#ifdef _FFMPEG_DEBUG
- th_writelog(mName + ": avformat context created");
-#endif
- mFormatContext->pb = mAvioContext;
-
- int err;
- if ((err = avformat_open_input(&mFormatContext, "", NULL, NULL)) != 0)
- {
- th_writelog(mName + ": avformat input opening failed!");
- th_writelog(mName + ": error_code: " + str(err));
- return;
- }
-
-#ifdef _FFMPEG_DEBUG
- th_writelog(mName + ": avformat input opened");
-#endif
-
- // Retrieve stream information
- if (avformat_find_stream_info(mFormatContext, NULL) < 0)
- return; // Couldn't find stream information
-
-#ifdef _FFMPEG_DEBUG
- th_writelog(mName + ": got stream info");
-#endif
-
- // Dump information about file onto standard error
- // av_dump_format(mFormatContext, 0, "", 0);
-
- // Find the first video stream
- for (int i = 0; i < mFormatContext->nb_streams; ++i)
- {
- if(mFormatContext->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
- {
- mVideoStreamIndex = i;
- break;
- }
- }
- if (mVideoStreamIndex == -1)
- return; // Didn't find a video stream
-
-#ifdef _FFMPEG_DEBUG
- th_writelog(mName + ": Found video stream at index " + str(mVideoStreamIndex));
-#endif
-
- // Get a pointer to the codec context for the video stream
- mCodecContext = mFormatContext->streams[mVideoStreamIndex]->codec;
-
- // Find the decoder for the video stream
- mCodec = avcodec_find_decoder(mCodecContext->codec_id);
- if (mCodec == NULL)
- {
- th_writelog("Unsupported codec!");
- return; // Codec not found
- }
- // Open codec
- if(avcodec_open2(mCodecContext, mCodec, &optionsDict) < 0)
- return; // Could not open codec
-
-#ifdef _FFMPEG_DEBUG
- th_writelog(mName + ": Codec opened");
-#endif
-
-
- mFrame = avcodec_alloc_frame();
-
-#ifdef _FFMPEG_DEBUG
- th_writelog(mName + ": Frame allocated");
-#endif
-
- //AVRational rational = mCodecContext->time_base;
-
- mFPS = 25; //TODOOOOOO!!!
-
- mWidth = mStride = mCodecContext->width;
- mHeight = mCodecContext->height;
- mFrameDuration = 1.0f / mFPS;
- mDuration = mFormatContext->duration / AV_TIME_BASE;
-
- if (mFrameQueue == NULL) // todo - why is this set in the backend class? it should be set in the base class, check other backends as well
- {
- mFrameQueue = new TheoraFrameQueue(this);
- mFrameQueue->setSize(mNumPrecachedFrames);
- }
-}
-
-void TheoraVideoClip_FFmpeg::unload()
-{
- if (mInputBuffer)
- {
-// av_free(mInputBuffer);
- mInputBuffer = NULL;
- }
- if (mAvioContext)
- {
- av_free(mAvioContext);
- mAvioContext = NULL;
- }
- if (mFrame)
- {
- av_free(mFrame);
- mFrame = NULL;
- }
- if (mCodecContext)
- {
- avcodec_close(mCodecContext);
- mCodecContext = NULL;
- }
- if (mFormatContext)
- {
- avformat_close_input(&mFormatContext);
- mFormatContext = NULL;
- }
-}
-
-bool TheoraVideoClip_FFmpeg::_readData()
-{
- return 1;
-}
-
-bool TheoraVideoClip_FFmpeg::decodeNextFrame()
-{
- TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame();
- if (!frame) return 0;
-
- AVPacket packet;
- int frameFinished;
-
- while (av_read_frame(mFormatContext, &packet) >= 0)
- {
- if (packet.stream_index == mVideoStreamIndex)
- {
- avcodec_decode_video2(mCodecContext, mFrame, &frameFinished, &packet);
-
- if (frameFinished)
- {
- TheoraPixelTransform t;
- memset(&t, 0, sizeof(TheoraPixelTransform));
-
- t.y = mFrame->data[0]; t.yStride = mFrame->linesize[0];
- t.u = mFrame->data[1]; t.uStride = mFrame->linesize[1];
- t.v = mFrame->data[2]; t.vStride = mFrame->linesize[2];
-
- frame->decode(&t);
- frame->mTimeToDisplay = mFrameNumber / mFPS;
- frame->mIteration = mIteration;
- frame->_setFrameNumber(mFrameNumber++);
-
- av_free_packet(&packet);
- break;
- }
- }
- av_free_packet(&packet);
- }
- return 1;
-}
-
-void TheoraVideoClip_FFmpeg::decodedAudioCheck()
-{
- if (!mAudioInterface || mTimer->isPaused()) return;
-
- mAudioMutex->lock();
- flushAudioPackets(mAudioInterface);
- mAudioMutex->unlock();
-}
-
-float TheoraVideoClip_FFmpeg::decodeAudio()
-{
- return -1;
-}
-
-void TheoraVideoClip_FFmpeg::doSeek()
-{
-
-}
-
-void TheoraVideoClip_FFmpeg::_restart()
-{
-
-}
-
-#endif
diff --git a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h b/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h
deleted file mode 100644
index 03f9a3d964..0000000000
--- a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#if defined(__FFMPEG) && !defined(_TheoraVideoClip_FFmpeg_h)
-#define _TheoraVideoClip_FFmpeg_h
-
-#include "TheoraAudioPacketQueue.h"
-#include "TheoraVideoClip.h"
-
-struct AVFormatContext;
-struct AVCodecContext;
-struct AVCodec;
-struct AVFrame;
-struct AVIOContext;
-
-class TheoraVideoClip_FFmpeg : public TheoraVideoClip, public TheoraAudioPacketQueue
-{
-protected:
- bool mLoaded;
-
- AVFormatContext* mFormatContext;
- AVCodecContext* mCodecContext;
- AVIOContext* mAvioContext;
- AVCodec* mCodec;
- AVFrame* mFrame;
- unsigned char* mInputBuffer;
- int mVideoStreamIndex;
- int mFrameNumber;
-
- void unload();
- void doSeek();
-public:
- TheoraVideoClip_FFmpeg(TheoraDataSource* data_source,
- TheoraOutputMode output_mode,
- int nPrecachedFrames,
- bool usePower2Stride);
- ~TheoraVideoClip_FFmpeg();
-
- bool _readData();
- bool decodeNextFrame();
- void _restart();
- void load(TheoraDataSource* source);
- float decodeAudio();
- void decodedAudioCheck();
- std::string getDecoderName() { return "FFmpeg"; }
-};
-
-#endif
diff --git a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp b/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp
deleted file mode 100644
index c4f070ec50..0000000000
--- a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp
+++ /dev/null
@@ -1,703 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef __THEORA
-#include <memory.h>
-#include <algorithm>
-#include "TheoraVideoManager.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraAudioInterface.h"
-#include "TheoraTimer.h"
-#include "TheoraDataSource.h"
-#include "TheoraUtil.h"
-#include "TheoraException.h"
-#include "TheoraVideoClip_Theora.h"
-#include "TheoraPixelTransform.h"
-
-TheoraVideoClip_Theora::TheoraVideoClip_Theora(TheoraDataSource* data_source,
- TheoraOutputMode output_mode,
- int nPrecachedFrames,
- bool usePower2Stride):
- TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride),
- TheoraAudioPacketQueue()
-{
- mInfo.TheoraDecoder = NULL;
- mInfo.TheoraSetup = NULL;
- mVorbisStreams = mTheoraStreams = 0;
- mReadAudioSamples = 0;
- mLastDecodedFrameNumber = 0;
-}
-
-TheoraVideoClip_Theora::~TheoraVideoClip_Theora()
-{
- if (mInfo.TheoraDecoder)
- {
- th_decode_free(mInfo.TheoraDecoder);
- th_setup_free(mInfo.TheoraSetup);
-
- if (mAudioInterface)
- {
- vorbis_dsp_clear(&mInfo.VorbisDSPState);
- vorbis_block_clear(&mInfo.VorbisBlock);
- }
-
- ogg_stream_clear(&mInfo.TheoraStreamState);
- th_comment_clear(&mInfo.TheoraComment);
- th_info_clear(&mInfo.TheoraInfo);
-
- ogg_stream_clear(&mInfo.VorbisStreamState);
- vorbis_comment_clear(&mInfo.VorbisComment);
- vorbis_info_clear(&mInfo.VorbisInfo);
-
- ogg_sync_clear(&mInfo.OggSyncState);
- }
-}
-
-bool TheoraVideoClip_Theora::_readData()
-{
- int audio_eos = 0, serno;
- float audio_time = 0;
- float time = mTimer->getTime();
- if (mRestarted) time = 0;
-
- for (;;)
- {
- char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
- int bytes_read = mStream->read(buffer, 4096);
- ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
-
- if (bytes_read < 4096)
- {
- if (bytes_read == 0)
- {
- if (!mAutoRestart) mEndOfFile = true;
- return 0;
- }
- }
- // when we fill the stream with enough pages, it'll start spitting out packets
- // which contain keyframes, delta frames or audio data
- while (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0)
- {
- serno = ogg_page_serialno(&mInfo.OggPage);
- if (serno == mInfo.TheoraStreamState.serialno) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage);
- if (mAudioInterface && serno == mInfo.VorbisStreamState.serialno)
- {
- ogg_int64_t g = ogg_page_granulepos(&mInfo.OggPage);
- audio_time = (float) vorbis_granule_time(&mInfo.VorbisDSPState, g);
- audio_eos = ogg_page_eos(&mInfo.OggPage);
- ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage);
- }
- }
- if (!(mAudioInterface && !audio_eos && audio_time < time + 1.0f))
- break;
- }
- return 1;
-}
-
-bool TheoraVideoClip_Theora::decodeNextFrame()
-{
- if (mEndOfFile) return 0;
-
- TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame();
- if (!frame) return 0; // max number of precached frames reached
- bool should_restart = 0;
- ogg_packet opTheora;
- ogg_int64_t granulePos;
- th_ycbcr_buffer buff;
- int ret, nAttempts;
- for (;;)
- {
- // ogg_stream_packetout can return -1 and the official docs suggest to do subsequent calls until it succeeds
- // because the data is out of sync. still will limit the number of attempts just in case
- for (ret = -1, nAttempts = 0; ret < 0 && nAttempts < 100; nAttempts++)
- {
- ret = ogg_stream_packetout(&mInfo.TheoraStreamState, &opTheora);
- }
-
- if (ret > 0)
- {
- int status = th_decode_packetin(mInfo.TheoraDecoder, &opTheora, &granulePos);
- if (status != 0 && status != TH_DUPFRAME) continue; // 0 means success
-
- float time = (float) th_granule_time(mInfo.TheoraDecoder, granulePos);
- unsigned long frame_number = (unsigned long) th_granule_frame(mInfo.TheoraDecoder, granulePos);
-
- if (time < mTimer->getTime() && !mRestarted && frame_number % 16 != 0)
- {
- // %16 operation is here to prevent a playback halt during video playback if the decoder can't keep up with demand.
-#ifdef _DEBUG
- th_writelog(mName + ": pre-dropped frame " + str((int) frame_number));
-#endif
- ++mNumDroppedFrames;
- continue; // drop frame
- }
- frame->mTimeToDisplay = time - mFrameDuration;
- frame->mIteration = mIteration;
- frame->_setFrameNumber(frame_number);
- mLastDecodedFrameNumber = frame_number;
- th_decode_ycbcr_out(mInfo.TheoraDecoder, buff);
- TheoraPixelTransform t;
- memset(&t, 0, sizeof(TheoraPixelTransform));
-
- t.y = buff[0].data; t.yStride = buff[0].stride;
- t.u = buff[1].data; t.uStride = buff[1].stride;
- t.v = buff[2].data; t.vStride = buff[2].stride;
- frame->decode(&t);
- break;
- }
- else
- {
- if (!_readData())
- {
- frame->mInUse = 0;
- should_restart = mAutoRestart;
- break;
- }
- }
- }
-
- if (mAudioInterface != NULL)
- {
- mAudioMutex->lock();
- decodeAudio();
- mAudioMutex->unlock();
- }
- if (should_restart)
- {
- ++mIteration;
- _restart();
- }
- return 1;
-}
-
-void TheoraVideoClip_Theora::_restart()
-{
- bool paused = mTimer->isPaused();
- if (!paused) mTimer->pause();
- long granule=0;
- th_decode_ctl(mInfo.TheoraDecoder,TH_DECCTL_SET_GRANPOS,&granule,sizeof(granule));
- th_decode_free(mInfo.TheoraDecoder);
- mInfo.TheoraDecoder=th_decode_alloc(&mInfo.TheoraInfo,mInfo.TheoraSetup);
- ogg_stream_reset(&mInfo.TheoraStreamState);
- if (mAudioInterface)
- {
- // empty the DSP buffer
- //float **pcm;
- //int len = vorbis_synthesis_pcmout(&mInfo.VorbisDSPState,&pcm);
- //if (len) vorbis_synthesis_read(&mInfo.VorbisDSPState,len);
- ogg_packet opVorbis;
- mReadAudioSamples = 0;
- while (ogg_stream_packetout(&mInfo.VorbisStreamState,&opVorbis) > 0)
- {
- if (vorbis_synthesis(&mInfo.VorbisBlock,&opVorbis) == 0)
- vorbis_synthesis_blockin(&mInfo.VorbisDSPState,&mInfo.VorbisBlock);
- }
- ogg_stream_reset(&mInfo.VorbisStreamState);
- }
-
- ogg_sync_reset(&mInfo.OggSyncState);
- mStream->seek(0);
- ogg_int64_t granulePos = 0;
- th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &granulePos, sizeof(granule));
-
- mEndOfFile = false;
-
- mRestarted = 1;
-
- if (!paused) mTimer->play();
-}
-
-void TheoraVideoClip_Theora::load(TheoraDataSource* source)
-{
-#ifdef _DEBUG
- th_writelog("-----");
-#endif
- mStream = source;
- readTheoraVorbisHeaders();
-
- mInfo.TheoraDecoder = th_decode_alloc(&mInfo.TheoraInfo,mInfo.TheoraSetup);
-
- mWidth = mInfo.TheoraInfo.frame_width;
- mHeight = mInfo.TheoraInfo.frame_height;
- mSubFrameWidth = mInfo.TheoraInfo.pic_width;
- mSubFrameHeight = mInfo.TheoraInfo.pic_height;
- mSubFrameOffsetX = mInfo.TheoraInfo.pic_x;
- mSubFrameOffsetY = mInfo.TheoraInfo.pic_y;
- mStride = (mStride == 1) ? mStride = _nextPow2(getWidth()) : getWidth();
- mFPS = mInfo.TheoraInfo.fps_numerator / (float) mInfo.TheoraInfo.fps_denominator;
-
-#ifdef _DEBUG
- th_writelog("width: " + str(mWidth) + ", height: " + str(mHeight) + ", fps: " + str((int) getFPS()));
-#endif
- mFrameQueue = new TheoraFrameQueue(this);
- mFrameQueue->setSize(mNumPrecachedFrames);
- // find out the duration of the file by seeking to the end
- // having ogg decode pages, extract the granule pos from
- // the last theora page and seek back to beginning of the file
- long streamSize = mStream->size(), seekPos;
- for (int i = 1; i <= 50; ++i)
- {
- ogg_sync_reset(&mInfo.OggSyncState);
- seekPos = streamSize - 4096 * i;
- if (seekPos < 0) seekPos = 0;
- mStream->seek(seekPos);
-
- char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096 * i);
- int bytes_read = mStream->read(buffer, 4096 * i);
- ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
- ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage);
-
- for (;;)
- {
- int ret = ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage);
- if (ret == 0) break;
- // if page is not a theora page, skip it
- if (ogg_page_serialno(&mInfo.OggPage) != mInfo.TheoraStreamState.serialno) continue;
-
- ogg_int64_t granule = ogg_page_granulepos(&mInfo.OggPage);
- if (granule >= 0)
- {
- mNumFrames = (int) th_granule_frame(mInfo.TheoraDecoder, granule) + 1;
- }
- else if (mNumFrames > 0)
- ++mNumFrames; // append delta frames at the end to get the exact numbe
- }
- if (mNumFrames > 0 || streamSize - 4096 * i < 0) break;
-
- }
- if (mNumFrames < 0)
- th_writelog("unable to determine file duration!");
- else
- {
- mDuration = mNumFrames / mFPS;
-#ifdef _DEBUG
- th_writelog("duration: " + strf(mDuration) + " seconds");
-#endif
- }
- // restore to beginning of stream.
- ogg_sync_reset(&mInfo.OggSyncState);
- mStream->seek(0);
-
- if (mVorbisStreams) // if there is no audio interface factory defined, even though the video
- // clip might have audio, it will be ignored
- {
- vorbis_synthesis_init(&mInfo.VorbisDSPState, &mInfo.VorbisInfo);
- vorbis_block_init(&mInfo.VorbisDSPState, &mInfo.VorbisBlock);
- mNumAudioChannels = mInfo.VorbisInfo.channels;
- mAudioFrequency = (int) mInfo.VorbisInfo.rate;
-
- // create an audio interface instance if available
- TheoraAudioInterfaceFactory* audio_factory = TheoraVideoManager::getSingleton().getAudioInterfaceFactory();
- printf("**** audio factory is %p\n", audio_factory);
- if (audio_factory) setAudioInterface(audio_factory->createInstance(this, mNumAudioChannels, mAudioFrequency));
- }
-
- mFrameDuration = 1.0f / getFPS();
-#ifdef _DEBUG
- th_writelog("-----");
-#endif
-}
-
-void TheoraVideoClip_Theora::readTheoraVorbisHeaders()
-{
- ogg_packet tempOggPacket;
- bool done = false;
- bool decode_audio=TheoraVideoManager::getSingleton().getAudioInterfaceFactory() != NULL;
- //init Vorbis/Theora Layer
- //Ensure all structures get cleared out.
- memset(&mInfo.OggSyncState, 0, sizeof(ogg_sync_state));
- memset(&mInfo.OggPage, 0, sizeof(ogg_page));
- memset(&mInfo.VorbisStreamState, 0, sizeof(ogg_stream_state));
- memset(&mInfo.TheoraStreamState, 0, sizeof(ogg_stream_state));
- memset(&mInfo.TheoraInfo, 0, sizeof(th_info));
- memset(&mInfo.TheoraComment, 0, sizeof(th_comment));
- memset(&mInfo.VorbisInfo, 0, sizeof(vorbis_info));
- memset(&mInfo.VorbisDSPState, 0, sizeof(vorbis_dsp_state));
- memset(&mInfo.VorbisBlock, 0, sizeof(vorbis_block));
- memset(&mInfo.VorbisComment, 0, sizeof(vorbis_comment));
-
- ogg_sync_init(&mInfo.OggSyncState);
- th_comment_init(&mInfo.TheoraComment);
- th_info_init(&mInfo.TheoraInfo);
- vorbis_info_init(&mInfo.VorbisInfo);
- vorbis_comment_init(&mInfo.VorbisComment);
-
- while (!done)
- {
- char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
- int bytes_read = mStream->read(buffer, 4096);
- ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
-
- if (bytes_read == 0)
- break;
-
- while (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0)
- {
- ogg_stream_state OggStateTest;
-
- //is this an initial header? If not, stop
- if (!ogg_page_bos(&mInfo.OggPage))
- {
- //This is done blindly, because stream only accept themselves
- if (mTheoraStreams) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage);
- if (mVorbisStreams) ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage);
-
- done=true;
- break;
- }
-
- ogg_stream_init(&OggStateTest, ogg_page_serialno(&mInfo.OggPage));
- ogg_stream_pagein(&OggStateTest, &mInfo.OggPage);
- ogg_stream_packetout(&OggStateTest, &tempOggPacket);
-
- //identify the codec
- int ret;
- if (!mTheoraStreams)
- {
- ret = th_decode_headerin(&mInfo.TheoraInfo, &mInfo.TheoraComment, &mInfo.TheoraSetup, &tempOggPacket);
-
- if (ret > 0)
- {
- //This is the Theora Header
- memcpy(&mInfo.TheoraStreamState, &OggStateTest, sizeof(OggStateTest));
- mTheoraStreams = 1;
- continue;
- }
- }
- if (decode_audio && !mVorbisStreams &&
- vorbis_synthesis_headerin(&mInfo.VorbisInfo, &mInfo.VorbisComment, &tempOggPacket) >=0)
- {
- //This is vorbis header
- memcpy(&mInfo.VorbisStreamState, &OggStateTest, sizeof(OggStateTest));
- mVorbisStreams = 1;
- continue;
- }
- //Hmm. I guess it's not a header we support, so erase it
- ogg_stream_clear(&OggStateTest);
- }
- }
-
- while ((mTheoraStreams && (mTheoraStreams < 3)) ||
- (mVorbisStreams && (mVorbisStreams < 3)))
- {
- //Check 2nd'dary headers... Theora First
- int iSuccess;
- while (mTheoraStreams && mTheoraStreams < 3 &&
- (iSuccess = ogg_stream_packetout(&mInfo.TheoraStreamState, &tempOggPacket)))
- {
- if (iSuccess < 0)
- throw TheoraGenericException("Error parsing Theora stream headers.");
- if (!th_decode_headerin(&mInfo.TheoraInfo, &mInfo.TheoraComment, &mInfo.TheoraSetup, &tempOggPacket))
- throw TheoraGenericException("invalid theora stream");
-
- ++mTheoraStreams;
- } //end while looking for more theora headers
-
- //look 2nd vorbis header packets
- while (mVorbisStreams < 3 && (iSuccess = ogg_stream_packetout(&mInfo.VorbisStreamState, &tempOggPacket)))
- {
- if (iSuccess < 0)
- throw TheoraGenericException("Error parsing vorbis stream headers");
-
- if (vorbis_synthesis_headerin(&mInfo.VorbisInfo, &mInfo.VorbisComment,&tempOggPacket))
- throw TheoraGenericException("invalid stream");
-
- ++mVorbisStreams;
- } //end while looking for more vorbis headers
-
- //Not finished with Headers, get some more file data
- if (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0)
- {
- if (mTheoraStreams) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage);
- if (mVorbisStreams) ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage);
- }
- else
- {
- char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
- int bytes_read = mStream->read(buffer, 4096);
- ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
-
- if (bytes_read == 0)
- throw TheoraGenericException("End of file found prematurely");
- }
- } //end while looking for all headers
- // writelog("Vorbis Headers: " + str(mVorbisHeaders) + " Theora Headers : " + str(mTheoraHeaders));
-}
-
-void TheoraVideoClip_Theora::decodedAudioCheck()
-{
- if (!mAudioInterface || mTimer->isPaused()) return;
-
- mAudioMutex->lock();
- flushAudioPackets(mAudioInterface);
- mAudioMutex->unlock();
-}
-
-float TheoraVideoClip_Theora::decodeAudio()
-{
- if (mRestarted) return -1;
-
- ogg_packet opVorbis;
- float **pcm;
- int len = 0;
- float timestamp = -1;
- bool read_past_timestamp = 0;
-
- float factor = 1.0f / mAudioFrequency;
- float videoTime = (float) mLastDecodedFrameNumber / mFPS;
- float min = mFrameQueue->getSize() / mFPS + 1.0f;
-
- for (;;)
- {
- len = vorbis_synthesis_pcmout(&mInfo.VorbisDSPState, &pcm);
- if (len == 0)
- {
- if (ogg_stream_packetout(&mInfo.VorbisStreamState, &opVorbis) > 0)
- {
- if (vorbis_synthesis(&mInfo.VorbisBlock, &opVorbis) == 0)
- {
- if (timestamp < 0 && opVorbis.granulepos >= 0)
- {
- timestamp = (float) vorbis_granule_time(&mInfo.VorbisDSPState, opVorbis.granulepos);
- }
- else if (timestamp >= 0) read_past_timestamp = 1;
- vorbis_synthesis_blockin(&mInfo.VorbisDSPState, &mInfo.VorbisBlock);
- }
- continue;
- }
- else
- {
- float audioTime = mReadAudioSamples * factor;
- // always buffer up of audio ahead of the frames
- if (audioTime - videoTime < min)
- {
- if (!_readData()) break;
- }
- else
- break;
- }
- }
- addAudioPacket(pcm, len, mAudioGain);
- mReadAudioSamples += len;
- if (read_past_timestamp) timestamp += (float) len / mInfo.VorbisInfo.rate;
- vorbis_synthesis_read(&mInfo.VorbisDSPState, len); // tell vorbis we read a number of samples
- }
- return timestamp;
-}
-
-long TheoraVideoClip_Theora::seekPage(long targetFrame, bool return_keyframe)
-{
- int i,seek_min = 0, seek_max = (int) mStream->size();
- long frame;
- ogg_int64_t granule = 0;
-
- if (targetFrame == 0) mStream->seek(0);
- for (i = (targetFrame == 0) ? 100 : 0; i < 100; ++i)
- {
- ogg_sync_reset(&mInfo.OggSyncState);
- mStream->seek((seek_min + seek_max) / 2); // do a binary search
- memset(&mInfo.OggPage, 0, sizeof(ogg_page));
- ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage);
-
- for (;i < 1000;)
- {
- int ret = ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage);
- if (ret == 1)
- {
- int serno = ogg_page_serialno(&mInfo.OggPage);
- if (serno == mInfo.TheoraStreamState.serialno)
- {
- granule = ogg_page_granulepos(&mInfo.OggPage);
- if (granule >= 0)
- {
- frame = (long) th_granule_frame(mInfo.TheoraDecoder, granule);
- if (frame < targetFrame && targetFrame - frame < 10)
- {
- // we're close enough, let's break this.
- i = 1000;
- break;
- }
- // we're not close enough, let's shorten the borders of the binary search
- if (targetFrame - 1 > frame) seek_min = (seek_min + seek_max) / 2;
- else seek_max = (seek_min + seek_max) / 2;
- break;
- }
- }
- }
- else
- {
- char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
- int bytes_read = mStream->read(buffer, 4096);
- if (bytes_read == 0) break;
- ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
- }
- }
- }
- if (return_keyframe) return (long) (granule >> mInfo.TheoraInfo.keyframe_granule_shift);
-
- ogg_sync_reset(&mInfo.OggSyncState);
- memset(&mInfo.OggPage, 0, sizeof(ogg_page));
- ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage);
- if (targetFrame == 0) return -1;
- mStream->seek((seek_min + seek_max) / 2); // do a binary search
- return -1;
-}
-
-void TheoraVideoClip_Theora::doSeek()
-{
-#if _DEBUG
- th_writelog(mName + " [seek]: seeking to frame " + str(mSeekFrame));
-#endif
- int frame;
- float time = mSeekFrame / getFPS();
- mTimer->seek(time);
- bool paused = mTimer->isPaused();
- if (!paused) mTimer->pause(); // pause until seeking is done
-
- mEndOfFile = false;
- mRestarted = false;
-
- resetFrameQueue();
- // reset the video decoder.
- ogg_stream_reset(&mInfo.TheoraStreamState);
- th_decode_free(mInfo.TheoraDecoder);
- mInfo.TheoraDecoder = th_decode_alloc(&mInfo.TheoraInfo, mInfo.TheoraSetup);
-
- if (mAudioInterface)
- {
- mAudioMutex->lock();
- ogg_stream_reset(&mInfo.VorbisStreamState);
- vorbis_synthesis_restart(&mInfo.VorbisDSPState);
- destroyAllAudioPackets();
- }
- // first seek to desired frame, then figure out the location of the
- // previous keyframe and seek to it.
- // then by setting the correct time, the decoder will skip N frames untill
- // we get the frame we want.
- frame = (int) seekPage(mSeekFrame, 1); // find the keyframe nearest to the target frame
-#ifdef _DEBUG
- // th_writelog(mName + " [seek]: nearest keyframe for frame " + str(mSeekFrame) + " is frame: " + str(frame));
-#endif
- seekPage(std::max(0, frame - 1), 0);
-
- ogg_packet opTheora;
- ogg_int64_t granulePos;
- bool granule_set = 0;
- if (frame <= 1)
- {
- if (mInfo.TheoraInfo.version_major == 3 && mInfo.TheoraInfo.version_minor == 2 && mInfo.TheoraInfo.version_subminor == 0)
- granulePos = 0;
- else
- granulePos = 1; // because of difference in granule interpretation in theora streams 3.2.0 and newer ones
- th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &granulePos, sizeof(granulePos));
- granule_set = 1;
- }
-
- // now that we've found the keyframe that preceeds our desired frame, lets keep on decoding frames until we
- // reach our target frame.
-
- int status, ret;
- for (;mSeekFrame != 0;)
- {
- ret = ogg_stream_packetout(&mInfo.TheoraStreamState, &opTheora);
- if (ret > 0)
- {
- if (!granule_set)
- {
- // theora decoder requires to set the granule pos after seek to be able to determine the current frame
- if (opTheora.granulepos >= 0)
- {
- th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &opTheora.granulepos, sizeof(opTheora.granulepos));
- granule_set = 1;
- }
- else continue; // ignore prev delta frames until we hit a keyframe
- }
- status = th_decode_packetin(mInfo.TheoraDecoder, &opTheora, &granulePos);
- if (status != 0 && status != TH_DUPFRAME) continue;
- frame = (int) th_granule_frame(mInfo.TheoraDecoder, granulePos);
- if (frame >= mSeekFrame - 1) break;
- }
- else
- {
- if (!_readData())
- {
- th_writelog(mName + " [seek]: fineseeking failed, _readData failed!");
- if (mAudioInterface) mAudioMutex->unlock();
- return;
- }
- }
- }
-#ifdef _DEBUG
- // th_writelog(mName + " [seek]: fineseeked to frame " + str(frame + 1) + ", requested: " + str(mSeekFrame));
-#endif
- if (mAudioInterface)
- {
- // read audio data until we reach a timestamp. this usually takes only one iteration, but just in case let's
- // wrap it in a loop
- float timestamp;
- for (;;)
- {
- timestamp = decodeAudio();
- if (timestamp >= 0) break;
- else _readData();
- }
- float rate = (float) mAudioFrequency * mNumAudioChannels;
- float queued_time = getAudioPacketQueueLength();
- // at this point there are only 2 possibilities: either we have too much packets and we have to delete
- // the first N ones, or we don't have enough, so let's fill the gap with silence.
- if (time > timestamp - queued_time)
- {
- while (mTheoraAudioPacketQueue != NULL)
- {
- if (time > timestamp - queued_time + mTheoraAudioPacketQueue->numSamples / rate)
- {
- queued_time -= mTheoraAudioPacketQueue->numSamples / rate;
- destroyAudioPacket(popAudioPacket());
- }
- else
- {
- int n_trim = (int) ((timestamp - queued_time + mTheoraAudioPacketQueue->numSamples / rate - time) * rate);
- if (mTheoraAudioPacketQueue->numSamples - n_trim <= 0)
- destroyAudioPacket(popAudioPacket()); // if there's no data to be left, just destroy it
- else
- {
- for (int i = n_trim, j = 0; i < mTheoraAudioPacketQueue->numSamples; ++i, ++j)
- mTheoraAudioPacketQueue->pcm[j] = mTheoraAudioPacketQueue->pcm[i];
- mTheoraAudioPacketQueue->numSamples -= n_trim;
- }
- break;
- }
- }
- }
- else
- {
- // expand the first packet with silence.
- if (mTheoraAudioPacketQueue) // just in case!
- {
- int i, j, nmissing = (int) ((timestamp - queued_time - time) * rate);
- if (nmissing > 0)
- {
- float* samples = new float[nmissing + mTheoraAudioPacketQueue->numSamples];
- for (i = 0; i < nmissing; ++i) samples[i] = 0;
- for (j = 0; i < nmissing + mTheoraAudioPacketQueue->numSamples; ++i, ++j)
- samples[i] = mTheoraAudioPacketQueue->pcm[j];
- delete [] mTheoraAudioPacketQueue->pcm;
- mTheoraAudioPacketQueue->pcm = samples;
- }
- }
- }
- mLastDecodedFrameNumber = mSeekFrame;
- mReadAudioSamples = (unsigned int) (timestamp * mAudioFrequency);
-
- mAudioMutex->unlock();
- }
- if (!paused) mTimer->play();
- mSeekFrame = -1;
-}
-#endif
diff --git a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h b/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h
deleted file mode 100644
index c64c183029..0000000000
--- a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#if defined(__THEORA) && !defined(_TheoraVideoClip_Theora_h)
-#define _TheoraVideoClip_Theora_h
-
-#include <ogg/ogg.h>
-#include <vorbis/vorbisfile.h>
-#include <theora/theoradec.h>
-#include "TheoraAudioPacketQueue.h"
-#include "TheoraVideoClip.h"
-
-struct TheoraInfoStruct
-{
- // ogg/vorbis/theora variables
- ogg_sync_state OggSyncState;
- ogg_page OggPage;
- ogg_stream_state VorbisStreamState;
- ogg_stream_state TheoraStreamState;
- //Theora State
- th_info TheoraInfo;
- th_comment TheoraComment;
- th_setup_info* TheoraSetup;
- th_dec_ctx* TheoraDecoder;
- //Vorbis State
- vorbis_info VorbisInfo;
- vorbis_dsp_state VorbisDSPState;
- vorbis_block VorbisBlock;
- vorbis_comment VorbisComment;
-};
-
-class TheoraVideoClip_Theora : public TheoraVideoClip, public TheoraAudioPacketQueue
-{
-protected:
- TheoraInfoStruct mInfo; // a pointer is used to avoid having to include theora & vorbis headers
- int mTheoraStreams, mVorbisStreams; // Keeps track of Theora and Vorbis Streams
-
- long seekPage(long targetFrame, bool return_keyframe);
- void doSeek();
- void readTheoraVorbisHeaders();
- unsigned int mReadAudioSamples;
- unsigned long mLastDecodedFrameNumber;
-public:
- TheoraVideoClip_Theora(TheoraDataSource* data_source,
- TheoraOutputMode output_mode,
- int nPrecachedFrames,
- bool usePower2Stride);
- ~TheoraVideoClip_Theora();
-
- bool _readData();
- bool decodeNextFrame();
- void _restart();
- void load(TheoraDataSource* source);
- float decodeAudio();
- void decodedAudioCheck();
- std::string getDecoderName() { return "Theora"; }
-};
-
-#endif
diff --git a/drivers/theoraplayer/src/TheoraAsync.cpp b/drivers/theoraplayer/src/TheoraAsync.cpp
deleted file mode 100644
index cc3b7a4bf5..0000000000
--- a/drivers/theoraplayer/src/TheoraAsync.cpp
+++ /dev/null
@@ -1,253 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifdef _WIN32
-#include <windows.h>
-#else
-#include <unistd.h>
-#include <pthread.h>
-#endif
-
-#include "TheoraAsync.h"
-#include "TheoraUtil.h"
-
-#ifdef _WINRT
-#include <wrl.h>
-#endif
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Mutex
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-TheoraMutex::TheoraMutex()
-{
-#ifdef _WIN32
-#ifndef _WINRT // WinXP does not have CreateTheoraMutexEx()
- mHandle = CreateMutex(0, 0, 0);
-#else
- mHandle = CreateMutexEx(NULL, NULL, 0, SYNCHRONIZE);
-#endif
-#else
- mHandle = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
- pthread_mutex_init((pthread_mutex_t*)mHandle, 0);
-#endif
-}
-
-TheoraMutex::~TheoraMutex()
-{
-#ifdef _WIN32
- CloseHandle(mHandle);
-#else
- pthread_mutex_destroy((pthread_mutex_t*)mHandle);
- free((pthread_mutex_t*)mHandle);
- mHandle = NULL;
-#endif
-}
-
-void TheoraMutex::lock()
-{
-#ifdef _WIN32
- WaitForSingleObjectEx(mHandle, INFINITE, FALSE);
-#else
- pthread_mutex_lock((pthread_mutex_t*)mHandle);
-#endif
-}
-
-void TheoraMutex::unlock()
-{
-#ifdef _WIN32
- ReleaseMutex(mHandle);
-#else
- pthread_mutex_unlock((pthread_mutex_t*)mHandle);
-#endif
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Thread
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifdef _WINRT
-using namespace Windows::Foundation;
-using namespace Windows::System::Threading;
-#endif
-
-#ifdef _WIN32
-unsigned long WINAPI theoraAsyncCall(void* param)
-#else
-void* theoraAsyncCall(void* param)
-#endif
-{
- TheoraThread* t = (TheoraThread*)param;
- t->execute();
-#ifdef _WIN32
- return 0;
-#else
- pthread_exit(NULL);
- return NULL;
-#endif
-}
-
-#ifdef _WINRT
-struct TheoraAsyncActionWrapper
-{
-public:
- IAsyncAction^ mAsyncAction;
- TheoraAsyncActionWrapper(IAsyncAction^ asyncAction)
- {
- mAsyncAction = asyncAction;
- }
-};
-#endif
-
-TheoraThread::TheoraThread() : mRunning(false), mId(0)
-{
-#ifndef _WIN32
- mId = (pthread_t*)malloc(sizeof(pthread_t));
-#endif
-}
-
-TheoraThread::~TheoraThread()
-{
- if (mRunning)
- {
- stop();
- }
- if (mId != NULL)
- {
-#ifdef _WIN32
-#ifndef _WINRT
- CloseHandle(mId);
-#else
- delete mId;
-#endif
-#else
- free((pthread_t*)mId);
-#endif
- mId = NULL;
- }
-}
-
-void TheoraThread::start()
-{
- mRunning = true;
-#ifdef _WIN32
-#ifndef _WINRT
- mId = CreateThread(0, 0, &theoraAsyncCall, this, 0, 0);
-#else
- mId = new TheoraAsyncActionWrapper(ThreadPool::RunAsync(
- ref new WorkItemHandler([&](IAsyncAction^ work_item)
- {
- execute();
- }),
- WorkItemPriority::Normal, WorkItemOptions::TimeSliced));
-#endif
-#else
- pthread_create((pthread_t*)mId, NULL, &theoraAsyncCall, this);
-#endif
-}
-
-bool TheoraThread::isRunning()
-{
- bool ret;
- mRunningMutex.lock();
- ret = mRunning;
- mRunningMutex.unlock();
-
- return ret;
-}
-
-void TheoraThread::join()
-{
- mRunningMutex.lock();
- mRunning = false;
- mRunningMutex.unlock();
-#ifdef _WIN32
-#ifndef _WINRT
- WaitForSingleObject(mId, INFINITE);
- if (mId != NULL)
- {
- CloseHandle(mId);
- mId = NULL;
- }
-#else
- IAsyncAction^ action = ((TheoraAsyncActionWrapper*)mId)->mAsyncAction;
- int i = 0;
- while (action->Status != AsyncStatus::Completed &&
- action->Status != AsyncStatus::Canceled &&
- action->Status != AsyncStatus::Error &&
- i < 100)
- {
- _psleep(50);
- ++i;
- }
- if (i >= 100)
- {
- i = 0;
- action->Cancel();
- while (action->Status != AsyncStatus::Completed &&
- action->Status != AsyncStatus::Canceled &&
- action->Status != AsyncStatus::Error &&
- i < 100)
- {
- _psleep(50);
- ++i;
- }
- }
-#endif
-#else
- pthread_join(*((pthread_t*)mId), 0);
-#endif
-}
-
-void TheoraThread::resume()
-{
-#ifdef _WIN32
-#ifndef _WINRT
- ResumeThread(mId);
-#else
- // not available in WinRT
-#endif
-#endif
-}
-
-void TheoraThread::pause()
-{
-#ifdef _WIN32
-#ifndef _WINRT
- SuspendThread(mId);
-#else
- // not available in WinRT
-#endif
-#endif
-}
-
-void TheoraThread::stop()
-{
- if (mRunning)
- {
- mRunningMutex.lock();
- mRunning = false;
- mRunningMutex.unlock();
-#ifdef _WIN32
-#ifndef _WINRT
- TerminateThread(mId, 0);
-#else
- ((TheoraAsyncActionWrapper*)mId)->mAsyncAction->Cancel();
-#endif
-#elif defined(_ANDROID)
- pthread_kill(*((pthread_t*)mId), 0);
-#else
- pthread_cancel(*((pthread_t*)mId));
-#endif
- }
-}
-
diff --git a/drivers/theoraplayer/src/TheoraAudioInterface.cpp b/drivers/theoraplayer/src/TheoraAudioInterface.cpp
deleted file mode 100644
index a265cb57b5..0000000000
--- a/drivers/theoraplayer/src/TheoraAudioInterface.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraAudioInterface.h"
-
-TheoraAudioInterface::TheoraAudioInterface(TheoraVideoClip* owner, int nChannels, int freq)
-{
- mFreq = freq;
- mNumChannels = nChannels;
- mClip = owner;
-}
-
-TheoraAudioInterface::~TheoraAudioInterface()
-{
-
-}
diff --git a/drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp b/drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp
deleted file mode 100644
index be5e1018f9..0000000000
--- a/drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include <stdlib.h>
-#include "TheoraAudioPacketQueue.h"
-#include "TheoraAudioInterface.h"
-
-TheoraAudioPacketQueue::TheoraAudioPacketQueue()
-{
- mTheoraAudioPacketQueue = NULL;
-}
-
-TheoraAudioPacketQueue::~TheoraAudioPacketQueue()
-{
- destroyAllAudioPackets();
-}
-
-float TheoraAudioPacketQueue::getAudioPacketQueueLength()
-{
- float len = 0;
- for (TheoraAudioPacket* p = mTheoraAudioPacketQueue; p != NULL; p = p->next)
- len += p->numSamples;
-
- return len / (mAudioFrequency * mNumAudioChannels);
-}
-
-void TheoraAudioPacketQueue::_addAudioPacket(float* data, int numSamples)
-{
- TheoraAudioPacket* packet = new TheoraAudioPacket;
- packet->pcm = data;
- packet->numSamples = numSamples;
- packet->next = NULL;
-
-
- if (mTheoraAudioPacketQueue == NULL) mTheoraAudioPacketQueue = packet;
- else
- {
- TheoraAudioPacket* last = mTheoraAudioPacketQueue;
- for (TheoraAudioPacket* p = last; p != NULL; p = p->next)
- last = p;
- last->next = packet;
- }
-}
-
-void TheoraAudioPacketQueue::addAudioPacket(float** buffer, int numSamples, float gain)
-{
- float* data = new float[numSamples * mNumAudioChannels];
- float* dataptr = data;
- int i;
- unsigned int j;
-
- if (gain < 1.0f)
- {
- // apply gain, let's attenuate the samples
- for (i = 0; i < numSamples; ++i)
- for (j = 0; j < mNumAudioChannels; j++, ++dataptr)
- *dataptr = buffer[i][j] * gain;
- }
- else
- {
- // do a simple copy, faster then the above method, when gain is 1.0f
- for (i = 0; i < numSamples; ++i)
- for (j = 0; j < mNumAudioChannels; j++, ++dataptr)
- *dataptr = buffer[j][i];
- }
-
- _addAudioPacket(data, numSamples * mNumAudioChannels);
-}
-
-void TheoraAudioPacketQueue::addAudioPacket(float* buffer, int numSamples, float gain)
-{
- float* data = new float[numSamples * mNumAudioChannels];
- float* dataptr = data;
- int i, numFloats = numSamples * mNumAudioChannels;
-
- if (gain < 1.0f)
- {
- // apply gain, let's attenuate the samples
- for (i = 0; i < numFloats; ++i, dataptr++)
- *dataptr = buffer[i] * gain;
- }
- else
- {
- // do a simple copy, faster then the above method, when gain is 1.0f
- for (i = 0; i < numFloats; ++i, dataptr++)
- *dataptr = buffer[i];
- }
-
- _addAudioPacket(data, numFloats);
-}
-
-TheoraAudioPacket* TheoraAudioPacketQueue::popAudioPacket()
-{
- if (mTheoraAudioPacketQueue == NULL) return NULL;
- TheoraAudioPacket* p = mTheoraAudioPacketQueue;
- mTheoraAudioPacketQueue = mTheoraAudioPacketQueue->next;
- return p;
-}
-
-void TheoraAudioPacketQueue::destroyAudioPacket(TheoraAudioPacket* p)
-{
- if (p == NULL) return;
- delete [] p->pcm;
- delete p;
-}
-
-void TheoraAudioPacketQueue::destroyAllAudioPackets()
-{
- for (TheoraAudioPacket* p = popAudioPacket(); p != NULL; p = popAudioPacket())
- destroyAudioPacket(p);
-}
-
-void TheoraAudioPacketQueue::flushAudioPackets(TheoraAudioInterface* audioInterface)
-{
-
- for (TheoraAudioPacket* p = popAudioPacket(); p != NULL; p = popAudioPacket())
- {
- audioInterface->insertData(p->pcm, p->numSamples);
- destroyAudioPacket(p);
- }
-} \ No newline at end of file
diff --git a/drivers/theoraplayer/src/TheoraDataSource.cpp b/drivers/theoraplayer/src/TheoraDataSource.cpp
deleted file mode 100644
index 6011dc6783..0000000000
--- a/drivers/theoraplayer/src/TheoraDataSource.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include <stdio.h>
-#include <memory.h>
-#include "TheoraDataSource.h"
-#include "TheoraException.h"
-#include "TheoraVideoManager.h"
-#include "TheoraUtil.h"
-
-TheoraDataSource::~TheoraDataSource()
-{
-
-}
-
-TheoraFileDataSource::TheoraFileDataSource(std::string filename)
-{
- mFilename = filename;
- mFilePtr = NULL;
-}
-
-TheoraFileDataSource::~TheoraFileDataSource()
-{
- if (mFilePtr)
- {
- fclose(mFilePtr);
- mFilePtr = NULL;
- }
-}
-
-void TheoraFileDataSource::openFile()
-{
- if (mFilePtr == NULL)
- {
- mFilePtr=fopen(mFilename.c_str(), "rb");
- if (!mFilePtr)
- {
- std::string msg = "Can't open video file: " + mFilename;
- th_writelog(msg);
- throw TheoraGenericException(msg);
- }
- fseek(mFilePtr, 0, SEEK_END);
- mSize = ftell(mFilePtr);
- fseek(mFilePtr, 0, SEEK_SET);
- }
-}
-
-int TheoraFileDataSource::read(void* output, int nBytes)
-{
- if (mFilePtr == NULL) openFile();
- size_t n = fread(output, 1, nBytes, mFilePtr);
- return (int) n;
-}
-
-void TheoraFileDataSource::seek(unsigned long byte_index)
-{
- if (mFilePtr == NULL) openFile();
- fseek(mFilePtr, byte_index, SEEK_SET);
-}
-
-unsigned long TheoraFileDataSource::size()
-{
- if (mFilePtr == NULL) openFile();
- return mSize;
-}
-
-unsigned long TheoraFileDataSource::tell()
-{
- if (mFilePtr == NULL) return 0;
- return ftell(mFilePtr);
-}
-
-TheoraMemoryFileDataSource::TheoraMemoryFileDataSource(std::string filename) :
- mReadPointer(0),
- mData(0)
-{
- mFilename=filename;
- FILE* f=fopen(filename.c_str(),"rb");
- if (!f) throw TheoraGenericException("Can't open video file: "+filename);
- fseek(f,0,SEEK_END);
- mSize=ftell(f);
- fseek(f,0,SEEK_SET);
- mData=new unsigned char[mSize];
- fread(mData,1,mSize,f);
- fclose(f);
-}
-
-TheoraMemoryFileDataSource::TheoraMemoryFileDataSource(unsigned char* data, long size, const std::string& filename)
-{
- mFilename = filename;
- mData = data;
- mSize = size;
- mReadPointer = 0;
-}
-
-TheoraMemoryFileDataSource::~TheoraMemoryFileDataSource()
-{
- if (mData) delete [] mData;
-}
-
-int TheoraMemoryFileDataSource::read(void* output, int nBytes)
-{
- int n = (int) ((mReadPointer+nBytes <= mSize) ? nBytes : mSize - mReadPointer);
- if (!n) return 0;
- memcpy(output, mData + mReadPointer, n);
- mReadPointer += n;
- return n;
-}
-
-void TheoraMemoryFileDataSource::seek(unsigned long byte_index)
-{
- mReadPointer=byte_index;
-}
-
-unsigned long TheoraMemoryFileDataSource::size()
-{
- return mSize;
-}
-
-unsigned long TheoraMemoryFileDataSource::tell()
-{
- return mReadPointer;
-}
diff --git a/drivers/theoraplayer/src/TheoraException.cpp b/drivers/theoraplayer/src/TheoraException.cpp
deleted file mode 100644
index 4588a81397..0000000000
--- a/drivers/theoraplayer/src/TheoraException.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraException.h"
-#include "TheoraUtil.h"
-#include "TheoraVideoManager.h"
-#include <stdio.h>
-
-_TheoraGenericException::_TheoraGenericException(const std::string& errorText, std::string type, std::string file, int line)
-{
- mErrText = errorText;
- int src = (int) file.find("src");
- if (src >= 0) file = file.substr(src + 4, 1000);
- mLineNumber = line;
- mFile = file;
-}
-
-
-std::string _TheoraGenericException::repr()
-{
- std::string text = getType();
- if (text != "") text += ": ";
-
- if (mFile != "") text += "[" + mFile + ":" + str(mLineNumber) + "] - ";
-
- return text + getErrorText();
-}
-
-void _TheoraGenericException::writeOutput()
-{
- th_writelog("----------------\nException Error!\n\n" + repr() + "\n----------------");
-}
diff --git a/drivers/theoraplayer/src/TheoraFrameQueue.cpp b/drivers/theoraplayer/src/TheoraFrameQueue.cpp
deleted file mode 100644
index f402144795..0000000000
--- a/drivers/theoraplayer/src/TheoraFrameQueue.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraFrameQueue.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraVideoManager.h"
-#include "TheoraUtil.h"
-
-
-TheoraFrameQueue::TheoraFrameQueue(TheoraVideoClip* parent)
-{
- mParent = parent;
-}
-
-TheoraFrameQueue::~TheoraFrameQueue()
-{
- foreach_l(TheoraVideoFrame*, mQueue)
- {
- delete (*it);
- }
- mQueue.clear();
-}
-
-TheoraVideoFrame* TheoraFrameQueue::createFrameInstance(TheoraVideoClip* clip)
-{
- TheoraVideoFrame* frame = new TheoraVideoFrame(clip);
- if (frame->getBuffer() == NULL) // This can happen if you run out of memory
- {
- delete frame;
- return NULL;
- }
- return frame;
-}
-
-void TheoraFrameQueue::setSize(int n)
-{
- mMutex.lock();
- if (mQueue.size() > 0)
- {
- foreach_l (TheoraVideoFrame*, mQueue)
- {
- delete (*it);
- }
- mQueue.clear();
- }
- TheoraVideoFrame* frame;
- for (int i = 0;i < n; ++i)
- {
- frame = createFrameInstance(mParent);
- if (frame != NULL) mQueue.push_back(frame);
- else
- {
- TheoraVideoManager::getSingleton().logMessage("TheoraFrameQueue: unable to create " + str(n) + " frames, out of memory. Created " + str((int) mQueue.size()) + " frames.");
- break;
- }
- }
- mMutex.unlock();
-}
-
-int TheoraFrameQueue::getSize()
-{
- return (int) mQueue.size();
-}
-
-TheoraVideoFrame* TheoraFrameQueue::_getFirstAvailableFrame()
-{
- TheoraVideoFrame* frame = mQueue.front();
- if (frame->mReady) return frame;
- else return NULL;
-}
-
-TheoraVideoFrame* TheoraFrameQueue::getFirstAvailableFrame()
-{
- mMutex.lock();
- TheoraVideoFrame* frame = _getFirstAvailableFrame();
- mMutex.unlock();
- return frame;
-}
-
-void TheoraFrameQueue::clear()
-{
- mMutex.lock();
- foreach_l (TheoraVideoFrame*, mQueue)
- (*it)->clear();
- mMutex.unlock();
-}
-
-void TheoraFrameQueue::_pop(int n)
-{
- for (int i = 0; i < n; ++i)
- {
- TheoraVideoFrame* first = mQueue.front();
- first->clear();
- mQueue.pop_front();
- mQueue.push_back(first);
- }
-}
-
-void TheoraFrameQueue::pop(int n)
-{
- mMutex.lock();
- _pop(n);
- mMutex.unlock();
-}
-
-TheoraVideoFrame* TheoraFrameQueue::requestEmptyFrame()
-{
- TheoraVideoFrame* frame = NULL;
- mMutex.lock();
- foreach_l (TheoraVideoFrame*, mQueue)
- {
- if (!(*it)->mInUse)
- {
- (*it)->mInUse = 1;
- (*it)->mReady = 0;
- frame = (*it);
- break;
- }
- }
- mMutex.unlock();
- return frame;
-}
-
-int TheoraFrameQueue::getUsedCount()
-{
- mMutex.lock();
- int n=0;
- foreach_l(TheoraVideoFrame*,mQueue)
- if ((*it)->mInUse) ++n;
- mMutex.unlock();
- return n;
-}
-
-int TheoraFrameQueue::_getReadyCount()
-{
- int n = 0;
- foreach_l (TheoraVideoFrame*, mQueue)
- if ((*it)->mReady) ++n;
- return n;
-}
-
-
-int TheoraFrameQueue::getReadyCount()
-{
- mMutex.lock();
- int n = _getReadyCount();
- mMutex.unlock();
- return n;
-}
-
-bool TheoraFrameQueue::isFull()
-{
- return getReadyCount() == mQueue.size();
-}
-
-void TheoraFrameQueue::lock()
-{
- mMutex.lock();
-}
-
-void TheoraFrameQueue::unlock()
-{
- mMutex.unlock();
-}
-
-std::list<TheoraVideoFrame*>& TheoraFrameQueue::_getFrameQueue()
-{
- return mQueue;
-}
diff --git a/drivers/theoraplayer/src/TheoraTimer.cpp b/drivers/theoraplayer/src/TheoraTimer.cpp
deleted file mode 100644
index 644d1c2ab7..0000000000
--- a/drivers/theoraplayer/src/TheoraTimer.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraTimer.h"
-
-TheoraTimer::TheoraTimer()
-{
- mTime = 0;
- mPaused = 0;
- mSpeed = 1.0f;
-}
-
-TheoraTimer::~TheoraTimer()
-{
-
-}
-
-void TheoraTimer::update(float timeDelta)
-{
- if (!isPaused())
- {
- mTime += timeDelta * mSpeed;
- }
-}
-
-float TheoraTimer::getTime()
-{
- return mTime;
-}
-
-void TheoraTimer::pause()
-{
- mPaused = true;
-}
-
-void TheoraTimer::play()
-{
- mPaused = false;
-}
-
-
-bool TheoraTimer::isPaused()
-{
- return mPaused;
-}
-
-void TheoraTimer::stop()
-{
-
-}
-
-void TheoraTimer::seek(float time)
-{
- mTime = time;
-}
-
-void TheoraTimer::setSpeed(float speed)
-{
- mSpeed = speed;
-}
-
-float TheoraTimer::getSpeed()
-{
- return mSpeed;
-}
diff --git a/drivers/theoraplayer/src/TheoraUtil.cpp b/drivers/theoraplayer/src/TheoraUtil.cpp
deleted file mode 100644
index 8f1ad0c9c1..0000000000
--- a/drivers/theoraplayer/src/TheoraUtil.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include <stdio.h>
-#include <algorithm>
-#include <math.h>
-#include <map>
-#ifndef _WIN32
-#include <unistd.h>
-#include <pthread.h>
-#endif
-
-#include "TheoraUtil.h"
-#include "TheoraException.h"
-
-#ifdef _WIN32
-#include <windows.h>
-#pragma warning( disable: 4996 ) // MSVC++
-#endif
-
-std::string str(int i)
-{
- char s[32];
- sprintf(s, "%d", i);
- return std::string(s);
-}
-
-std::string strf(float i)
-{
- char s[32];
- sprintf(s, "%.3f", i);
- return std::string(s);
-}
-
-void _psleep(int miliseconds)
-{
-#ifdef _WIN32
-#ifndef _WINRT
- Sleep(miliseconds);
-#else
- WaitForSingleObjectEx(GetCurrentThread(), miliseconds, 0);
-#endif
-#else
- usleep(miliseconds * 1000);
-#endif
-}
-
-
-int _nextPow2(int x)
-{
- int y;
- for (y = 1; y < x; y *= 2);
- return y;
-}
diff --git a/drivers/theoraplayer/src/TheoraVideoClip.cpp b/drivers/theoraplayer/src/TheoraVideoClip.cpp
deleted file mode 100644
index 16897ee80e..0000000000
--- a/drivers/theoraplayer/src/TheoraVideoClip.cpp
+++ /dev/null
@@ -1,496 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraVideoClip.h"
-#include "TheoraVideoManager.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraAudioInterface.h"
-#include "TheoraTimer.h"
-#include "TheoraDataSource.h"
-#include "TheoraUtil.h"
-#include "TheoraException.h"
-
-#include "core/os/memory.h"
-
-TheoraVideoClip::TheoraVideoClip(TheoraDataSource* data_source,
- TheoraOutputMode output_mode,
- int nPrecachedFrames,
- bool usePower2Stride):
- mAudioInterface(NULL),
- mNumDroppedFrames(0),
- mNumDisplayedFrames(0),
- mSeekFrame(-1),
- mDuration(-1),
- mNumFrames(-1),
- mFPS(1),
- mUseAlpha(0),
- mFrameDuration(0),
- mName(data_source->repr()),
- mStride(usePower2Stride),
- mSubFrameWidth(0),
- mSubFrameHeight(0),
- mSubFrameOffsetX(0),
- mSubFrameOffsetY(0),
- mAudioGain(1),
- mRequestedOutputMode(output_mode),
- mAutoRestart(0),
- mEndOfFile(0),
- mRestarted(0),
- mIteration(0),
- mPlaybackIteration(0),
- mStream(0),
- mThreadAccessCount(0),
- mPriority(1),
- mFirstFrameDisplayed(0),
- mWaitingForCache(false),
- mOutputMode(TH_UNDEFINED)
-{
-
- audio_track=0;
- mAudioMutex = NULL;
- mThreadAccessMutex = new TheoraMutex();
- mTimer = mDefaultTimer = new TheoraTimer();
-
- mFrameQueue = NULL;
- mAssignedWorkerThread = NULL;
- mNumPrecachedFrames = nPrecachedFrames;
- setOutputMode(output_mode);
-}
-
-TheoraVideoClip::~TheoraVideoClip()
-{
- // wait untill a worker thread is done decoding the frame
- mThreadAccessMutex->lock();
-
- delete mDefaultTimer;
-
- if (mStream) memdelete(mStream);
-
- if (mFrameQueue) delete mFrameQueue;
-
- if (mAudioInterface)
- {
- mAudioMutex->lock(); // ensure a thread isn't using this mutex
- delete mAudioInterface; // notify audio interface it's time to call it a day
- mAudioMutex ->unlock();
- delete mAudioMutex;
- }
-
- mThreadAccessMutex->unlock();
-
- delete mThreadAccessMutex;
-}
-
-TheoraTimer* TheoraVideoClip::getTimer()
-{
- return mTimer;
-}
-
-void TheoraVideoClip::setTimer(TheoraTimer* timer)
-{
- if (!timer) mTimer = mDefaultTimer;
- else mTimer = timer;
-}
-
-void TheoraVideoClip::resetFrameQueue()
-{
- mFrameQueue->clear();
- mPlaybackIteration = mIteration = 0;
-}
-
-void TheoraVideoClip::restart()
-{
- mEndOfFile = true; //temp, to prevent threads to decode while restarting
- mThreadAccessMutex->lock();
- _restart();
- mTimer->seek(0);
- mFirstFrameDisplayed = false;
- resetFrameQueue();
- mEndOfFile = false;
- mRestarted = false;
- mSeekFrame = -1;
- mThreadAccessMutex->unlock();
-}
-
-void TheoraVideoClip::update(float timeDelta)
-{
- if (mTimer->isPaused())
- {
- mTimer->update(0); // update timer in case there is some code that needs to execute each frame
- return;
- }
- float time = mTimer->getTime(), speed = mTimer->getSpeed();
- if (time + timeDelta * speed >= mDuration)
- {
- if (mAutoRestart && mRestarted)
- {
- float seekTime = time + timeDelta * speed;
- for (;seekTime >= mDuration;)
- {
- seekTime -= mDuration;
- ++mPlaybackIteration;
- }
-
- mTimer->seek(seekTime);
- }
- else
- {
- if (time != mDuration)
- {
- mTimer->update((mDuration - time) / speed);
- }
- }
- }
- else
- {
- mTimer->update(timeDelta);
- }
-}
-
-float TheoraVideoClip::updateToNextFrame()
-{
- TheoraVideoFrame* f = mFrameQueue->getFirstAvailableFrame();
- if (!f) return 0;
-
- float time = f->mTimeToDisplay - mTimer->getTime();
- update(time);
- return time;
-}
-
-TheoraFrameQueue* TheoraVideoClip::getFrameQueue()
-{
- return mFrameQueue;
-}
-
-void TheoraVideoClip::popFrame()
-{
- ++mNumDisplayedFrames;
-
- // after transfering frame data to the texture, free the frame
- // so it can be used again
- if (!mFirstFrameDisplayed)
- {
- mFrameQueue->lock();
- mFrameQueue->_pop(1);
- mFirstFrameDisplayed = true;
- mFrameQueue->unlock();
- }
- else
- {
- mFrameQueue->pop();
- }
-}
-
-int TheoraVideoClip::getWidth()
-{
- return mUseAlpha ? mWidth / 2 : mWidth;
-}
-
-int TheoraVideoClip::getHeight()
-{
- return mHeight;
-}
-
-int TheoraVideoClip::getSubFrameWidth()
-{
- return mUseAlpha ? mWidth / 2 : mSubFrameWidth;
-}
-
-int TheoraVideoClip::getSubFrameHeight()
-{
- return mUseAlpha ? mHeight : mSubFrameHeight;
-}
-
-int TheoraVideoClip::getSubFrameOffsetX()
-{
- return mUseAlpha ? 0 : mSubFrameOffsetX;
-}
-
-int TheoraVideoClip::getSubFrameOffsetY()
-{
- return mUseAlpha ? 0 : mSubFrameOffsetY;
-}
-
-float TheoraVideoClip::getAbsPlaybackTime()
-{
- return mTimer->getTime() + mPlaybackIteration * mDuration;
-}
-
-int TheoraVideoClip::discardOutdatedFrames(float absTime)
-{
- int nReady = mFrameQueue->_getReadyCount();
- // only drop frames if you have more frames to show. otherwise even the late frame will do..
- if (nReady == 1) return 0;
- float time = absTime;
-
- int nPop = 0;
- TheoraVideoFrame* frame;
- float timeToDisplay;
-
- std::list<TheoraVideoFrame*>& queue = mFrameQueue->_getFrameQueue();
- foreach_l (TheoraVideoFrame*, queue)
- {
- frame = *it;
- if (!frame->mReady) break;
- timeToDisplay = frame->mTimeToDisplay + frame->mIteration * mDuration;
- if (time > timeToDisplay + mFrameDuration)
- {
- ++nPop;
- if (nReady - nPop == 1) break; // always leave at least one in the queue
- }
- else break;
- }
-
- if (nPop > 0)
- {
-#define _DEBUG
-#ifdef _DEBUG
- std::string log = getName() + ": dropped frame ";
-
- int i = nPop;
- foreach_l (TheoraVideoFrame*, queue)
- {
- log += str((int) (*it)->getFrameNumber());
- if (i-- > 1)
- {
- log += ", ";
- }
- else break;
- }
- th_writelog(log);
-#endif
- mNumDroppedFrames += nPop;
- mFrameQueue->_pop(nPop);
- }
-
- return nPop;
-}
-
-TheoraVideoFrame* TheoraVideoClip::getNextFrame()
-{
- TheoraVideoFrame* frame;
- // if we are about to seek, then the current frame queue is invalidated
- // (will be cleared when a worker thread does the actual seek)
- if (mSeekFrame != -1) return NULL;
-
- mFrameQueue->lock();
- float time = getAbsPlaybackTime();
- discardOutdatedFrames(time);
-
- frame = mFrameQueue->_getFirstAvailableFrame();
- if (frame != NULL)
- {
- if (frame->mTimeToDisplay + frame->mIteration * mDuration > time && mFirstFrameDisplayed)
- {
- frame = NULL; // frame is ready but it's not yet time to display it, except when we haven't displayed any frames yet
- }
- }
-
- mFrameQueue->unlock();
- return frame;
-}
-
-std::string TheoraVideoClip::getName()
-{
- return mName;
-}
-
-bool TheoraVideoClip::isBusy()
-{
- return mAssignedWorkerThread || mOutputMode != mRequestedOutputMode;
-}
-
-TheoraOutputMode TheoraVideoClip::getOutputMode()
-{
- return mOutputMode;
-}
-
-void TheoraVideoClip::setOutputMode(TheoraOutputMode mode)
-{
- if (mode == TH_UNDEFINED) throw TheoraGenericException("Invalid output mode: TH_UNDEFINED for video: " + mName);
- if (mOutputMode == mode) return;
- mRequestedOutputMode = mode;
- mUseAlpha = (mode == TH_RGBA ||
- mode == TH_ARGB ||
- mode == TH_BGRA ||
- mode == TH_ABGR ||
- mode == TH_GREY3A ||
- mode == TH_AGREY3 ||
- mode == TH_YUVA ||
- mode == TH_AYUV);
- if (mAssignedWorkerThread)
- {
- mThreadAccessMutex->lock();
- // discard current frames and recreate them
- mFrameQueue->setSize(mFrameQueue->getSize());
- mThreadAccessMutex->unlock();
-
- }
- mOutputMode = mRequestedOutputMode;
-}
-
-float TheoraVideoClip::getTimePosition()
-{
- return mTimer->getTime();
-}
-
-int TheoraVideoClip::getNumPrecachedFrames()
-{
- return mFrameQueue->getSize();
-}
-
-void TheoraVideoClip::setNumPrecachedFrames(int n)
-{
- if (mFrameQueue->getSize() != n)
- mFrameQueue->setSize(n);
-}
-
-int TheoraVideoClip::_getNumReadyFrames()
-{
- if (mSeekFrame != -1) return 0;
- return mFrameQueue->_getReadyCount();
-}
-
-int TheoraVideoClip::getNumReadyFrames()
-{
- if (mSeekFrame != -1) return 0; // we are about to seek, consider frame queue empty even though it will be emptied upon seek
- return mFrameQueue->getReadyCount();
-}
-
-float TheoraVideoClip::getDuration()
-{
- return mDuration;
-}
-
-float TheoraVideoClip::getFPS()
-{
- return mFPS;
-}
-
-void TheoraVideoClip::play()
-{
- mTimer->play();
-}
-
-void TheoraVideoClip::pause()
-{
- mTimer->pause();
-}
-
-bool TheoraVideoClip::isPaused()
-{
- return mTimer->isPaused();
-}
-
-bool TheoraVideoClip::isDone()
-{
- return mEndOfFile && !mFrameQueue->getFirstAvailableFrame();
-}
-
-void TheoraVideoClip::stop()
-{
- pause();
- resetFrameQueue();
- mFirstFrameDisplayed = false;
- seek(0);
-}
-
-void TheoraVideoClip::setPlaybackSpeed(float speed)
-{
- mTimer->setSpeed(speed);
-}
-
-float TheoraVideoClip::getPlaybackSpeed()
-{
- return mTimer->getSpeed();
-}
-
-void TheoraVideoClip::seek(float time)
-{
- seekToFrame((int) (time * getFPS()));
-}
-
-void TheoraVideoClip::seekToFrame(int frame)
-{
- if (frame < 0) mSeekFrame = 0;
- else if (frame > mNumFrames) mSeekFrame = mNumFrames;
- else mSeekFrame = frame;
-
- mFirstFrameDisplayed = false;
- mEndOfFile = false;
-}
-
-void TheoraVideoClip::waitForCache(float desired_cache_factor, float max_wait_time)
-{
- mWaitingForCache = true;
- bool paused = mTimer->isPaused();
- if (!paused) mTimer->pause();
- int elapsed = 0;
- int desired_num_precached_frames = (int) (desired_cache_factor * getNumPrecachedFrames());
- while (getNumReadyFrames() < desired_num_precached_frames)
- {
- _psleep(10);
- elapsed += 10;
- if (elapsed >= max_wait_time * 1000) break;
- }
- if (!paused) mTimer->play();
- mWaitingForCache = false;
-}
-
-float TheoraVideoClip::getPriority()
-{
- return mPriority;
-}
-
-void TheoraVideoClip::setPriority(float priority)
-{
- mPriority = priority;
-}
-
-float TheoraVideoClip::getPriorityIndex()
-{
- float priority = (float) getNumReadyFrames();
- if (mTimer->isPaused()) priority += getNumPrecachedFrames() / 2;
-
- return priority;
-}
-
-void TheoraVideoClip::setAudioInterface(TheoraAudioInterface* iface)
-{
- mAudioInterface = iface;
- if (iface && !mAudioMutex) mAudioMutex = new TheoraMutex;
- if (!iface && mAudioMutex)
- {
- delete mAudioMutex;
- mAudioMutex = NULL;
- }
-}
-
-TheoraAudioInterface* TheoraVideoClip::getAudioInterface()
-{
- return mAudioInterface;
-}
-
-void TheoraVideoClip::setAudioGain(float gain)
-{
- if (gain > 1) mAudioGain=1;
- if (gain < 0) mAudioGain=0;
- else mAudioGain=gain;
-}
-
-float TheoraVideoClip::getAudioGain()
-{
- return mAudioGain;
-}
-
-void TheoraVideoClip::setAutoRestart(bool value)
-{
- mAutoRestart = value;
- if (value) mEndOfFile = false;
-}
diff --git a/drivers/theoraplayer/src/TheoraVideoFrame.cpp b/drivers/theoraplayer/src/TheoraVideoFrame.cpp
deleted file mode 100644
index b70253dabf..0000000000
--- a/drivers/theoraplayer/src/TheoraVideoFrame.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include <memory.h>
-#include "TheoraPixelTransform.h"
-#include "TheoraVideoClip.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraVideoManager.h"
-
-//#define YUV_TEST // uncomment this if you want to benchmark YUV decoding functions
-
-extern "C"
-{
-void decodeRGB (struct TheoraPixelTransform* t);
-void decodeRGBA (struct TheoraPixelTransform* t);
-void decodeRGBX (struct TheoraPixelTransform* t);
-void decodeARGB (struct TheoraPixelTransform* t);
-void decodeXRGB (struct TheoraPixelTransform* t);
-void decodeBGR (struct TheoraPixelTransform* t);
-void decodeBGRA (struct TheoraPixelTransform* t);
-void decodeBGRX (struct TheoraPixelTransform* t);
-void decodeABGR (struct TheoraPixelTransform* t);
-void decodeXBGR (struct TheoraPixelTransform* t);
-void decodeGrey (struct TheoraPixelTransform* t);
-void decodeGrey3(struct TheoraPixelTransform* t);
-void decodeGreyA(struct TheoraPixelTransform* t);
-void decodeGreyX(struct TheoraPixelTransform* t);
-void decodeAGrey(struct TheoraPixelTransform* t);
-void decodeXGrey(struct TheoraPixelTransform* t);
-void decodeYUV (struct TheoraPixelTransform* t);
-void decodeYUVA (struct TheoraPixelTransform* t);
-void decodeYUVX (struct TheoraPixelTransform* t);
-void decodeAYUV (struct TheoraPixelTransform* t);
-void decodeXYUV (struct TheoraPixelTransform* t);
-}
-
-static void (*conversion_functions[])(struct TheoraPixelTransform*) = {0,
- decodeRGB,
- decodeRGBA,
- decodeRGBX,
- decodeARGB,
- decodeXRGB,
- decodeBGR,
- decodeBGRA,
- decodeBGRX,
- decodeABGR,
- decodeXBGR,
- decodeGrey,
- decodeGrey3,
- decodeGreyA,
- decodeGreyX,
- decodeAGrey,
- decodeXGrey,
- decodeYUV,
- decodeYUVA,
- decodeYUVX,
- decodeAYUV,
- decodeXYUV
-};
-
-TheoraVideoFrame::TheoraVideoFrame(TheoraVideoClip* parent)
-{
- mReady = mInUse = false;
- mParent = parent;
- mIteration = 0;
- // number of bytes based on output mode
- int bytemap[]={0, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4, 1, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4};
- mBpp = bytemap[mParent->getOutputMode()];
- unsigned int size = mParent->getStride() * mParent->mHeight * mBpp;
- try
- {
- mBuffer = new unsigned char[size];
- }
- catch (std::bad_alloc)
- {
- mBuffer = NULL;
- return;
- }
- memset(mBuffer, 255, size);
-}
-
-TheoraVideoFrame::~TheoraVideoFrame()
-{
- if (mBuffer) delete [] mBuffer;
-}
-
-int TheoraVideoFrame::getWidth()
-{
- return mParent->getWidth();
-}
-
-int TheoraVideoFrame::getStride()
-{
- return mParent->mStride;
-}
-
-int TheoraVideoFrame::getHeight()
-{
- return mParent->getHeight();
-}
-
-unsigned char* TheoraVideoFrame::getBuffer()
-{
- return mBuffer;
-}
-
-void TheoraVideoFrame::decode(struct TheoraPixelTransform* t)
-{
- if (t->raw != NULL)
- {
- int bufferStride = mParent->getWidth() * mBpp;
- if (bufferStride == t->rawStride)
- {
- memcpy(mBuffer, t->raw, t->rawStride * mParent->getHeight());
- }
- else
- {
- unsigned char *buff = mBuffer, *src = t->raw;
- int i, h = mParent->getHeight();
- for (i = 0; i < h; ++i, buff += bufferStride, src += t->rawStride)
- {
- memcpy(buff, src, bufferStride);
- }
- }
- }
- else
- {
- t->out = mBuffer;
- t->w = mParent->getWidth();
- t->h = mParent->getHeight();
-
-#ifdef YUV_TEST // when benchmarking yuv conversion functions during development, do a timed average
- #define N 1000
- clock_t time = clock();
- for (int i = 0; i < N; ++i)
- {
- conversion_functions[mParent->getOutputMode()](t);
- }
- float diff = (clock() - time) * 1000.0f / CLOCKS_PER_SEC;
-
- char s[128];
- sprintf(s, "%.2f", diff / N);
- TheoraVideoManager::getSingleton().logMessage("YUV Decoding time: " + std::string(s) + " ms\n");
-#else
- conversion_functions[mParent->getOutputMode()](t);
-#endif
- }
- mReady = true;
-}
-
-void TheoraVideoFrame::clear()
-{
- mInUse = mReady = false;
-}
diff --git a/drivers/theoraplayer/src/TheoraVideoManager.cpp b/drivers/theoraplayer/src/TheoraVideoManager.cpp
deleted file mode 100644
index 53b211374a..0000000000
--- a/drivers/theoraplayer/src/TheoraVideoManager.cpp
+++ /dev/null
@@ -1,485 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraVideoManager.h"
-#include "TheoraWorkerThread.h"
-#include "TheoraVideoClip.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraAudioInterface.h"
-#include "TheoraUtil.h"
-#include "TheoraDataSource.h"
-#include "TheoraException.h"
-#ifdef __THEORA
- #include <theora/codec.h>
- #include <vorbis/codec.h>
- #include "TheoraVideoClip_Theora.h"
-#endif
-#ifdef __AVFOUNDATION
- #include "TheoraVideoClip_AVFoundation.h"
-#endif
-#ifdef __FFMPEG
- #include "TheoraVideoClip_FFmpeg.h"
-#endif
-#ifdef _ANDROID //libtheoraplayer addition for cpu feature detection
- #include "cpu-features.h"
-#endif
-// declaring function prototype here so I don't have to put it in a header file
-// it only needs to be used by this plugin and called once
-extern "C"
-{
- void initYUVConversionModule();
-}
-
-#include "core/os/memory.h"
-
-//#define _DECODING_BENCHMARK //uncomment to test average decoding time on a given device
-
-
-// --------------------------
-//#define _SCHEDULING_DEBUG
-#ifdef _SCHEDULING_DEBUG
-float gThreadDiagnosticTimer = 0;
-#endif
-// --------------------------
-
-#ifdef _DECODING_BENCHMARK
-void benchmark(TheoraVideoClip* clip)
-{
- int nPrecached = 256;
- int n = nPrecached;
- char msg[1024];
- clock_t t = clock();
- while (n > 0)
- {
- clip->waitForCache(1.0f, 1000000);
- n -= 32;
- clip->getFrameQueue()->clear();
- }
- float diff = ((float) (clock() - t) * 1000.0f) / CLOCKS_PER_SEC;
- sprintf(msg, "BENCHMARK: %s: Decoding %d frames took %.1fms (%.2fms average per frame)\n",clip->getName().c_str(), nPrecached, diff, diff / nPrecached);
- TheoraVideoManager::getSingleton().logMessage(msg);
- clip->seek(0);
-}
-#endif
-
-struct TheoraWorkCandidate
-{
- TheoraVideoClip* clip;
- float priority, queuedTime, workTime, entitledTime;
-};
-
-TheoraVideoManager* g_ManagerSingleton = NULL;
-
-void theora_writelog(std::string output)
-{
- printf("%s\n", output.c_str());
-}
-
-void (*g_LogFuction)(std::string) = theora_writelog;
-
-void TheoraVideoManager::setLogFunction(void (*fn)(std::string))
-{
- g_LogFuction = fn;
-}
-
-TheoraVideoManager* TheoraVideoManager::getSingletonPtr()
-{
- return g_ManagerSingleton;
-}
-
-TheoraVideoManager& TheoraVideoManager::getSingleton()
-{
- return *g_ManagerSingleton;
-}
-
-TheoraVideoManager::TheoraVideoManager(int num_worker_threads) :
- mDefaultNumPrecachedFrames(8)
-{
- if (num_worker_threads < 1) throw TheoraGenericException("Unable to create TheoraVideoManager, at least one worker thread is reqired");
-
- g_ManagerSingleton = this;
-
- std::string msg = "Initializing Theora Playback Library (" + getVersionString() + ")\n";
-#ifdef __THEORA
- msg += " - libtheora version: " + std::string(th_version_string()) + "\n" +
- " - libvorbis version: " + std::string(vorbis_version_string()) + "\n";
-#endif
-#ifdef _ANDROID
- uint64_t features = android_getCpuFeaturesExt();
- char s[128];
- sprintf(s, " - Android: CPU Features: %u\n", (unsigned int) features);
- msg += s;
- if ((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0)
- msg += " - Android: NEON features NOT SUPPORTED by CPU\n";
- else
- msg += " - Android: Detected NEON CPU features\n";
-#endif
-
-#ifdef __AVFOUNDATION
- msg += " - using Apple AVFoundation classes.\n";
-#endif
-#ifdef __FFMPEG
- msg += " - using FFmpeg library.\n";
-#endif
-
- logMessage(msg + "------------------------------------");
- mAudioFactory = NULL;
- mWorkMutex = new TheoraMutex();
-
- // for CPU based yuv2rgb decoding
- initYUVConversionModule();
-
- createWorkerThreads(num_worker_threads);
-}
-
-TheoraVideoManager::~TheoraVideoManager()
-{
- destroyWorkerThreads();
-
- mWorkMutex->lock();
- ClipList::iterator ci;
- for (ci = mClips.begin(); ci != mClips.end(); ++ci)
- delete (*ci);
- mClips.clear();
- mWorkMutex->unlock();
- delete mWorkMutex;
-}
-
-void TheoraVideoManager::logMessage(std::string msg)
-{
- g_LogFuction(msg);
-}
-
-TheoraVideoClip* TheoraVideoManager::getVideoClipByName(std::string name)
-{
- TheoraVideoClip* clip = NULL;
- mWorkMutex->lock();
-
- foreach(TheoraVideoClip*, mClips)
- {
- if ((*it)->getName() == name)
- {
- clip = *it;
- break;
- }
- }
- mWorkMutex->unlock();
-
- return clip;
-}
-
-void TheoraVideoManager::setAudioInterfaceFactory(TheoraAudioInterfaceFactory* factory)
-{
- mAudioFactory = factory;
-}
-
-TheoraAudioInterfaceFactory* TheoraVideoManager::getAudioInterfaceFactory()
-{
- return mAudioFactory;
-}
-
-TheoraVideoClip* TheoraVideoManager::createVideoClip(std::string filename,
- TheoraOutputMode output_mode,
- int numPrecachedOverride,
- bool usePower2Stride,
- int p_track)
-{
- TheoraDataSource* src=memnew(TheoraFileDataSource(filename));
- return createVideoClip(src,output_mode,numPrecachedOverride,usePower2Stride, p_track);
-}
-
-TheoraVideoClip* TheoraVideoManager::createVideoClip(TheoraDataSource* data_source,
- TheoraOutputMode output_mode,
- int numPrecachedOverride,
- bool usePower2Stride,
- int p_audio_track)
-{
- mWorkMutex->lock();
-
- TheoraVideoClip* clip = NULL;
- int nPrecached = numPrecachedOverride ? numPrecachedOverride : mDefaultNumPrecachedFrames;
- logMessage("Creating video from data source: " + data_source->repr() + " [" + str(nPrecached) + " precached frames].");
-
-#ifdef __AVFOUNDATION
- TheoraFileDataSource* fileDataSource = dynamic_cast<TheoraFileDataSource*>(data_source);
- std::string filename;
- if (fileDataSource == NULL)
- {
- TheoraMemoryFileDataSource* memoryDataSource = dynamic_cast<TheoraMemoryFileDataSource*>(data_source);
- if (memoryDataSource != NULL) filename = memoryDataSource->getFilename();
- // if the user has his own data source, it's going to be a problem for AVAssetReader since it only supports reading from files...
- }
- else filename = fileDataSource->getFilename();
-
- if (filename.size() > 4 && filename.substr(filename.size() - 4, filename.size()) == ".mp4")
- {
- clip = new TheoraVideoClip_AVFoundation(data_source, output_mode, nPrecached, usePower2Stride);
- }
-#endif
-#if defined(__AVFOUNDATION) && defined(__THEORA)
- else
-#endif
-#ifdef __THEORA
- clip = new TheoraVideoClip_Theora(data_source, output_mode, nPrecached, usePower2Stride);
-#endif
-#ifdef __FFMPEG
- clip = new TheoraVideoClip_FFmpeg(data_source, output_mode, nPrecached, usePower2Stride);
-#endif
-
- clip->set_audio_track(p_audio_track);
- clip->load(data_source);
- clip->decodeNextFrame(); // ensure the first frame is always preloaded and have the main thread do it to prevent potential thread starvatio
-
- mClips.push_back(clip);
- mWorkMutex->unlock();
-
-#ifdef _DECODING_BENCHMARK
- benchmark(clip);
-#endif
- return clip;
-}
-
-void TheoraVideoManager::destroyVideoClip(TheoraVideoClip* clip)
-{
- if (clip)
- {
- th_writelog("Destroying video clip: " + clip->getName());
- mWorkMutex->lock();
- bool reported = 0;
- while (clip->mAssignedWorkerThread)
- {
- if (!reported)
- {
- th_writelog(" - Waiting for WorkerThread to finish decoding in order to destroy");
- reported = 1;
- }
- _psleep(1);
- }
- if (reported) th_writelog(" - WorkerThread done, destroying...");
-
- // erase the clip from the clip list
- foreach (TheoraVideoClip*, mClips)
- {
- if ((*it) == clip)
- {
- mClips.erase(it);
- break;
- }
- }
- // remove all it's references from the work log
- mWorkLog.remove(clip);
-
- // delete the actual clip
- delete clip;
-#ifdef _DEBUG
- th_writelog("Destroyed video.");
-#endif
- mWorkMutex->unlock();
- }
-}
-
-TheoraVideoClip* TheoraVideoManager::requestWork(TheoraWorkerThread* caller)
-{
- if (!mWorkMutex) return NULL;
- mWorkMutex->lock();
-
- TheoraVideoClip* selectedClip = NULL;
- float maxQueuedTime = 0, totalAccessCount = 0, prioritySum = 0, diff, maxDiff = -1;
- int nReadyFrames;
- std::vector<TheoraWorkCandidate> candidates;
- TheoraVideoClip* clip;
- TheoraWorkCandidate candidate;
-
- // first pass is for playing videos, but if no such videos are available for decoding
- // paused videos are selected in the second pass.
- // Note that paused videos that are waiting for cache are considered equal to playing
- // videos in the scheduling context
-
- for (int i = 0; i < 2 && candidates.size() == 0; ++i)
- {
- foreach (TheoraVideoClip*, mClips)
- {
- clip = *it;
- if (clip->isBusy() || (i == 0 && clip->isPaused() && !clip->mWaitingForCache)) continue;
- nReadyFrames = clip->getNumReadyFrames();
- if (nReadyFrames == clip->getFrameQueue()->getSize()) continue;
-
- candidate.clip = clip;
- candidate.priority = clip->getPriority();
- candidate.queuedTime = (float) nReadyFrames / (clip->getFPS() * clip->getPlaybackSpeed());
- candidate.workTime = (float) clip->mThreadAccessCount;
-
- totalAccessCount += candidate.workTime;
- if (maxQueuedTime < candidate.queuedTime) maxQueuedTime = candidate.queuedTime;
-
- candidates.push_back(candidate);
- }
- }
-
- // prevent division by zero
- if (totalAccessCount == 0) totalAccessCount = 1;
- if (maxQueuedTime == 0) maxQueuedTime = 1;
-
- // normalize candidate values
- foreach (TheoraWorkCandidate, candidates)
- {
- it->workTime /= totalAccessCount;
- // adjust user priorities to favor clips that have fewer frames queued
- it->priority *= 1.0f - (it->queuedTime / maxQueuedTime) * 0.5f;
- prioritySum += it->priority;
- }
- foreach (TheoraWorkCandidate, candidates)
- {
- it->entitledTime = it->priority / prioritySum;
- }
-
- // now, based on how much access time has been given to each clip in the work log
- // and how much time should be given to each clip based on calculated priorities,
- // we choose a best suited clip for this worker thread to decode next
- foreach (TheoraWorkCandidate, candidates)
- {
- diff = it->entitledTime - it->workTime;
-
- if (maxDiff < diff)
- {
- maxDiff = diff;
- selectedClip = it->clip;
- }
- }
-
- if (selectedClip)
- {
- selectedClip->mAssignedWorkerThread = caller;
-
- int nClips = (int) mClips.size();
- unsigned int maxWorkLogSize = (nClips - 1) * 50;
-
- if (nClips > 1)
- {
- mWorkLog.push_front(selectedClip);
- ++selectedClip->mThreadAccessCount;
- }
-
- TheoraVideoClip* c;
- while (mWorkLog.size() > maxWorkLogSize)
- {
- c = mWorkLog.back();
- mWorkLog.pop_back();
- c->mThreadAccessCount--;
- }
-#ifdef _SCHEDULING_DEBUG
- if (mClips.size() > 1)
- {
- int accessCount = mWorkLog.size();
- if (gThreadDiagnosticTimer > 2.0f)
- {
- gThreadDiagnosticTimer = 0;
- std::string logstr = "-----\nTheora Playback Library debug CPU time analysis (" + str(accessCount) + "):\n";
- int percent;
- foreach (TheoraVideoClip*, mClips)
- {
- percent = ((float) (*it)->mThreadAccessCount / mWorkLog.size()) * 100.0f;
- logstr += (*it)->getName() + " (" + str((*it)->getPriority()) + "): " + str((*it)->mThreadAccessCount) + ", " + str(percent) + "%\n";
- }
- logstr += "-----";
- th_writelog(logstr);
- }
- }
-#endif
- }
-
- mWorkMutex->unlock();
- return selectedClip;
-}
-
-void TheoraVideoManager::update(float timeDelta)
-{
- mWorkMutex->lock();
- foreach (TheoraVideoClip*, mClips)
- {
- (*it)->update(timeDelta);
- (*it)->decodedAudioCheck();
- }
- mWorkMutex->unlock();
-#ifdef _SCHEDULING_DEBUG
- gThreadDiagnosticTimer += timeDelta;
-#endif
-}
-
-int TheoraVideoManager::getNumWorkerThreads()
-{
- return (int) mWorkerThreads.size();
-}
-
-void TheoraVideoManager::createWorkerThreads(int n)
-{
- TheoraWorkerThread* t;
- for (int i=0;i<n;++i)
- {
- t=new TheoraWorkerThread();
- t->start();
- mWorkerThreads.push_back(t);
- }
-}
-
-void TheoraVideoManager::destroyWorkerThreads()
-{
- foreach(TheoraWorkerThread*,mWorkerThreads)
- {
- (*it)->join();
- delete (*it);
- }
- mWorkerThreads.clear();
-}
-
-void TheoraVideoManager::setNumWorkerThreads(int n)
-{
- if (n == getNumWorkerThreads()) return;
- if (n < 1) throw TheoraGenericException("Unable to change the number of worker threads in TheoraVideoManager, at least one worker thread is reqired");
-
- th_writelog("changing number of worker threats to: "+str(n));
-
- destroyWorkerThreads();
- createWorkerThreads(n);
-}
-
-std::string TheoraVideoManager::getVersionString()
-{
- int a, b, c;
- getVersion(&a, &b, &c);
- std::string out = str(a) + "." + str(b);
- if (c != 0)
- {
- if (c < 0) out += " RC" + str(-c);
- else out += "." + str(c);
- }
- return out;
-}
-
-void TheoraVideoManager::getVersion(int* a, int* b, int* c) // TODO, return a struct instead of the current solution.
-{
- *a = 1;
- *b = 1;
- *c = 0;
-}
-
-std::vector<std::string> TheoraVideoManager::getSupportedDecoders()
-{
- std::vector<std::string> lst;
-#ifdef __THEORA
- lst.push_back("Theora");
-#endif
-#ifdef __AVFOUNDATION
- lst.push_back("AVFoundation");
-#endif
-#ifdef __FFMPEG
- lst.push_back("FFmpeg");
-#endif
-
- return lst;
-}
diff --git a/drivers/theoraplayer/src/TheoraWorkerThread.cpp b/drivers/theoraplayer/src/TheoraWorkerThread.cpp
deleted file mode 100644
index cef8545b8d..0000000000
--- a/drivers/theoraplayer/src/TheoraWorkerThread.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef _WIN32
-#pragma warning( disable: 4251 ) // MSVC++
-#endif
-#include "TheoraWorkerThread.h"
-#include "TheoraVideoManager.h"
-#include "TheoraVideoClip.h"
-#include "TheoraUtil.h"
-
-TheoraWorkerThread::TheoraWorkerThread() : TheoraThread()
-{
- mClip = NULL;
-}
-
-TheoraWorkerThread::~TheoraWorkerThread()
-{
-
-}
-
-void TheoraWorkerThread::execute()
-{
- while (isRunning())
- {
- mClip = TheoraVideoManager::getSingleton().requestWork(this);
- if (!mClip)
- {
- _psleep(100);
- continue;
- }
-
- mClip->mThreadAccessMutex->lock();
- // if user requested seeking, do that then.
- if (mClip->mSeekFrame >= 0) mClip->doSeek();
-
- if (!mClip->decodeNextFrame())
- _psleep(1); // this happens when the video frame queue is full.
-
- mClip->mAssignedWorkerThread = NULL;
- mClip->mThreadAccessMutex->unlock();
- mClip = NULL;
- }
-}
diff --git a/drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c b/drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c
deleted file mode 100644
index 8af5dd1f58..0000000000
--- a/drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "yuv_util.h"
-
-static void _decodeGrey3(struct TheoraPixelTransform* t, int stride, int nBytes)
-{
- unsigned char *ySrc = t->y, *yLineEnd, *out = t->out;
- unsigned int y;
- for (y = 0; y < t->h; ++y, ySrc += t->yStride - t->w, out += stride-t->w * nBytes)
- for (yLineEnd = ySrc + t->w; ySrc != yLineEnd; ++ySrc, out += nBytes)
- out[0] = out[1] = out[2] = *ySrc;
-}
-
-void decodeGrey(struct TheoraPixelTransform* t)
-{
- unsigned char *ySrc = t->y, *yLineEnd, *out = t->out;
- unsigned int y;
- for (y = 0; y < t->h; ++y, ySrc += t->yStride - t->w)
- for (yLineEnd = ySrc + t->w; ySrc != yLineEnd; ++ySrc, ++out)
- *out = *ySrc;
-
-}
-
-void decodeGrey3(struct TheoraPixelTransform* t)
-{
- _decodeGrey3(t, t->w * 3, 3);
-}
-
-void decodeGreyA(struct TheoraPixelTransform* t)
-{
- _decodeGrey3(t, t->w * 4, 4);
- _decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeGreyX(struct TheoraPixelTransform* t)
-{
- _decodeGrey3(t, t->w * 4, 4);
-}
-
-void decodeAGrey(struct TheoraPixelTransform* t)
-{
- _decodeGrey3(incOut(t, 1), t->w * 4, 4);
- _decodeAlpha(t, t->w * 4);
-}
-
-void decodeXGrey(struct TheoraPixelTransform* t)
-{
- _decodeGrey3(incOut(t, 1), t->w * 4, 4);
-}
-
diff --git a/drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c b/drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c
deleted file mode 100644
index e981e75ead..0000000000
--- a/drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef _YUV_C
-#include "yuv_util.h"
-
-int YTable [256];
-int BUTable[256];
-int GUTable[256];
-int GVTable[256];
-int RVTable[256];
-
-#define CLIP_RGB_COLOR(dst, x) \
- tmp = (x) >> 13;\
- if ((tmp & ~0xFF) == 0) dst = tmp;\
- else dst = (-tmp) >> 31;
-
-#define _decodeRGB(t, stride, nBytes, maxWidth, i1, i2, i3, j1, j2, j3)\
- register int tmp;\
- int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;\
- unsigned int y;\
- unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;\
- \
- for (y = 0; y < t->h; y += 2)\
- {\
- ySrcEven = t->y + y * t->yStride;\
- ySrcOdd = t->y + (y + 1) * t->yStride;\
- uSrc = t->u + y * t->uStride / 2;\
- vSrc = t->v + y * t->vStride / 2;\
- out1 = t->out + y * stride;\
- out2 = t->out + (y + 1) * stride;\
- \
- for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)\
- {\
- cu = *uSrc; ++uSrc;\
- cv = *vSrc; ++vSrc;\
- rV = RVTable[cv];\
- gUV = GUTable[cu] + GVTable[cv];\
- bU = BUTable[cu];\
- \
- rgbY1 = YTable[*ySrcEven]; ++ySrcEven;\
- rgbY2 = YTable[*ySrcOdd]; ++ySrcOdd;\
- rgbY3 = YTable[*ySrcEven]; ++ySrcEven;\
- rgbY4 = YTable[*ySrcOdd]; ++ySrcOdd;\
- \
- CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );\
- CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);\
- CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );\
- \
- CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );\
- CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);\
- CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );\
- \
- CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );\
- CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);\
- CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );\
- \
- CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );\
- CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);\
- CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );\
- \
- out1 += nBytes2; out2 += nBytes2;\
- }\
- }
-
-// The 'trick' with this function is that it skips decoding YUV pixels if the alpha value is 0, thus improving the decoding speed of a frame
-#define _decodeRGBA(t, stride, nBytes, maxWidth, i1, i2, i3, j1, j2, j3, aindex1, aindex2)\
-\
- register int tmp;\
- int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, a1, a2, a3, a4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;\
- int alphaStride = t->w;\
- unsigned int y;\
- unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;\
- \
- for (y = 0; y < t->h; y += 2)\
- {\
- ySrcEven = t->y + y * t->yStride;\
- ySrcOdd = t->y + (y + 1) * t->yStride;\
- uSrc = t->u + y * t->uStride / 2;\
- vSrc = t->v + y * t->vStride / 2;\
- out1 = t->out + y * stride;\
- out2 = t->out + (y + 1) * stride;\
- \
- for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)\
- {\
- cu = *uSrc; ++uSrc;\
- cv = *vSrc; ++vSrc;\
- rV = RVTable[cv];\
- gUV = GUTable[cu] + GVTable[cv];\
- bU = BUTable[cu];\
- \
- rgbY1 = YTable[*ySrcEven]; a1 = ySrcEven[alphaStride]; ++ySrcEven;\
- rgbY2 = YTable[*ySrcOdd]; a2 = ySrcOdd [alphaStride]; ++ySrcOdd;\
- rgbY3 = YTable[*ySrcEven]; a3 = ySrcEven[alphaStride]; ++ySrcEven;\
- rgbY4 = YTable[*ySrcOdd]; a4 = ySrcOdd [alphaStride]; ++ySrcOdd;\
- \
- if (a1 > 16)\
- {\
- CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );\
- CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);\
- CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );\
- out1[aindex1] = a1 >= 235 ? 255 : (unsigned char) (((a1 - 16) * 255) / 219);\
- }\
- else *((unsigned int*) out1) = 0;\
- \
- if (a2 > 16)\
- {\
- CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );\
- CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);\
- CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );\
- out2[aindex1] = a2 >= 235 ? 255 : (unsigned char) (((a2 - 16) * 255) / 219);\
- }\
- else *((unsigned int*) out2) = 0;\
- \
- if (a3 > 16)\
- {\
- CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );\
- CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);\
- CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );\
- out1[aindex2] = a3 >= 235 ? 255 : (unsigned char) (((a3 - 16) * 255) / 219);\
- }\
- else *((unsigned int*) &out1[4]) = 0;\
- \
- if (a4 > 16)\
- {\
- CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );\
- CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);\
- CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );\
- out2[aindex2] = a4 >= 235 ? 255 : (unsigned char) (((a4 - 16) * 255) / 219);\
- }\
- else *((unsigned int*) &out2[4]) = 0;\
- \
- out1 += nBytes2; out2 += nBytes2;\
- }\
- }\
-
-void decodeRGB(struct TheoraPixelTransform* t)
-{
- _decodeRGB(t, t->w * 3, 3, 0, 0, 1, 2, 3, 4, 5);
-}
-
-void decodeRGBA(struct TheoraPixelTransform* t)
-{
- _decodeRGBA(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6, 3, 7);
-// This is the old 2-phase version, leaving it here in case more debugging is needed
-// _decodeRGB(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6);
-// _decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeRGBX(struct TheoraPixelTransform* t)
-{
- _decodeRGB(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6);
-}
-
-void decodeARGB(struct TheoraPixelTransform* t)
-{
- _decodeRGBA(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7, 0, 4);
-// This is the old 2-phase version, leaving it here in case more debugging is needed
-// _decodeRGB(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7);
-// _decodeAlpha(t, t->w * 4);
-}
-
-void decodeXRGB(struct TheoraPixelTransform* t)
-{
- _decodeRGB(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7);
-}
-
-void decodeBGR(struct TheoraPixelTransform* t)
-{
- _decodeRGB(t, t->w * 3, 3, 0, 2, 1, 0, 5, 4, 3);
-}
-
-void decodeBGRA(struct TheoraPixelTransform* t)
-{
- _decodeRGBA(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4, 3, 7);
-// This is the old 2-phase version, leaving it here in case more debugging is needed
-// _decodeRGB(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4);
-// _decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeBGRX(struct TheoraPixelTransform* t)
-{
- _decodeRGB(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4);
-}
-
-void decodeABGR(struct TheoraPixelTransform* t)
-{
- _decodeRGBA(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5, 0, 4);
-// This is the old 2-phase version, leaving it here in case more debugging is needed
-// _decodeRGB(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5);
-// _decodeAlpha(t, t->w * 4);
-}
-
-void decodeXBGR(struct TheoraPixelTransform* t)
-{
- _decodeRGB(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5);
-}
-
-void initYUVConversionModule()
-{
- //used to bring the table into the high side (scale up) so we
- //can maintain high precision and not use floats (FIXED POINT)
-
- // this is the pseudocode for yuv->rgb conversion
- // r = 1.164*(*ySrc - 16) + 1.596*(cv - 128);
- // b = 1.164*(*ySrc - 16) + 2.018*(cu - 128);
- // g = 1.164*(*ySrc - 16) - 0.813*(cv - 128) - 0.391*(cu - 128);
-
- double scale = 1L << 13, temp;
-
- int i;
- for (i = 0; i < 256; ++i)
- {
- temp = i - 128;
-
- YTable[i] = (int)((1.164 * scale + 0.5) * (i - 16)); //Calc Y component
- RVTable[i] = (int)((1.596 * scale + 0.5) * temp); //Calc R component
- GUTable[i] = (int)((0.391 * scale + 0.5) * temp); //Calc G u & v components
- GVTable[i] = (int)((0.813 * scale + 0.5) * temp);
- BUTable[i] = (int)((2.018 * scale + 0.5) * temp); //Calc B component
- }
-}
-
-/*
- * Below are the function versions of the above macros, use those for debugging, but leave the macros for maximum CPU execution speed
- *
- *
- *
- *
-
-void _decodeRGB(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth, int i1, int i2, int i3, int j1, int j2, int j3)
-{
- register int tmp;
- int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;
- unsigned int y;
- unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;
-
- for (y = 0; y < t->h; y += 2)
- {
- ySrcEven = t->y + y * t->yStride;
- ySrcOdd = t->y + (y + 1) * t->yStride;
- uSrc = t->u + y * t->uStride / 2;
- vSrc = t->v + y * t->vStride / 2;
- out1 = t->out + y * stride;
- out2 = t->out + (y + 1) * stride;
-
- for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)
- {
- cu = *uSrc; ++uSrc;
- cv = *vSrc; ++vSrc;
- rV = RVTable[cv];
- gUV = GUTable[cu] + GVTable[cv];
- bU = BUTable[cu];
-
- rgbY1 = YTable[*ySrcEven]; ++ySrcEven;
- rgbY2 = YTable[*ySrcOdd]; ++ySrcOdd;
- rgbY3 = YTable[*ySrcEven]; ++ySrcEven;
- rgbY4 = YTable[*ySrcOdd]; ++ySrcOdd;
-
- CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );
- CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);
- CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );
-
- CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );
- CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);
- CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );
-
- CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );
- CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);
- CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );
-
- CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );
- CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);
- CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );
-
- out1 += nBytes2; out2 += nBytes2;
- }
- }
-}
-
-void _decodeRGBA(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth, int i1, int i2, int i3, int j1, int j2, int j3, int aindex1, int aindex2)
-{
- register int tmp;
- int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, a1, a2, a3, a4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;
- int alphaStride = t->w;
- unsigned int y;
- unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;
-
- for (y = 0; y < t->h; y += 2)
- {
- ySrcEven = t->y + y * t->yStride;
- ySrcOdd = t->y + (y + 1) * t->yStride;
- uSrc = t->u + y * t->uStride / 2;
- vSrc = t->v + y * t->vStride / 2;
- out1 = t->out + y * stride;
- out2 = t->out + (y + 1) * stride;
-
- for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)
- {
- cu = *uSrc; ++uSrc;
- cv = *vSrc; ++vSrc;
- rV = RVTable[cv];
- gUV = GUTable[cu] + GVTable[cv];
- bU = BUTable[cu];
-
- rgbY1 = YTable[*ySrcEven]; a1 = ySrcEven[alphaStride]; ++ySrcEven;
- rgbY2 = YTable[*ySrcOdd]; a2 = ySrcOdd [alphaStride]; ++ySrcOdd;
- rgbY3 = YTable[*ySrcEven]; a3 = ySrcEven[alphaStride]; ++ySrcEven;
- rgbY4 = YTable[*ySrcOdd]; a4 = ySrcOdd [alphaStride]; ++ySrcOdd;
-
- if (a1 >= 32)
- {
- CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );
- CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);
- CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );
- out1[aindex1] = a1 > 224 ? 255 : a1;
- }
- else *((unsigned int*) out1) = 0;
-
- if (a2 >= 32)
- {
- CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );
- CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);
- CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );
- out2[aindex1] = a2 > 224 ? 255 : a2;
- }
- else *((unsigned int*) out2) = 0;
-
-
- if (a3 >= 32)
- {
- CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );
- CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);
- CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );
- out1[aindex2] = a3 > 224 ? 255 : a3;
- }
- else *((unsigned int*) &out1[4]) = 0;
-
- if (a4 >= 32)
- {
- CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );
- CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);
- CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );
- out2[aindex2] = a4 > 224 ? 255 : a4;
- }
- else *((unsigned int*) &out2[4]) = 0;
-
- out1 += nBytes2; out2 += nBytes2;
- }
- }
-}
-*/
-#endif
diff --git a/drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c b/drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c
deleted file mode 100644
index fea74eca71..0000000000
--- a/drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "yuv_util.h"
-
-static void _decodeYUV(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth)
-{
- int cv, cu, y1, y2, y3, y4, width = maxWidth == 0 ? t->w : maxWidth;
- unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;
- unsigned int y;
-
- for (y=0; y < t->h; y += 2)
- {
- ySrcEven = t->y + y * t->yStride;
- ySrcOdd = t->y + (y + 1) * t->yStride;
- uSrc = t->u + y * t->uStride / 2;
- vSrc = t->v + y * t->vStride / 2;
- out1 = t->out + y * stride;
- out2 = t->out + (y + 1) * stride;
-
- for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)
- {
- // EVEN columns
- cu = *uSrc; ++uSrc;
- cv = *vSrc; ++vSrc;
-
- y1 = *ySrcEven; ++ySrcEven;
- y2 = *ySrcOdd; ++ySrcOdd;
- y3 = *ySrcEven; ++ySrcEven;
- y4 = *ySrcOdd; ++ySrcOdd;
-
- // EVEN columns
- out1[0] = y1;
- out1[1] = cu;
- out1[2] = cv;
-
- out2[0] = y2;
- out2[1] = cu;
- out2[2] = cv;
-
- out1 += nBytes; out2 += nBytes;
- // ODD columns
- out1[0] = y3;
- out1[1] = cu;
- out1[2] = cv;
-
- out2[0] = y4;
- out2[1] = cu;
- out2[2] = cv;
- out1 += nBytes; out2 += nBytes;
- }
- }
-}
-
-void decodeYUV(struct TheoraPixelTransform* t)
-{
- _decodeYUV(t, t->w * 3, 3, 0);
-}
-
-void decodeYUVA(struct TheoraPixelTransform* t)
-{
- _decodeYUV(t, t->w * 4, 4, 0);
- _decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeYUVX(struct TheoraPixelTransform* t)
-{
- _decodeYUV(t, t->w * 4, 4, 0);
-}
-
-void decodeAYUV(struct TheoraPixelTransform* t)
-{
- _decodeYUV(incOut(t, 1), t->w * 4, 4, 0);
- _decodeAlpha(t, t->w * 4);
-}
-
-void decodeXYUV(struct TheoraPixelTransform* t)
-{
- _decodeYUV(incOut(t, 1), t->w * 4, 4, 0);
-}
-
diff --git a/drivers/theoraplayer/src/YUV/android/cpu-features.c b/drivers/theoraplayer/src/YUV/android/cpu-features.c
deleted file mode 100644
index 623dc94e0e..0000000000
--- a/drivers/theoraplayer/src/YUV/android/cpu-features.c
+++ /dev/null
@@ -1,1095 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/* ChangeLog for this library:
- *
- * NDK r8d: Add android_setCpu().
- *
- * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16,
- * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt.
- *
- * Rewrite the code to parse /proc/self/auxv instead of
- * the "Features" field in /proc/cpuinfo.
- *
- * Dynamically allocate the buffer that hold the content
- * of /proc/cpuinfo to deal with newer hardware.
- *
- * NDK r7c: Fix CPU count computation. The old method only reported the
- * number of _active_ CPUs when the library was initialized,
- * which could be less than the real total.
- *
- * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7
- * for an ARMv6 CPU (see below).
- *
- * Handle kernels that only report 'neon', and not 'vfpv3'
- * (VFPv3 is mandated by the ARM architecture is Neon is implemented)
- *
- * Handle kernels that only report 'vfpv3d16', and not 'vfpv3'
- *
- * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in
- * android_getCpuFamily().
- *
- * NDK r4: Initial release
- */
-
-#if 0
-
-#ifdef _ANDROID
-#if defined(__le32__)
-
-// When users enter this, we should only provide interface and
-// libportable will give the implementations.
-
-#else // !__le32__
-
-#include <sys/system_properties.h>
-#include <pthread.h>
-#include "cpu-features.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <errno.h>
-
-static pthread_once_t g_once;
-static int g_inited;
-static AndroidCpuFamily g_cpuFamily;
-static uint64_t g_cpuFeatures;
-static int g_cpuCount;
-
-#ifdef __arm__
-static uint32_t g_cpuIdArm;
-#endif
-
-static const int android_cpufeatures_debug = 0;
-
-#ifdef __arm__
-# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_ARM
-#elif defined __i386__
-# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_X86
-#else
-# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_UNKNOWN
-#endif
-
-#define D(...) \
- do { \
- if (android_cpufeatures_debug) { \
- printf(__VA_ARGS__); fflush(stdout); \
- } \
- } while (0)
-
-#ifdef __i386__
-static __inline__ void x86_cpuid(int func, int values[4])
-{
- int a, b, c, d;
- /* We need to preserve ebx since we're compiling PIC code */
- /* this means we can't use "=b" for the second output register */
- __asm__ __volatile__ ( \
- "push %%ebx\n"
- "cpuid\n" \
- "mov %%ebx, %1\n"
- "pop %%ebx\n"
- : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
- : "a" (func) \
- );
- values[0] = a;
- values[1] = b;
- values[2] = c;
- values[3] = d;
-}
-#endif
-
-/* Get the size of a file by reading it until the end. This is needed
- * because files under /proc do not always return a valid size when
- * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
- */
-static int
-get_file_size(const char* pathname)
-{
- int fd, ret, result = 0;
- char buffer[256];
-
- fd = open(pathname, O_RDONLY);
- if (fd < 0) {
- D("Can't open %s: %s\n", pathname, strerror(errno));
- return -1;
- }
-
- for (;;) {
- int ret = read(fd, buffer, sizeof buffer);
- if (ret < 0) {
- if (errno == EINTR)
- continue;
- D("Error while reading %s: %s\n", pathname, strerror(errno));
- break;
- }
- if (ret == 0)
- break;
-
- result += ret;
- }
- close(fd);
- return result;
-}
-
-/* Read the content of /proc/cpuinfo into a user-provided buffer.
- * Return the length of the data, or -1 on error. Does *not*
- * zero-terminate the content. Will not read more
- * than 'buffsize' bytes.
- */
-static int
-read_file(const char* pathname, char* buffer, size_t buffsize)
-{
- int fd, count;
-
- fd = open(pathname, O_RDONLY);
- if (fd < 0) {
- D("Could not open %s: %s\n", pathname, strerror(errno));
- return -1;
- }
- count = 0;
- while (count < (int)buffsize) {
- int ret = read(fd, buffer + count, buffsize - count);
- if (ret < 0) {
- if (errno == EINTR)
- continue;
- D("Error while reading from %s: %s\n", pathname, strerror(errno));
- if (count == 0)
- count = -1;
- break;
- }
- if (ret == 0)
- break;
- count += ret;
- }
- close(fd);
- return count;
-}
-
-/* Extract the content of a the first occurence of a given field in
- * the content of /proc/cpuinfo and return it as a heap-allocated
- * string that must be freed by the caller.
- *
- * Return NULL if not found
- */
-static char*
-extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
-{
- int fieldlen = strlen(field);
- const char* bufend = buffer + buflen;
- char* result = NULL;
- int len, ignore;
- const char *p, *q;
-
- /* Look for first field occurence, and ensures it starts the line. */
- p = buffer;
- for (;;) {
- p = memmem(p, bufend-p, field, fieldlen);
- if (p == NULL)
- goto EXIT;
-
- if (p == buffer || p[-1] == '\n')
- break;
-
- p += fieldlen;
- }
-
- /* Skip to the first column followed by a space */
- p += fieldlen;
- p = memchr(p, ':', bufend-p);
- if (p == NULL || p[1] != ' ')
- goto EXIT;
-
- /* Find the end of the line */
- p += 2;
- q = memchr(p, '\n', bufend-p);
- if (q == NULL)
- q = bufend;
-
- /* Copy the line into a heap-allocated buffer */
- len = q-p;
- result = malloc(len+1);
- if (result == NULL)
- goto EXIT;
-
- memcpy(result, p, len);
- result[len] = '\0';
-
-EXIT:
- return result;
-}
-
-/* Checks that a space-separated list of items contains one given 'item'.
- * Returns 1 if found, 0 otherwise.
- */
-static int
-has_list_item(const char* list, const char* item)
-{
- const char* p = list;
- int itemlen = strlen(item);
-
- if (list == NULL)
- return 0;
-
- while (*p) {
- const char* q;
-
- /* skip spaces */
- while (*p == ' ' || *p == '\t')
- p++;
-
- /* find end of current list item */
- q = p;
- while (*q && *q != ' ' && *q != '\t')
- q++;
-
- if (itemlen == q-p && !memcmp(p, item, itemlen))
- return 1;
-
- /* skip to next item */
- p = q;
- }
- return 0;
-}
-
-/* Parse a number starting from 'input', but not going further
- * than 'limit'. Return the value into '*result'.
- *
- * NOTE: Does not skip over leading spaces, or deal with sign characters.
- * NOTE: Ignores overflows.
- *
- * The function returns NULL in case of error (bad format), or the new
- * position after the decimal number in case of success (which will always
- * be <= 'limit').
- */
-static const char*
-parse_number(const char* input, const char* limit, int base, int* result)
-{
- const char* p = input;
- int val = 0;
- while (p < limit) {
- int d = (*p - '0');
- if ((unsigned)d >= 10U) {
- d = (*p - 'a');
- if ((unsigned)d >= 6U)
- d = (*p - 'A');
- if ((unsigned)d >= 6U)
- break;
- d += 10;
- }
- if (d >= base)
- break;
- val = val*base + d;
- p++;
- }
- if (p == input)
- return NULL;
-
- *result = val;
- return p;
-}
-
-static const char*
-parse_decimal(const char* input, const char* limit, int* result)
-{
- return parse_number(input, limit, 10, result);
-}
-
-static const char*
-parse_hexadecimal(const char* input, const char* limit, int* result)
-{
- return parse_number(input, limit, 16, result);
-}
-
-/* This small data type is used to represent a CPU list / mask, as read
- * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt
- *
- * For now, we don't expect more than 32 cores on mobile devices, so keep
- * everything simple.
- */
-typedef struct {
- uint32_t mask;
-} CpuList;
-
-static __inline__ void
-cpulist_init(CpuList* list) {
- list->mask = 0;
-}
-
-static __inline__ void
-cpulist_and(CpuList* list1, CpuList* list2) {
- list1->mask &= list2->mask;
-}
-
-static __inline__ void
-cpulist_set(CpuList* list, int index) {
- if ((unsigned)index < 32) {
- list->mask |= (uint32_t)(1U << index);
- }
-}
-
-static __inline__ int
-cpulist_count(CpuList* list) {
- return __builtin_popcount(list->mask);
-}
-
-/* Parse a textual list of cpus and store the result inside a CpuList object.
- * Input format is the following:
- * - comma-separated list of items (no spaces)
- * - each item is either a single decimal number (cpu index), or a range made
- * of two numbers separated by a single dash (-). Ranges are inclusive.
- *
- * Examples: 0
- * 2,4-127,128-143
- * 0-1
- */
-static void
-cpulist_parse(CpuList* list, const char* line, int line_len)
-{
- const char* p = line;
- const char* end = p + line_len;
- const char* q;
-
- /* NOTE: the input line coming from sysfs typically contains a
- * trailing newline, so take care of it in the code below
- */
- while (p < end && *p != '\n')
- {
- int val, start_value, end_value;
-
- /* Find the end of current item, and put it into 'q' */
- q = memchr(p, ',', end-p);
- if (q == NULL) {
- q = end;
- }
-
- /* Get first value */
- p = parse_decimal(p, q, &start_value);
- if (p == NULL)
- goto BAD_FORMAT;
-
- end_value = start_value;
-
- /* If we're not at the end of the item, expect a dash and
- * and integer; extract end value.
- */
- if (p < q && *p == '-') {
- p = parse_decimal(p+1, q, &end_value);
- if (p == NULL)
- goto BAD_FORMAT;
- }
-
- /* Set bits CPU list bits */
- for (val = start_value; val <= end_value; val++) {
- cpulist_set(list, val);
- }
-
- /* Jump to next item */
- p = q;
- if (p < end)
- p++;
- }
-
-BAD_FORMAT:
- ;
-}
-
-/* Read a CPU list from one sysfs file */
-static void
-cpulist_read_from(CpuList* list, const char* filename)
-{
- char file[64];
- int filelen;
-
- cpulist_init(list);
-
- filelen = read_file(filename, file, sizeof file);
- if (filelen < 0) {
- D("Could not read %s: %s\n", filename, strerror(errno));
- return;
- }
-
- cpulist_parse(list, file, filelen);
-}
-
-// See <asm/hwcap.h> kernel header.
-#define HWCAP_VFP (1 << 6)
-#define HWCAP_IWMMXT (1 << 9)
-#define HWCAP_NEON (1 << 12)
-#define HWCAP_VFPv3 (1 << 13)
-#define HWCAP_VFPv3D16 (1 << 14)
-#define HWCAP_VFPv4 (1 << 16)
-#define HWCAP_IDIVA (1 << 17)
-#define HWCAP_IDIVT (1 << 18)
-
-#define AT_HWCAP 16
-
-#if defined(__arm__)
-/* Compute the ELF HWCAP flags.
- */
-static uint32_t
-get_elf_hwcap(const char* cpuinfo, int cpuinfo_len)
-{
- /* IMPORTANT:
- * Accessing /proc/self/auxv doesn't work anymore on all
- * platform versions. More specifically, when running inside
- * a regular application process, most of /proc/self/ will be
- * non-readable, including /proc/self/auxv. This doesn't
- * happen however if the application is debuggable, or when
- * running under the "shell" UID, which is why this was not
- * detected appropriately.
- */
-#if 0
- uint32_t result = 0;
- const char filepath[] = "/proc/self/auxv";
- int fd = open(filepath, O_RDONLY);
- if (fd < 0) {
- D("Could not open %s: %s\n", filepath, strerror(errno));
- return 0;
- }
-
- struct { uint32_t tag; uint32_t value; } entry;
-
- for (;;) {
- int ret = read(fd, (char*)&entry, sizeof entry);
- if (ret < 0) {
- if (errno == EINTR)
- continue;
- D("Error while reading %s: %s\n", filepath, strerror(errno));
- break;
- }
- // Detect end of list.
- if (ret == 0 || (entry.tag == 0 && entry.value == 0))
- break;
- if (entry.tag == AT_HWCAP) {
- result = entry.value;
- break;
- }
- }
- close(fd);
- return result;
-#else
- // Recreate ELF hwcaps by parsing /proc/cpuinfo Features tag.
- uint32_t hwcaps = 0;
-
- char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
-
- if (cpuFeatures != NULL) {
- D("Found cpuFeatures = '%s'\n", cpuFeatures);
-
- if (has_list_item(cpuFeatures, "vfp"))
- hwcaps |= HWCAP_VFP;
- if (has_list_item(cpuFeatures, "vfpv3"))
- hwcaps |= HWCAP_VFPv3;
- if (has_list_item(cpuFeatures, "vfpv3d16"))
- hwcaps |= HWCAP_VFPv3D16;
- if (has_list_item(cpuFeatures, "vfpv4"))
- hwcaps |= HWCAP_VFPv4;
- if (has_list_item(cpuFeatures, "neon"))
- hwcaps |= HWCAP_NEON;
- if (has_list_item(cpuFeatures, "idiva"))
- hwcaps |= HWCAP_IDIVA;
- if (has_list_item(cpuFeatures, "idivt"))
- hwcaps |= HWCAP_IDIVT;
- if (has_list_item(cpuFeatures, "idiv"))
- hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT;
- if (has_list_item(cpuFeatures, "iwmmxt"))
- hwcaps |= HWCAP_IWMMXT;
-
- free(cpuFeatures);
- }
- return hwcaps;
-#endif
-}
-#endif /* __arm__ */
-
-/* Return the number of cpus present on a given device.
- *
- * To handle all weird kernel configurations, we need to compute the
- * intersection of the 'present' and 'possible' CPU lists and count
- * the result.
- */
-static int
-get_cpu_count(void)
-{
- CpuList cpus_present[1];
- CpuList cpus_possible[1];
-
- cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present");
- cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible");
-
- /* Compute the intersection of both sets to get the actual number of
- * CPU cores that can be used on this device by the kernel.
- */
- cpulist_and(cpus_present, cpus_possible);
-
- return cpulist_count(cpus_present);
-}
-
-static void
-android_cpuInitFamily(void)
-{
-#if defined(__arm__)
- g_cpuFamily = ANDROID_CPU_FAMILY_ARM;
-#elif defined(__i386__)
- g_cpuFamily = ANDROID_CPU_FAMILY_X86;
-#elif defined(__mips64)
-/* Needs to be before __mips__ since the compiler defines both */
- g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64;
-#elif defined(__mips__)
- g_cpuFamily = ANDROID_CPU_FAMILY_MIPS;
-#elif defined(__aarch64__)
- g_cpuFamily = ANDROID_CPU_FAMILY_ARM64;
-#elif defined(__x86_64__)
- g_cpuFamily = ANDROID_CPU_FAMILY_X86_64;
-#else
- g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;
-#endif
-}
-
-static void
-android_cpuInit(void)
-{
- char* cpuinfo = NULL;
- int cpuinfo_len;
-
- android_cpuInitFamily();
-
- g_cpuFeatures = 0;
- g_cpuCount = 1;
- g_inited = 1;
-
- cpuinfo_len = get_file_size("/proc/cpuinfo");
- if (cpuinfo_len < 0) {
- D("cpuinfo_len cannot be computed!");
- return;
- }
- cpuinfo = malloc(cpuinfo_len);
- if (cpuinfo == NULL) {
- D("cpuinfo buffer could not be allocated");
- return;
- }
- cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
- D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len,
- cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo);
-
- if (cpuinfo_len < 0) /* should not happen */ {
- free(cpuinfo);
- return;
- }
-
- /* Count the CPU cores, the value may be 0 for single-core CPUs */
- g_cpuCount = get_cpu_count();
- if (g_cpuCount == 0) {
- g_cpuCount = 1;
- }
-
- D("found cpuCount = %d\n", g_cpuCount);
-
-#ifdef __arm__
- {
- char* features = NULL;
- char* architecture = NULL;
-
- /* Extract architecture from the "CPU Architecture" field.
- * The list is well-known, unlike the the output of
- * the 'Processor' field which can vary greatly.
- *
- * See the definition of the 'proc_arch' array in
- * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in
- * same file.
- */
- char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
-
- if (cpuArch != NULL) {
- char* end;
- long archNumber;
- int hasARMv7 = 0;
-
- D("found cpuArch = '%s'\n", cpuArch);
-
- /* read the initial decimal number, ignore the rest */
- archNumber = strtol(cpuArch, &end, 10);
-
- /* Here we assume that ARMv8 will be upwards compatible with v7
- * in the future. Unfortunately, there is no 'Features' field to
- * indicate that Thumb-2 is supported.
- */
- if (end > cpuArch && archNumber >= 7) {
- hasARMv7 = 1;
- }
-
- /* Unfortunately, it seems that certain ARMv6-based CPUs
- * report an incorrect architecture number of 7!
- *
- * See http://code.google.com/p/android/issues/detail?id=10812
- *
- * We try to correct this by looking at the 'elf_format'
- * field reported by the 'Processor' field, which is of the
- * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for
- * an ARMv6-one.
- */
- if (hasARMv7) {
- char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len,
- "Processor");
- if (cpuProc != NULL) {
- D("found cpuProc = '%s'\n", cpuProc);
- if (has_list_item(cpuProc, "(v6l)")) {
- D("CPU processor and architecture mismatch!!\n");
- hasARMv7 = 0;
- }
- free(cpuProc);
- }
- }
-
- if (hasARMv7) {
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7;
- }
-
- /* The LDREX / STREX instructions are available from ARMv6 */
- if (archNumber >= 6) {
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX;
- }
-
- free(cpuArch);
- }
-
- /* Extract the list of CPU features from ELF hwcaps */
- uint32_t hwcaps = get_elf_hwcap(cpuinfo, cpuinfo_len);
-
- if (hwcaps != 0) {
- int has_vfp = (hwcaps & HWCAP_VFP);
- int has_vfpv3 = (hwcaps & HWCAP_VFPv3);
- int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16);
- int has_vfpv4 = (hwcaps & HWCAP_VFPv4);
- int has_neon = (hwcaps & HWCAP_NEON);
- int has_idiva = (hwcaps & HWCAP_IDIVA);
- int has_idivt = (hwcaps & HWCAP_IDIVT);
- int has_iwmmxt = (hwcaps & HWCAP_IWMMXT);
-
- // The kernel does a poor job at ensuring consistency when
- // describing CPU features. So lots of guessing is needed.
-
- // 'vfpv4' implies VFPv3|VFP_FMA|FP16
- if (has_vfpv4)
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
- ANDROID_CPU_ARM_FEATURE_VFP_FP16 |
- ANDROID_CPU_ARM_FEATURE_VFP_FMA;
-
- // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC,
- // a value of 'vfpv3' doesn't necessarily mean that the D32
- // feature is present, so be conservative. All CPUs in the
- // field that support D32 also support NEON, so this should
- // not be a problem in practice.
- if (has_vfpv3 || has_vfpv3d16)
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
-
- // 'vfp' is super ambiguous. Depending on the kernel, it can
- // either mean VFPv2 or VFPv3. Make it depend on ARMv7.
- if (has_vfp) {
- if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7)
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
- else
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2;
- }
-
- // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA
- if (has_neon) {
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
- ANDROID_CPU_ARM_FEATURE_NEON |
- ANDROID_CPU_ARM_FEATURE_VFP_D32;
- if (has_vfpv4)
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA;
- }
-
- // VFPv3 implies VFPv2 and ARMv7
- if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3)
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 |
- ANDROID_CPU_ARM_FEATURE_ARMv7;
-
- if (has_idiva)
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;
- if (has_idivt)
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2;
-
- if (has_iwmmxt)
- g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt;
- }
-
- /* Extract the cpuid value from various fields */
- // The CPUID value is broken up in several entries in /proc/cpuinfo.
- // This table is used to rebuild it from the entries.
- static const struct CpuIdEntry {
- const char* field;
- char format;
- char bit_lshift;
- char bit_length;
- } cpu_id_entries[] = {
- { "CPU implementer", 'x', 24, 8 },
- { "CPU variant", 'x', 20, 4 },
- { "CPU part", 'x', 4, 12 },
- { "CPU revision", 'd', 0, 4 },
- };
- size_t i;
- D("Parsing /proc/cpuinfo to recover CPUID\n");
- for (i = 0;
- i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]);
- ++i) {
- const struct CpuIdEntry* entry = &cpu_id_entries[i];
- char* value = extract_cpuinfo_field(cpuinfo,
- cpuinfo_len,
- entry->field);
- if (value == NULL)
- continue;
-
- D("field=%s value='%s'\n", entry->field, value);
- char* value_end = value + strlen(value);
- int val = 0;
- const char* start = value;
- const char* p;
- if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) {
- start += 2;
- p = parse_hexadecimal(start, value_end, &val);
- } else if (entry->format == 'x')
- p = parse_hexadecimal(value, value_end, &val);
- else
- p = parse_decimal(value, value_end, &val);
-
- if (p > (const char*)start) {
- val &= ((1 << entry->bit_length)-1);
- val <<= entry->bit_lshift;
- g_cpuIdArm |= (uint32_t) val;
- }
-
- free(value);
- }
-
- // Handle kernel configuration bugs that prevent the correct
- // reporting of CPU features.
- static const struct CpuFix {
- uint32_t cpuid;
- uint64_t or_flags;
- } cpu_fixes[] = {
- /* The Nexus 4 (Qualcomm Krait) kernel configuration
- * forgets to report IDIV support. */
- { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
- ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },
- { 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
- ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },
- };
- size_t n;
- for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) {
- const struct CpuFix* entry = &cpu_fixes[n];
-
- if (g_cpuIdArm == entry->cpuid)
- g_cpuFeatures |= entry->or_flags;
- }
-
- }
-#endif /* __arm__ */
-
-#ifdef __i386__
- int regs[4];
-
-/* According to http://en.wikipedia.org/wiki/CPUID */
-#define VENDOR_INTEL_b 0x756e6547
-#define VENDOR_INTEL_c 0x6c65746e
-#define VENDOR_INTEL_d 0x49656e69
-
- x86_cpuid(0, regs);
- int vendorIsIntel = (regs[1] == VENDOR_INTEL_b &&
- regs[2] == VENDOR_INTEL_c &&
- regs[3] == VENDOR_INTEL_d);
-
- x86_cpuid(1, regs);
- if ((regs[2] & (1 << 9)) != 0) {
- g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3;
- }
- if ((regs[2] & (1 << 23)) != 0) {
- g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT;
- }
- if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) {
- g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE;
- }
-#endif
-
- free(cpuinfo);
-}
-
-
-AndroidCpuFamily
-android_getCpuFamily(void)
-{
- pthread_once(&g_once, android_cpuInit);
- return g_cpuFamily;
-}
-
-
-uint64_t
-android_getCpuFeaturesExt(void)
-{
- pthread_once(&g_once, android_cpuInit);
- return g_cpuFeatures;
-}
-
-
-int
-android_getCpuCount(void)
-{
- pthread_once(&g_once, android_cpuInit);
- return g_cpuCount;
-}
-
-static void
-android_cpuInitDummy(void)
-{
- g_inited = 1;
-}
-
-int
-android_setCpu(int cpu_count, uint64_t cpu_features)
-{
- /* Fail if the library was already initialized. */
- if (g_inited)
- return 0;
-
- android_cpuInitFamily();
- g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count);
- g_cpuFeatures = cpu_features;
- pthread_once(&g_once, android_cpuInitDummy);
-
- return 1;
-}
-
-#ifdef __arm__
-uint32_t
-android_getCpuIdArm(void)
-{
- pthread_once(&g_once, android_cpuInit);
- return g_cpuIdArm;
-}
-
-int
-android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id)
-{
- if (!android_setCpu(cpu_count, cpu_features))
- return 0;
-
- g_cpuIdArm = cpu_id;
- return 1;
-}
-#endif /* __arm__ */
-
-/*
- * Technical note: Making sense of ARM's FPU architecture versions.
- *
- * FPA was ARM's first attempt at an FPU architecture. There is no Android
- * device that actually uses it since this technology was already obsolete
- * when the project started. If you see references to FPA instructions
- * somewhere, you can be sure that this doesn't apply to Android at all.
- *
- * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of
- * new versions / additions to it. ARM considers this obsolete right now,
- * and no known Android device implements it either.
- *
- * VFPv2 added a few instructions to VFPv1, and is an *optional* extension
- * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device
- * supporting the 'armeabi' ABI doesn't necessarily support these.
- *
- * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used
- * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated
- * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means
- * that it provides 16 double-precision FPU registers (d0-d15) and 32
- * single-precision ones (s0-s31) which happen to be mapped to the same
- * register banks.
- *
- * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16
- * additional double precision registers (d16-d31). Note that there are
- * still only 32 single precision registers.
- *
- * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
- * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
- * are not supported by Android. Note that it is not compatible with VFPv2.
- *
- * NOTE: The term 'VFPv3' usually designate either VFPv3-D16 or VFPv3-D32
- * depending on context. For example GCC uses it for VFPv3-D32, but
- * the Linux kernel code uses it for VFPv3-D16 (especially in
- * /proc/cpuinfo). Always try to use the full designation when
- * possible.
- *
- * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
- * instructions to perform parallel computations on vectors of 8, 16,
- * 32, 64 and 128 bit quantities. NEON requires VFPv32-D32 since all
- * NEON registers are also mapped to the same register banks.
- *
- * VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to
- * perform fused multiply-accumulate on VFP registers, as well as
- * half-precision (16-bit) conversion operations.
- *
- * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
- * registers.
- *
- * VPFv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
- * multiply-accumulate instructions that work on the NEON registers.
- *
- * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
- * depending on context.
- *
- * The following information was determined by scanning the binutils-2.22
- * sources:
- *
- * Basic VFP instruction subsets:
- *
- * #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set.
- * #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns.
- * #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1.
- * #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision.
- * #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision.
- * #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns.
- * #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31.
- * #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions.
- * #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add
- * #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add
- *
- * FPU types (excluding NEON)
- *
- * FPU_VFP_V1xD (EXT_V1xD)
- * |
- * +--------------------------+
- * | |
- * FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
- * | |
- * | |
- * FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
- * |
- * FPU_VFP_V3D16 (+EXT_Vx3D+EXT_V3)
- * |
- * +--------------------------+
- * | |
- * FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
- * | |
- * | FPU_VFP_V4 (+EXT_D32)
- * |
- * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
- *
- * VFP architectures:
- *
- * ARCH_VFP_V1xD (EXT_V1xD)
- * |
- * +------------------+
- * | |
- * | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
- * | |
- * | ARCH_VFP_V3xD_FP16 (+EXT_FP16)
- * | |
- * | ARCH_VFP_V4_SP_D16 (+EXT_FMA)
- * |
- * ARCH_VFP_V1 (+EXT_V1)
- * |
- * ARCH_VFP_V2 (+EXT_V2)
- * |
- * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
- * |
- * +-------------------+
- * | |
- * | ARCH_VFP_V3D16_FP16 (+EXT_FP16)
- * |
- * +-------------------+
- * | |
- * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
- * | |
- * | ARCH_VFP_V4 (+EXT_D32)
- * | |
- * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
- * |
- * ARCH_VFP_V3 (+EXT_D32)
- * |
- * +-------------------+
- * | |
- * | ARCH_VFP_V3_FP16 (+EXT_FP16)
- * |
- * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
- * |
- * ARCH_NEON_FP16 (+EXT_FP16)
- *
- * -fpu=<name> values and their correspondance with FPU architectures above:
- *
- * {"vfp", FPU_ARCH_VFP_V2},
- * {"vfp9", FPU_ARCH_VFP_V2},
- * {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatbility.
- * {"vfp10", FPU_ARCH_VFP_V2},
- * {"vfp10-r0", FPU_ARCH_VFP_V1},
- * {"vfpxd", FPU_ARCH_VFP_V1xD},
- * {"vfpv2", FPU_ARCH_VFP_V2},
- * {"vfpv3", FPU_ARCH_VFP_V3},
- * {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16},
- * {"vfpv3-d16", FPU_ARCH_VFP_V3D16},
- * {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16},
- * {"vfpv3xd", FPU_ARCH_VFP_V3xD},
- * {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16},
- * {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1},
- * {"neon-fp16", FPU_ARCH_NEON_FP16},
- * {"vfpv4", FPU_ARCH_VFP_V4},
- * {"vfpv4-d16", FPU_ARCH_VFP_V4D16},
- * {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16},
- * {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4},
- *
- *
- * Simplified diagram that only includes FPUs supported by Android:
- * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
- * all others are optional and must be probed at runtime.
- *
- * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
- * |
- * +-------------------+
- * | |
- * | ARCH_VFP_V3D16_FP16 (+EXT_FP16)
- * |
- * +-------------------+
- * | |
- * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
- * | |
- * | ARCH_VFP_V4 (+EXT_D32)
- * | |
- * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
- * |
- * ARCH_VFP_V3 (+EXT_D32)
- * |
- * +-------------------+
- * | |
- * | ARCH_VFP_V3_FP16 (+EXT_FP16)
- * |
- * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
- * |
- * ARCH_NEON_FP16 (+EXT_FP16)
- *
- */
-
-#endif // defined(__le32__)
-#endif
-
-#endif
diff --git a/drivers/theoraplayer/src/YUV/android/cpu-features.h b/drivers/theoraplayer/src/YUV/android/cpu-features.h
deleted file mode 100644
index 12d3ad5645..0000000000
--- a/drivers/theoraplayer/src/YUV/android/cpu-features.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#ifndef CPU_FEATURES_H
-#define CPU_FEATURES_H
-
-#include <sys/cdefs.h>
-#include <stdint.h>
-
-__BEGIN_DECLS
-
-typedef enum {
- ANDROID_CPU_FAMILY_UNKNOWN = 0,
- ANDROID_CPU_FAMILY_ARM,
- ANDROID_CPU_FAMILY_X86,
- ANDROID_CPU_FAMILY_MIPS,
-
- ANDROID_CPU_FAMILY_MAX /* do not remove */
-
-} AndroidCpuFamily;
-
-/* Return family of the device's CPU */
-extern AndroidCpuFamily android_getCpuFamily(void);
-
-/* The list of feature flags for ARM CPUs that can be recognized by the
- * library. Value details are:
- *
- * VFPv2:
- * CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs
- * support these instructions. VFPv2 is a subset of VFPv3 so this will
- * be set whenever VFPv3 is set too.
- *
- * ARMv7:
- * CPU supports the ARMv7-A basic instruction set.
- * This feature is mandated by the 'armeabi-v7a' ABI.
- *
- * VFPv3:
- * CPU supports the VFPv3-D16 instruction set, providing hardware FPU
- * support for single and double precision floating point registers.
- * Note that only 16 FPU registers are available by default, unless
- * the D32 bit is set too. This feature is also mandated by the
- * 'armeabi-v7a' ABI.
- *
- * VFP_D32:
- * CPU VFP optional extension that provides 32 FPU registers,
- * instead of 16. Note that ARM mandates this feature is the 'NEON'
- * feature is implemented by the CPU.
- *
- * NEON:
- * CPU FPU supports "ARM Advanced SIMD" instructions, also known as
- * NEON. Note that this mandates the VFP_D32 feature as well, per the
- * ARM Architecture specification.
- *
- * VFP_FP16:
- * Half-width floating precision VFP extension. If set, the CPU
- * supports instructions to perform floating-point operations on
- * 16-bit registers. This is part of the VFPv4 specification, but
- * not mandated by any Android ABI.
- *
- * VFP_FMA:
- * Fused multiply-accumulate VFP instructions extension. Also part of
- * the VFPv4 specification, but not mandated by any Android ABI.
- *
- * NEON_FMA:
- * Fused multiply-accumulate NEON instructions extension. Optional
- * extension from the VFPv4 specification, but not mandated by any
- * Android ABI.
- *
- * IDIV_ARM:
- * Integer division available in ARM mode. Only available
- * on recent CPUs (e.g. Cortex-A15).
- *
- * IDIV_THUMB2:
- * Integer division available in Thumb-2 mode. Only available
- * on recent CPUs (e.g. Cortex-A15).
- *
- * iWMMXt:
- * Optional extension that adds MMX registers and operations to an
- * ARM CPU. This is only available on a few XScale-based CPU designs
- * sold by Marvell. Pretty rare in practice.
- *
- * If you want to tell the compiler to generate code that targets one of
- * the feature set above, you should probably use one of the following
- * flags (for more details, see technical note at the end of this file):
- *
- * -mfpu=vfp
- * -mfpu=vfpv2
- * These are equivalent and tell GCC to use VFPv2 instructions for
- * floating-point operations. Use this if you want your code to
- * run on *some* ARMv6 devices, and any ARMv7-A device supported
- * by Android.
- *
- * Generated code requires VFPv2 feature.
- *
- * -mfpu=vfpv3-d16
- * Tell GCC to use VFPv3 instructions (using only 16 FPU registers).
- * This should be generic code that runs on any CPU that supports the
- * 'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this.
- *
- * Generated code requires VFPv3 feature.
- *
- * -mfpu=vfpv3
- * Tell GCC to use VFPv3 instructions with 32 FPU registers.
- * Generated code requires VFPv3|VFP_D32 features.
- *
- * -mfpu=neon
- * Tell GCC to use VFPv3 instructions with 32 FPU registers, and
- * also support NEON intrinsics (see <arm_neon.h>).
- * Generated code requires VFPv3|VFP_D32|NEON features.
- *
- * -mfpu=vfpv4-d16
- * Generated code requires VFPv3|VFP_FP16|VFP_FMA features.
- *
- * -mfpu=vfpv4
- * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features.
- *
- * -mfpu=neon-vfpv4
- * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA
- * features.
- *
- * -mcpu=cortex-a7
- * -mcpu=cortex-a15
- * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|
- * NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2
- * This flag implies -mfpu=neon-vfpv4.
- *
- * -mcpu=iwmmxt
- * Allows the use of iWMMXt instrinsics with GCC.
- */
-enum {
- ANDROID_CPU_ARM_FEATURE_ARMv7 = (1 << 0),
- ANDROID_CPU_ARM_FEATURE_VFPv3 = (1 << 1),
- ANDROID_CPU_ARM_FEATURE_NEON = (1 << 2),
- ANDROID_CPU_ARM_FEATURE_LDREX_STREX = (1 << 3),
- ANDROID_CPU_ARM_FEATURE_VFPv2 = (1 << 4),
- ANDROID_CPU_ARM_FEATURE_VFP_D32 = (1 << 5),
- ANDROID_CPU_ARM_FEATURE_VFP_FP16 = (1 << 6),
- ANDROID_CPU_ARM_FEATURE_VFP_FMA = (1 << 7),
- ANDROID_CPU_ARM_FEATURE_NEON_FMA = (1 << 8),
- ANDROID_CPU_ARM_FEATURE_IDIV_ARM = (1 << 9),
- ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = (1 << 10),
- ANDROID_CPU_ARM_FEATURE_iWMMXt = (1 << 11),
-};
-
-enum {
- ANDROID_CPU_X86_FEATURE_SSSE3 = (1 << 0),
- ANDROID_CPU_X86_FEATURE_POPCNT = (1 << 1),
- ANDROID_CPU_X86_FEATURE_MOVBE = (1 << 2),
-};
-
-// libtheoraplayer addition, renamed this to "Ext" as not to conflict with your own project if you've included cpu-features.c in it
-//extern uint64_t android_getCpuFeaturesExt(void);
-#define android_getCpuFeaturesExt android_getCpuFeatures
-
-/* Return the number of CPU cores detected on this device. */
-extern int android_getCpuCount(void);
-
-/* The following is used to force the CPU count and features
- * mask in sandboxed processes. Under 4.1 and higher, these processes
- * cannot access /proc, which is the only way to get information from
- * the kernel about the current hardware (at least on ARM).
- *
- * It _must_ be called only once, and before any android_getCpuXXX
- * function, any other case will fail.
- *
- * This function return 1 on success, and 0 on failure.
- */
-extern int android_setCpu(int cpu_count,
- uint64_t cpu_features);
-
-#ifdef __arm__
-/* Retrieve the ARM 32-bit CPUID value from the kernel.
- * Note that this cannot work on sandboxed processes under 4.1 and
- * higher, unless you called android_setCpuArm() before.
- */
-extern uint32_t android_getCpuIdArm(void);
-
-/* An ARM-specific variant of android_setCpu() that also allows you
- * to set the ARM CPUID field.
- */
-extern int android_setCpuArm(int cpu_count,
- uint64_t cpu_features,
- uint32_t cpu_id);
-#endif
-
-__END_DECLS
-
-#endif /* CPU_FEATURES_H */
diff --git a/drivers/theoraplayer/src/YUV/libyuv/LICENSE b/drivers/theoraplayer/src/YUV/libyuv/LICENSE
deleted file mode 100755
index c911747a6b..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-Copyright 2011 The LibYuv Project Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
- * Neither the name of Google nor the names of its contributors may
- be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/drivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY b/drivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY
deleted file mode 100755
index a71591e771..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY
+++ /dev/null
@@ -1,8 +0,0 @@
-This source tree contains third party source code which is governed by third
-party licenses. This file contains references to files which are under other
-licenses than the one provided in the LICENSE file in the root of the source
-tree.
-
-Files governed by third party licenses:
-source/x86inc.asm
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv.h
deleted file mode 100755
index 3bebe642cc..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_H_ // NOLINT
-#define INCLUDE_LIBYUV_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/compare.h"
-#include "libyuv/convert.h"
-#include "libyuv/convert_argb.h"
-#include "libyuv/convert_from.h"
-#include "libyuv/convert_from_argb.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#include "libyuv/mjpeg_decoder.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-#include "libyuv/rotate_argb.h"
-#include "libyuv/row.h"
-#include "libyuv/scale.h"
-#include "libyuv/scale_argb.h"
-#include "libyuv/scale_row.h"
-#include "libyuv/version.h"
-#include "libyuv/video_common.h"
-
-#endif // INCLUDE_LIBYUV_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h
deleted file mode 100755
index beb750ba65..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ // NOLINT
-#define INCLUDE_LIBYUV_BASIC_TYPES_H_
-
-#include <stddef.h> // for NULL, size_t
-
-#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600))
-#include <sys/types.h> // for uintptr_t on x86
-#else
-#include <stdint.h> // for uintptr_t
-#endif
-
-#ifndef GG_LONGLONG
-#ifndef INT_TYPES_DEFINED
-#define INT_TYPES_DEFINED
-#ifdef COMPILER_MSVC
-typedef unsigned __int64 uint64;
-typedef __int64 int64;
-#ifndef INT64_C
-#define INT64_C(x) x ## I64
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x ## UI64
-#endif
-#define INT64_F "I64"
-#else // COMPILER_MSVC
-#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long uint64; // NOLINT
-typedef long int64; // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x ## L
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x ## UL
-#endif
-#define INT64_F "l"
-#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long long uint64; // NOLINT
-typedef long long int64; // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x ## LL
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x ## ULL
-#endif
-#define INT64_F "ll"
-#endif // __LP64__
-#endif // COMPILER_MSVC
-typedef unsigned int uint32;
-typedef int int32;
-typedef unsigned short uint16; // NOLINT
-typedef short int16; // NOLINT
-typedef unsigned char uint8;
-typedef signed char int8;
-#endif // INT_TYPES_DEFINED
-#endif // GG_LONGLONG
-
-// Detect compiler is for x86 or x64.
-#if defined(__x86_64__) || defined(_M_X64) || \
- defined(__i386__) || defined(_M_IX86)
-#define CPU_X86 1
-#endif
-// Detect compiler is for ARM.
-#if defined(__arm__) || defined(_M_ARM)
-#define CPU_ARM 1
-#endif
-
-#ifndef ALIGNP
-#ifdef __cplusplus
-#define ALIGNP(p, t) \
- (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
- ((t) - 1)) & ~((t) - 1))))
-#else
-#define ALIGNP(p, t) \
- ((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1)))) /* NOLINT */
-#endif
-#endif
-
-#if !defined(LIBYUV_API)
-#if defined(_WIN32) || defined(__CYGWIN__)
-#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
-#define LIBYUV_API __declspec(dllexport)
-#elif defined(LIBYUV_USING_SHARED_LIBRARY)
-#define LIBYUV_API __declspec(dllimport)
-#else
-#define LIBYUV_API
-#endif // LIBYUV_BUILDING_SHARED_LIBRARY
-#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
- (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
- defined(LIBYUV_USING_SHARED_LIBRARY))
-#define LIBYUV_API __attribute__ ((visibility ("default")))
-#else
-#define LIBYUV_API
-#endif // __GNUC__
-#endif // LIBYUV_API
-
-#define LIBYUV_BOOL int
-#define LIBYUV_FALSE 0
-#define LIBYUV_TRUE 1
-
-// Visual C x86 or GCC little endian.
-#if defined(__x86_64__) || defined(_M_X64) || \
- defined(__i386__) || defined(_M_IX86) || \
- defined(__arm__) || defined(_M_ARM) || \
- (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-#define LIBYUV_LITTLE_ENDIAN
-#endif
-
-#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h
deleted file mode 100755
index 5dfac7c86a..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_COMPARE_H_ // NOLINT
-#define INCLUDE_LIBYUV_COMPARE_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Compute a hash for specified memory. Seed of 5381 recommended.
-LIBYUV_API
-uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
-
-// Sum Square Error - used to compute Mean Square Error or PSNR.
-LIBYUV_API
-uint64 ComputeSumSquareError(const uint8* src_a,
- const uint8* src_b, int count);
-
-LIBYUV_API
-uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height);
-
-static const int kMaxPsnr = 128;
-
-LIBYUV_API
-double SumSquareErrorToPsnr(uint64 sse, uint64 count);
-
-LIBYUV_API
-double CalcFramePsnr(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height);
-
-LIBYUV_API
-double I420Psnr(const uint8* src_y_a, int stride_y_a,
- const uint8* src_u_a, int stride_u_a,
- const uint8* src_v_a, int stride_v_a,
- const uint8* src_y_b, int stride_y_b,
- const uint8* src_u_b, int stride_u_b,
- const uint8* src_v_b, int stride_v_b,
- int width, int height);
-
-LIBYUV_API
-double CalcFrameSsim(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height);
-
-LIBYUV_API
-double I420Ssim(const uint8* src_y_a, int stride_y_a,
- const uint8* src_u_a, int stride_u_a,
- const uint8* src_v_a, int stride_v_a,
- const uint8* src_y_b, int stride_y_b,
- const uint8* src_u_b, int stride_u_b,
- const uint8* src_v_b, int stride_v_b,
- int width, int height);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_COMPARE_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h
deleted file mode 100755
index 1bd45c837f..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_H_ // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_H_
-
-#include "libyuv/basic_types.h"
-// TODO(fbarchard): Remove the following headers includes.
-#include "libyuv/convert_from.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert I444 to I420.
-LIBYUV_API
-int I444ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert I422 to I420.
-LIBYUV_API
-int I422ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert I411 to I420.
-LIBYUV_API
-int I411ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Copy I420 to I420.
-#define I420ToI420 I420Copy
-LIBYUV_API
-int I420Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert I400 (grey) to I420.
-LIBYUV_API
-int I400ToI420(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert NV12 to I420.
-LIBYUV_API
-int NV12ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert NV21 to I420.
-LIBYUV_API
-int NV21ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_vu, int src_stride_vu,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert YUY2 to I420.
-LIBYUV_API
-int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert UYVY to I420.
-LIBYUV_API
-int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert M420 to I420.
-LIBYUV_API
-int M420ToI420(const uint8* src_m420, int src_stride_m420,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert Q420 to I420.
-LIBYUV_API
-int Q420ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// ARGB little endian (bgra in memory) to I420.
-LIBYUV_API
-int ARGBToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// BGRA little endian (argb in memory) to I420.
-LIBYUV_API
-int BGRAToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// ABGR little endian (rgba in memory) to I420.
-LIBYUV_API
-int ABGRToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGBA little endian (abgr in memory) to I420.
-LIBYUV_API
-int RGBAToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB little endian (bgr in memory) to I420.
-LIBYUV_API
-int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB big endian (rgb in memory) to I420.
-LIBYUV_API
-int RAWToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB16 (RGBP fourcc) little endian to I420.
-LIBYUV_API
-int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB15 (RGBO fourcc) little endian to I420.
-LIBYUV_API
-int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB12 (R444 fourcc) little endian to I420.
-LIBYUV_API
-int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-#ifdef HAVE_JPEG
-// src_width/height provided by capture.
-// dst_width/height for clipping determine final size.
-LIBYUV_API
-int MJPGToI420(const uint8* sample, size_t sample_size,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_width, int src_height,
- int dst_width, int dst_height);
-
-// Query size of MJPG in pixels.
-LIBYUV_API
-int MJPGSize(const uint8* sample, size_t sample_size,
- int* width, int* height);
-#endif
-
-// Note Bayer formats (BGGR) To I420 are in format_conversion.h
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// "src_size" is needed to parse MJPG.
-// "dst_stride_y" number of bytes in a row of the dst_y plane.
-// Normally this would be the same as dst_width, with recommended alignment
-// to 16 bytes for better efficiency.
-// If rotation of 90 or 270 is used, stride is affected. The caller should
-// allocate the I420 buffer according to rotation.
-// "dst_stride_u" number of bytes in a row of the dst_u plane.
-// Normally this would be the same as (dst_width + 1) / 2, with
-// recommended alignment to 16 bytes for better efficiency.
-// If rotation of 90 or 270 is used, stride is affected.
-// "crop_x" and "crop_y" are starting position for cropping.
-// To center, crop_x = (src_width - dst_width) / 2
-// crop_y = (src_height - dst_height) / 2
-// "src_width" / "src_height" is size of src_frame in pixels.
-// "src_height" can be negative indicating a vertically flipped image source.
-// "crop_width" / "crop_height" is the size to crop the src to.
-// Must be less than or equal to src_width/src_height
-// Cropping parameters are pre-rotation.
-// "rotation" can be 0, 90, 180 or 270.
-// "format" is a fourcc. ie 'I420', 'YUY2'
-// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
-LIBYUV_API
-int ConvertToI420(const uint8* src_frame, size_t src_size,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int crop_x, int crop_y,
- int src_width, int src_height,
- int crop_width, int crop_height,
- enum RotationMode rotation,
- uint32 format);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CONVERT_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h
deleted file mode 100755
index a18014ca2c..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
-
-#include "libyuv/basic_types.h"
-// TODO(fbarchard): Remove the following headers includes
-#include "libyuv/convert_from.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-
-// TODO(fbarchard): This set of functions should exactly match convert.h
-// Add missing Q420.
-// TODO(fbarchard): Add tests. Create random content of right size and convert
-// with C vs Opt and or to I420 and compare.
-// TODO(fbarchard): Some of these functions lack parameter setting.
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Alias.
-#define ARGBToARGB ARGBCopy
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I420 to ARGB.
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I422 to ARGB.
-LIBYUV_API
-int I422ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I444 to ARGB.
-LIBYUV_API
-int I444ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I411 to ARGB.
-LIBYUV_API
-int I411ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I400 (grey) to ARGB.
-LIBYUV_API
-int I400ToARGB(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Alias.
-#define YToARGB I400ToARGB_Reference
-
-// Convert I400 to ARGB. Reverse of ARGBToI400.
-LIBYUV_API
-int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert NV12 to ARGB.
-LIBYUV_API
-int NV12ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert NV21 to ARGB.
-LIBYUV_API
-int NV21ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_vu, int src_stride_vu,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert M420 to ARGB.
-LIBYUV_API
-int M420ToARGB(const uint8* src_m420, int src_stride_m420,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// TODO(fbarchard): Convert Q420 to ARGB.
-// LIBYUV_API
-// int Q420ToARGB(const uint8* src_y, int src_stride_y,
-// const uint8* src_yuy2, int src_stride_yuy2,
-// uint8* dst_argb, int dst_stride_argb,
-// int width, int height);
-
-// Convert YUY2 to ARGB.
-LIBYUV_API
-int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert UYVY to ARGB.
-LIBYUV_API
-int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// BGRA little endian (argb in memory) to ARGB.
-LIBYUV_API
-int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// ABGR little endian (rgba in memory) to ARGB.
-LIBYUV_API
-int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGBA little endian (abgr in memory) to ARGB.
-LIBYUV_API
-int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Deprecated function name.
-#define BG24ToARGB RGB24ToARGB
-
-// RGB little endian (bgr in memory) to ARGB.
-LIBYUV_API
-int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGB big endian (rgb in memory) to ARGB.
-LIBYUV_API
-int RAWToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGB16 (RGBP fourcc) little endian to ARGB.
-LIBYUV_API
-int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGB15 (RGBO fourcc) little endian to ARGB.
-LIBYUV_API
-int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGB12 (R444 fourcc) little endian to ARGB.
-LIBYUV_API
-int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-#ifdef HAVE_JPEG
-// src_width/height provided by capture
-// dst_width/height for clipping determine final size.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample, size_t sample_size,
- uint8* dst_argb, int dst_stride_argb,
- int src_width, int src_height,
- int dst_width, int dst_height);
-#endif
-
-// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
-
-// Convert camera sample to ARGB with cropping, rotation and vertical flip.
-// "src_size" is needed to parse MJPG.
-// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
-// Normally this would be the same as dst_width, with recommended alignment
-// to 16 bytes for better efficiency.
-// If rotation of 90 or 270 is used, stride is affected. The caller should
-// allocate the I420 buffer according to rotation.
-// "dst_stride_u" number of bytes in a row of the dst_u plane.
-// Normally this would be the same as (dst_width + 1) / 2, with
-// recommended alignment to 16 bytes for better efficiency.
-// If rotation of 90 or 270 is used, stride is affected.
-// "crop_x" and "crop_y" are starting position for cropping.
-// To center, crop_x = (src_width - dst_width) / 2
-// crop_y = (src_height - dst_height) / 2
-// "src_width" / "src_height" is size of src_frame in pixels.
-// "src_height" can be negative indicating a vertically flipped image source.
-// "crop_width" / "crop_height" is the size to crop the src to.
-// Must be less than or equal to src_width/src_height
-// Cropping parameters are pre-rotation.
-// "rotation" can be 0, 90, 180 or 270.
-// "format" is a fourcc. ie 'I420', 'YUY2'
-// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
-LIBYUV_API
-int ConvertToARGB(const uint8* src_frame, size_t src_size,
- uint8* dst_argb, int dst_stride_argb,
- int crop_x, int crop_y,
- int src_width, int src_height,
- int crop_width, int crop_height,
- enum RotationMode rotation,
- uint32 format);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h
deleted file mode 100755
index b1cf57f7dc..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_FROM_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/rotate.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// See Also convert.h for conversions from formats to I420.
-
-// I420Copy in convert to I420ToI420.
-
-LIBYUV_API
-int I420ToI422(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int I420ToI444(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int I420ToI411(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
-LIBYUV_API
-int I400Copy(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// TODO(fbarchard): I420ToM420
-// TODO(fbarchard): I420ToQ420
-
-LIBYUV_API
-int I420ToNV12(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height);
-
-LIBYUV_API
-int I420ToNV21(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_vu, int dst_stride_vu,
- int width, int height);
-
-LIBYUV_API
-int I420ToYUY2(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToUYVY(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int I420ToBGRA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int I420ToABGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int I420ToRGBA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height);
-
-LIBYUV_API
-int I420ToRGB24(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToRAW(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToARGB1555(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToARGB4444(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-// Note Bayer formats (BGGR) To I420 are in format_conversion.h.
-
-// Convert I420 to specified format.
-// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
-// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
-LIBYUV_API
-int ConvertFromI420(const uint8* y, int y_stride,
- const uint8* u, int u_stride,
- const uint8* v, int v_stride,
- uint8* dst_sample, int dst_sample_stride,
- int width, int height,
- uint32 format);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h
deleted file mode 100755
index f0343a77d3..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy ARGB to ARGB.
-#define ARGBToARGB ARGBCopy
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert ARGB To BGRA. (alias)
-#define ARGBToBGRA BGRAToARGB
-LIBYUV_API
-int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert ARGB To ABGR. (alias)
-#define ARGBToABGR ABGRToARGB
-LIBYUV_API
-int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert ARGB To RGBA.
-LIBYUV_API
-int ARGBToRGBA(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert ARGB To RGB24.
-LIBYUV_API
-int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb24, int dst_stride_rgb24,
- int width, int height);
-
-// Convert ARGB To RAW.
-LIBYUV_API
-int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb, int dst_stride_rgb,
- int width, int height);
-
-// Convert ARGB To RGB565.
-LIBYUV_API
-int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height);
-
-// Convert ARGB To ARGB1555.
-LIBYUV_API
-int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb1555, int dst_stride_argb1555,
- int width, int height);
-
-// Convert ARGB To ARGB4444.
-LIBYUV_API
-int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb4444, int dst_stride_argb4444,
- int width, int height);
-
-// Convert ARGB To I444.
-LIBYUV_API
-int ARGBToI444(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB To I422.
-LIBYUV_API
-int ARGBToI422(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB To I420. (also in convert.h)
-LIBYUV_API
-int ARGBToI420(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB to J420. (JPeg full range I420).
-LIBYUV_API
-int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yj, int dst_stride_yj,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB To I411.
-LIBYUV_API
-int ARGBToI411(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB to J400. (JPeg full range).
-LIBYUV_API
-int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yj, int dst_stride_yj,
- int width, int height);
-
-// Convert ARGB to I400.
-LIBYUV_API
-int ARGBToI400(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// Convert ARGB To NV12.
-LIBYUV_API
-int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height);
-
-// Convert ARGB To NV21.
-LIBYUV_API
-int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_vu, int dst_stride_vu,
- int width, int height);
-
-// Convert ARGB To NV21.
-LIBYUV_API
-int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_vu, int dst_stride_vu,
- int width, int height);
-
-// Convert ARGB To YUY2.
-LIBYUV_API
-int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yuy2, int dst_stride_yuy2,
- int width, int height);
-
-// Convert ARGB To UYVY.
-LIBYUV_API
-int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
- uint8* dst_uyvy, int dst_stride_uyvy,
- int width, int height);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h
deleted file mode 100755
index dc858a814a..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CPU_ID_H_ // NOLINT
-#define INCLUDE_LIBYUV_CPU_ID_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// TODO(fbarchard): Consider overlapping bits for different architectures.
-// Internal flag to indicate cpuid requires initialization.
-#define kCpuInit 0x1
-
-// These flags are only valid on ARM processors.
-static const int kCpuHasARM = 0x2;
-static const int kCpuHasNEON = 0x4;
-// 0x8 reserved for future ARM flag.
-
-// These flags are only valid on x86 processors.
-static const int kCpuHasX86 = 0x10;
-static const int kCpuHasSSE2 = 0x20;
-static const int kCpuHasSSSE3 = 0x40;
-static const int kCpuHasSSE41 = 0x80;
-static const int kCpuHasSSE42 = 0x100;
-static const int kCpuHasAVX = 0x200;
-static const int kCpuHasAVX2 = 0x400;
-static const int kCpuHasERMS = 0x800;
-static const int kCpuHasFMA3 = 0x1000;
-// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
-
-// These flags are only valid on MIPS processors.
-static const int kCpuHasMIPS = 0x10000;
-static const int kCpuHasMIPS_DSP = 0x20000;
-static const int kCpuHasMIPS_DSPR2 = 0x40000;
-
-// Internal function used to auto-init.
-LIBYUV_API
-int InitCpuFlags(void);
-
-// Internal function for parsing /proc/cpuinfo.
-LIBYUV_API
-int ArmCpuCaps(const char* cpuinfo_name);
-
-// Detect CPU has SSE2 etc.
-// Test_flag parameter should be one of kCpuHas constants above.
-// returns non-zero if instruction set is detected
-static __inline int TestCpuFlag(int test_flag) {
- LIBYUV_API extern int cpu_info_;
- return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
-}
-
-// For testing, allow CPU flags to be disabled.
-// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
-// MaskCpuFlags(-1) to enable all cpu specific optimizations.
-// MaskCpuFlags(0) to disable all cpu specific optimizations.
-LIBYUV_API
-void MaskCpuFlags(int enable_flags);
-
-// Low level cpuid for X86. Returns zeros on other CPUs.
-// eax is the info type that you want.
-// ecx is typically the cpu number, and should normally be zero.
-LIBYUV_API
-void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CPU_ID_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h
deleted file mode 100755
index b18bf05343..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_ // NOLINT
-#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert Bayer RGB formats to I420.
-LIBYUV_API
-int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Temporary API mapper.
-#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
- BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
-
-LIBYUV_API
-int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height,
- uint32 src_fourcc_bayer);
-
-// Convert I420 to Bayer RGB formats.
-LIBYUV_API
-int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-// Temporary API mapper.
-#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
- I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
-
-LIBYUV_API
-int I420ToBayer(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height,
- uint32 dst_fourcc_bayer);
-
-// Convert Bayer RGB formats to ARGB.
-LIBYUV_API
-int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Temporary API mapper.
-#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
-
-LIBYUV_API
-int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height,
- uint32 src_fourcc_bayer);
-
-// Converts ARGB to Bayer RGB formats.
-LIBYUV_API
-int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height);
-
-LIBYUV_API
-int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height);
-
-LIBYUV_API
-int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height);
-
-LIBYUV_API
-int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height);
-
-// Temporary API mapper.
-#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
-
-LIBYUV_API
-int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height,
- uint32 dst_fourcc_bayer);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_FORMATCONVERSION_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h
deleted file mode 100755
index faffaea8fa..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ // NOLINT
-#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-// NOTE: For a simplified public API use convert.h MJPGToI420().
-
-struct jpeg_common_struct;
-struct jpeg_decompress_struct;
-struct jpeg_source_mgr;
-
-namespace libyuv {
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-static const uint32 kUnknownDataSize = 0xFFFFFFFF;
-
-enum JpegSubsamplingType {
- kJpegYuv420,
- kJpegYuv422,
- kJpegYuv411,
- kJpegYuv444,
- kJpegYuv400,
- kJpegUnknown
-};
-
-struct SetJmpErrorMgr;
-
-// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
-// simply independent JPEG images with a fixed huffman table (which is omitted).
-// It is rarely used in video transmission, but is common as a camera capture
-// format, especially in Logitech devices. This class implements a decoder for
-// MJPEG frames.
-//
-// See http://tools.ietf.org/html/rfc2435
-class LIBYUV_API MJpegDecoder {
- public:
- typedef void (*CallbackFunction)(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows);
-
- static const int kColorSpaceUnknown;
- static const int kColorSpaceGrayscale;
- static const int kColorSpaceRgb;
- static const int kColorSpaceYCbCr;
- static const int kColorSpaceCMYK;
- static const int kColorSpaceYCCK;
-
- MJpegDecoder();
- ~MJpegDecoder();
-
- // Loads a new frame, reads its headers, and determines the uncompressed
- // image format.
- // Returns LIBYUV_TRUE if image looks valid and format is supported.
- // If return value is LIBYUV_TRUE, then the values for all the following
- // getters are populated.
- // src_len is the size of the compressed mjpeg frame in bytes.
- LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
-
- // Returns width of the last loaded frame in pixels.
- int GetWidth();
-
- // Returns height of the last loaded frame in pixels.
- int GetHeight();
-
- // Returns format of the last loaded frame. The return value is one of the
- // kColorSpace* constants.
- int GetColorSpace();
-
- // Number of color components in the color space.
- int GetNumComponents();
-
- // Sample factors of the n-th component.
- int GetHorizSampFactor(int component);
-
- int GetVertSampFactor(int component);
-
- int GetHorizSubSampFactor(int component);
-
- int GetVertSubSampFactor(int component);
-
- // Public for testability.
- int GetImageScanlinesPerImcuRow();
-
- // Public for testability.
- int GetComponentScanlinesPerImcuRow(int component);
-
- // Width of a component in bytes.
- int GetComponentWidth(int component);
-
- // Height of a component.
- int GetComponentHeight(int component);
-
- // Width of a component in bytes with padding for DCTSIZE. Public for testing.
- int GetComponentStride(int component);
-
- // Size of a component in bytes.
- int GetComponentSize(int component);
-
- // Call this after LoadFrame() if you decide you don't want to decode it
- // after all.
- LIBYUV_BOOL UnloadFrame();
-
- // Decodes the entire image into a one-buffer-per-color-component format.
- // dst_width must match exactly. dst_height must be <= to image height; if
- // less, the image is cropped. "planes" must have size equal to at least
- // GetNumComponents() and they must point to non-overlapping buffers of size
- // at least GetComponentSize(i). The pointers in planes are incremented
- // to point to after the end of the written data.
- // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
- LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
-
- // Decodes the entire image and passes the data via repeated calls to a
- // callback function. Each call will get the data for a whole number of
- // image scanlines.
- // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
- LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
- int dst_width, int dst_height);
-
- // The helper function which recognizes the jpeg sub-sampling type.
- static JpegSubsamplingType JpegSubsamplingTypeHelper(
- int* subsample_x, int* subsample_y, int number_of_components);
-
- private:
- struct Buffer {
- const uint8* data;
- int len;
- };
-
- struct BufferVector {
- Buffer* buffers;
- int len;
- int pos;
- };
-
- // Methods that are passed to jpeglib.
- static int fill_input_buffer(jpeg_decompress_struct* cinfo);
- static void init_source(jpeg_decompress_struct* cinfo);
- static void skip_input_data(jpeg_decompress_struct* cinfo,
- long num_bytes); // NOLINT
- static void term_source(jpeg_decompress_struct* cinfo);
-
- static void ErrorHandler(jpeg_common_struct* cinfo);
-
- void AllocOutputBuffers(int num_outbufs);
- void DestroyOutputBuffers();
-
- LIBYUV_BOOL StartDecode();
- LIBYUV_BOOL FinishDecode();
-
- void SetScanlinePointers(uint8** data);
- LIBYUV_BOOL DecodeImcuRow();
-
- int GetComponentScanlinePadding(int component);
-
- // A buffer holding the input data for a frame.
- Buffer buf_;
- BufferVector buf_vec_;
-
- jpeg_decompress_struct* decompress_struct_;
- jpeg_source_mgr* source_mgr_;
- SetJmpErrorMgr* error_mgr_;
-
- // LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
- // GetComponentScanlinePadding() != 0.)
- LIBYUV_BOOL has_scanline_padding_;
-
- // Temporaries used to point to scanline outputs.
- int num_outbufs_; // Outermost size of all arrays below.
- uint8*** scanlines_;
- int* scanlines_sizes_;
- // Temporary buffer used for decoding when we can't decode directly to the
- // output buffers. Large enough for just one iMCU row.
- uint8** databuf_;
- int* databuf_strides_;
-};
-
-} // namespace libyuv
-
-#endif // __cplusplus
-#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h
deleted file mode 100755
index ac516c5ba5..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ // NOLINT
-#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
-
-#include "libyuv/basic_types.h"
-
-// TODO(fbarchard): Remove the following headers includes.
-#include "libyuv/convert.h"
-#include "libyuv/convert_argb.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy a plane of data.
-LIBYUV_API
-void CopyPlane(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// Set a plane of data to a 32 bit value.
-LIBYUV_API
-void SetPlane(uint8* dst_y, int dst_stride_y,
- int width, int height,
- uint32 value);
-
-// Copy I400. Supports inverting.
-LIBYUV_API
-int I400ToI400(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-
-// Copy I422 to I422.
-#define I422ToI422 I422Copy
-LIBYUV_API
-int I422Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Copy I444 to I444.
-#define I444ToI444 I444Copy
-LIBYUV_API
-int I444Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert YUY2 to I422.
-LIBYUV_API
-int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert UYVY to I422.
-LIBYUV_API
-int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert I420 to I400. (calls CopyPlane ignoring u/v).
-LIBYUV_API
-int I420ToI400(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// Alias
-#define I420ToI420Mirror I420Mirror
-
-// I420 mirror.
-LIBYUV_API
-int I420Mirror(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Alias
-#define I400ToI400Mirror I400Mirror
-
-// I400 mirror. A single plane is mirrored horizontally.
-// Pass negative height to achieve 180 degree rotation.
-LIBYUV_API
-int I400Mirror(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// Alias
-#define ARGBToARGBMirror ARGBMirror
-
-// ARGB mirror.
-LIBYUV_API
-int ARGBMirror(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert NV12 to RGB565.
-LIBYUV_API
-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height);
-
-// Convert NV21 to RGB565.
-LIBYUV_API
-int NV21ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height);
-
-// I422ToARGB is in convert_argb.h
-// Convert I422 to BGRA.
-LIBYUV_API
-int I422ToBGRA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_bgra, int dst_stride_bgra,
- int width, int height);
-
-// Convert I422 to ABGR.
-LIBYUV_API
-int I422ToABGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_abgr, int dst_stride_abgr,
- int width, int height);
-
-// Convert I422 to RGBA.
-LIBYUV_API
-int I422ToRGBA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height);
-
-// Draw a rectangle into I420.
-LIBYUV_API
-int I420Rect(uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int x, int y, int width, int height,
- int value_y, int value_u, int value_v);
-
-// Draw a rectangle into ARGB.
-LIBYUV_API
-int ARGBRect(uint8* dst_argb, int dst_stride_argb,
- int x, int y, int width, int height, uint32 value);
-
-// Convert ARGB to gray scale ARGB.
-LIBYUV_API
-int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Make a rectangle of ARGB gray scale.
-LIBYUV_API
-int ARGBGray(uint8* dst_argb, int dst_stride_argb,
- int x, int y, int width, int height);
-
-// Make a rectangle of ARGB Sepia tone.
-LIBYUV_API
-int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
- int x, int y, int width, int height);
-
-// Apply a matrix rotation to each ARGB pixel.
-// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2.
-// The first 4 coefficients apply to B, G, R, A and produce B of the output.
-// The next 4 coefficients apply to B, G, R, A and produce G of the output.
-// The next 4 coefficients apply to B, G, R, A and produce R of the output.
-// The last 4 coefficients apply to B, G, R, A and produce A of the output.
-LIBYUV_API
-int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const int8* matrix_argb,
- int width, int height);
-
-// Deprecated. Use ARGBColorMatrix instead.
-// Apply a matrix rotation to each ARGB pixel.
-// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
-// The first 4 coefficients apply to B, G, R, A and produce B of the output.
-// The next 4 coefficients apply to B, G, R, A and produce G of the output.
-// The last 4 coefficients apply to B, G, R, A and produce R of the output.
-LIBYUV_API
-int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
- const int8* matrix_rgb,
- int x, int y, int width, int height);
-
-// Apply a color table each ARGB pixel.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
- const uint8* table_argb,
- int x, int y, int width, int height);
-
-// Apply a color table each ARGB pixel but preserve destination alpha.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
- const uint8* table_argb,
- int x, int y, int width, int height);
-
-// Apply a luma/color table each ARGB pixel but preserve destination alpha.
-// Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from
-// RGB (YJ style) and C is an 8 bit color component (R, G or B).
-LIBYUV_API
-int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const uint8* luma_rgb_table,
- int width, int height);
-
-// Apply a 3 term polynomial to ARGB values.
-// poly points to a 4x4 matrix. The first row is constants. The 2nd row is
-// coefficients for b, g, r and a. The 3rd row is coefficients for b squared,
-// g squared, r squared and a squared. The 4rd row is coefficients for b to
-// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and
-// result clamped to 0 to 255.
-// A polynomial approximation can be dirived using software such as 'R'.
-
-LIBYUV_API
-int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const float* poly,
- int width, int height);
-
-// Quantize a rectangle of ARGB. Alpha unaffected.
-// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
-// interval_size should be a value between 1 and 255.
-// interval_offset should be a value between 0 and 255.
-LIBYUV_API
-int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
- int scale, int interval_size, int interval_offset,
- int x, int y, int width, int height);
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-// Get function to Alpha Blend ARGB pixels and store to destination.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend();
-
-// Alpha Blend ARGB images and store to destination.
-// Alpha of destination is set to 255.
-LIBYUV_API
-int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
-LIBYUV_API
-int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Add ARGB image with ARGB image. Saturates to 255.
-LIBYUV_API
-int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
-LIBYUV_API
-int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I422 to YUY2.
-LIBYUV_API
-int I422ToYUY2(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-// Convert I422 to UYVY.
-LIBYUV_API
-int I422ToUYVY(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-// Convert unattentuated ARGB to preattenuated ARGB.
-LIBYUV_API
-int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert preattentuated ARGB to unattenuated ARGB.
-LIBYUV_API
-int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert MJPG to ARGB.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample, size_t sample_size,
- uint8* argb, int argb_stride,
- int w, int h, int dw, int dh);
-
-// Internal function - do not call directly.
-// Computes table of cumulative sum for image where the value is the sum
-// of all values above and to the left of the entry. Used by ARGBBlur.
-LIBYUV_API
-int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
- int32* dst_cumsum, int dst_stride32_cumsum,
- int width, int height);
-
-// Blur ARGB image.
-// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
-// 16 byte boundary.
-// dst_stride32_cumsum is number of ints in a row (width * 4).
-// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5.
-// Blur is optimized for radius of 5 (11x11) or less.
-LIBYUV_API
-int ARGBBlur(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int32* dst_cumsum, int dst_stride32_cumsum,
- int width, int height, int radius);
-
-// Multiply ARGB image by ARGB value.
-LIBYUV_API
-int ARGBShade(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height, uint32 value);
-
-// Interpolate between two ARGB images using specified amount of interpolation
-// (0 to 255) and store to destination.
-// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
-// and 255 means 1% src_argb0 and 99% src_argb1.
-// Internally uses ARGBScale bilinear filtering.
-// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
-LIBYUV_API
-int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height, int interpolation);
-
-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
- defined(TARGET_IPHONE_SIMULATOR)
-#define LIBYUV_DISABLE_X86
-#endif
-
-// Row functions for copying a pixels from a source with a slope to a row
-// of destination. Useful for scaling, rotation, mirror, texture mapping.
-LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width);
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width);
-#define HAS_ARGBAFFINEROW_SSE2
-#endif // LIBYUV_DISABLE_X86
-
-// Shuffle ARGB channel order. e.g. BGRA to ARGB.
-// shuffler is 16 bytes and must be aligned.
-LIBYUV_API
-int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_argb, int dst_stride_argb,
- const uint8* shuffler, int width, int height);
-
-// Sobel ARGB effect with planar output.
-LIBYUV_API
-int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// Sobel ARGB effect.
-LIBYUV_API
-int ARGBSobel(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
-LIBYUV_API
-int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h
deleted file mode 100755
index 8af60b8955..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_ROTATE_H_ // NOLINT
-#define INCLUDE_LIBYUV_ROTATE_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Supported rotation.
-typedef enum RotationMode {
- kRotate0 = 0, // No rotation.
- kRotate90 = 90, // Rotate 90 degrees clockwise.
- kRotate180 = 180, // Rotate 180 degrees.
- kRotate270 = 270, // Rotate 270 degrees clockwise.
-
- // Deprecated.
- kRotateNone = 0,
- kRotateClockwise = 90,
- kRotateCounterClockwise = 270,
-} RotationModeEnum;
-
-// Rotate I420 frame.
-LIBYUV_API
-int I420Rotate(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_width, int src_height, enum RotationMode mode);
-
-// Rotate NV12 input and store in I420.
-LIBYUV_API
-int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_width, int src_height, enum RotationMode mode);
-
-// Rotate a plane by 0, 90, 180, or 270.
-LIBYUV_API
-int RotatePlane(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int src_width, int src_height, enum RotationMode mode);
-
-// Rotate planes by 90, 180, 270. Deprecated.
-LIBYUV_API
-void RotatePlane90(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height);
-
-LIBYUV_API
-void RotatePlane180(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height);
-
-LIBYUV_API
-void RotatePlane270(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height);
-
-LIBYUV_API
-void RotateUV90(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height);
-
-// Rotations for when U and V are interleaved.
-// These functions take one input pointer and
-// split the data into two buffers while
-// rotating them. Deprecated.
-LIBYUV_API
-void RotateUV180(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height);
-
-LIBYUV_API
-void RotateUV270(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height);
-
-// The 90 and 270 functions are based on transposes.
-// Doing a transpose with reversing the read/write
-// order will result in a rotation by +- 90 degrees.
-// Deprecated.
-LIBYUV_API
-void TransposePlane(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height);
-
-LIBYUV_API
-void TransposeUV(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_ROTATE_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h
deleted file mode 100755
index 660ff5573e..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ // NOLINT
-#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/rotate.h" // For RotationMode.
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Rotate ARGB frame
-LIBYUV_API
-int ARGBRotate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int src_width, int src_height, enum RotationMode mode);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h
deleted file mode 100755
index 757020da86..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h
+++ /dev/null
@@ -1,1694 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_ROW_H_ // NOLINT
-#define INCLUDE_LIBYUV_ROW_H_
-
-#include <stdlib.h> // For malloc.
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
-
-#ifdef __cplusplus
-#define align_buffer_64(var, size) \
- uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63)); \
- uint8* var = reinterpret_cast<uint8*> \
- ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63)
-#else
-#define align_buffer_64(var, size) \
- uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \
- uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
-#endif
-
-#define free_aligned_buffer_64(var) \
- free(var##_mem); \
- var = 0
-
-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
- defined(TARGET_IPHONE_SIMULATOR)
-#define LIBYUV_DISABLE_X86
-#endif
-// True if compiling for SSSE3 as a requirement.
-#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3))
-#define LIBYUV_SSSE3_ONLY
-#endif
-
-// Enable for NaCL pepper 33 for bundle and AVX2 support.
-// #define NEW_BINUTILS
-
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-// Effects:
-#define HAS_ARGBADDROW_SSE2
-#define HAS_ARGBAFFINEROW_SSE2
-#define HAS_ARGBATTENUATEROW_SSSE3
-#define HAS_ARGBBLENDROW_SSSE3
-#define HAS_ARGBCOLORMATRIXROW_SSSE3
-#define HAS_ARGBCOLORTABLEROW_X86
-#define HAS_ARGBCOPYALPHAROW_SSE2
-#define HAS_ARGBCOPYYTOALPHAROW_SSE2
-#define HAS_ARGBGRAYROW_SSSE3
-#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
-#define HAS_ARGBMIRRORROW_SSSE3
-#define HAS_ARGBMULTIPLYROW_SSE2
-#define HAS_ARGBPOLYNOMIALROW_SSE2
-#define HAS_ARGBQUANTIZEROW_SSE2
-#define HAS_ARGBSEPIAROW_SSSE3
-#define HAS_ARGBSHADEROW_SSE2
-#define HAS_ARGBSUBTRACTROW_SSE2
-#define HAS_ARGBTOUVROW_SSSE3
-#define HAS_ARGBUNATTENUATEROW_SSE2
-#define HAS_COMPUTECUMULATIVESUMROW_SSE2
-#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-#define HAS_INTERPOLATEROW_SSE2
-#define HAS_INTERPOLATEROW_SSSE3
-#define HAS_RGBCOLORTABLEROW_X86
-#define HAS_SOBELROW_SSE2
-#define HAS_SOBELTOPLANEROW_SSE2
-#define HAS_SOBELXROW_SSE2
-#define HAS_SOBELXYROW_SSE2
-#define HAS_SOBELYROW_SSE2
-
-// Conversions:
-#define HAS_ABGRTOUVROW_SSSE3
-#define HAS_ABGRTOYROW_SSSE3
-#define HAS_ARGB1555TOARGBROW_SSE2
-#define HAS_ARGB4444TOARGBROW_SSE2
-#define HAS_ARGBSHUFFLEROW_SSE2
-#define HAS_ARGBSHUFFLEROW_SSSE3
-#define HAS_ARGBTOARGB1555ROW_SSE2
-#define HAS_ARGBTOARGB4444ROW_SSE2
-#define HAS_ARGBTOBAYERGGROW_SSE2
-#define HAS_ARGBTOBAYERROW_SSSE3
-#define HAS_ARGBTORAWROW_SSSE3
-#define HAS_ARGBTORGB24ROW_SSSE3
-#define HAS_ARGBTORGB565ROW_SSE2
-#define HAS_ARGBTOUV422ROW_SSSE3
-#define HAS_ARGBTOUV444ROW_SSSE3
-#define HAS_ARGBTOUVJROW_SSSE3
-#define HAS_ARGBTOYJROW_SSSE3
-#define HAS_ARGBTOYROW_SSSE3
-#define HAS_BGRATOUVROW_SSSE3
-#define HAS_BGRATOYROW_SSSE3
-#define HAS_COPYROW_ERMS
-#define HAS_COPYROW_SSE2
-#define HAS_COPYROW_X86
-#define HAS_HALFROW_SSE2
-#define HAS_I400TOARGBROW_SSE2
-#define HAS_I411TOARGBROW_SSSE3
-#define HAS_I422TOARGB1555ROW_SSSE3
-#define HAS_I422TOABGRROW_SSSE3
-#define HAS_I422TOARGB1555ROW_SSSE3
-#define HAS_I422TOARGB4444ROW_SSSE3
-#define HAS_I422TOARGBROW_SSSE3
-#define HAS_I422TOBGRAROW_SSSE3
-#define HAS_I422TORAWROW_SSSE3
-#define HAS_I422TORGB24ROW_SSSE3
-#define HAS_I422TORGB565ROW_SSSE3
-#define HAS_I422TORGBAROW_SSSE3
-#define HAS_I422TOUYVYROW_SSE2
-#define HAS_I422TOYUY2ROW_SSE2
-#define HAS_I444TOARGBROW_SSSE3
-#define HAS_MERGEUVROW_SSE2
-#define HAS_MIRRORROW_SSE2
-#define HAS_MIRRORROW_SSSE3
-#define HAS_MIRRORROW_UV_SSSE3
-#define HAS_MIRRORUVROW_SSSE3
-#define HAS_NV12TOARGBROW_SSSE3
-#define HAS_NV12TORGB565ROW_SSSE3
-#define HAS_NV21TOARGBROW_SSSE3
-#define HAS_NV21TORGB565ROW_SSSE3
-#define HAS_RAWTOARGBROW_SSSE3
-#define HAS_RAWTOYROW_SSSE3
-#define HAS_RGB24TOARGBROW_SSSE3
-#define HAS_RGB24TOYROW_SSSE3
-#define HAS_RGB565TOARGBROW_SSE2
-#define HAS_RGBATOUVROW_SSSE3
-#define HAS_RGBATOYROW_SSSE3
-#define HAS_SETROW_X86
-#define HAS_SPLITUVROW_SSE2
-#define HAS_UYVYTOARGBROW_SSSE3
-#define HAS_UYVYTOUV422ROW_SSE2
-#define HAS_UYVYTOUVROW_SSE2
-#define HAS_UYVYTOYROW_SSE2
-#define HAS_YTOARGBROW_SSE2
-#define HAS_YUY2TOARGBROW_SSSE3
-#define HAS_YUY2TOUV422ROW_SSE2
-#define HAS_YUY2TOUVROW_SSE2
-#define HAS_YUY2TOYROW_SSE2
-#endif
-
-// GCC >= 4.7.0 required for AVX2.
-#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
-#define GCC_HAS_AVX2 1
-#endif // GNUC >= 4.7
-#endif // __GNUC__
-
-// clang >= 3.4.0 required for AVX2.
-#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
-#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
-#define CLANG_HAS_AVX2 1
-#endif // clang >= 3.4
-#endif // __clang__
-
-// Visual C 2012 required for AVX2.
-#if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700
-#define VISUALC_HAS_AVX2 1
-#endif // VisualStudio >= 2012
-
-// The following are available on all x86 platforms, but
-// require VS2012, clang 3.4 or gcc 4.7.
-// The code supports NaCL but requires a new compiler and validator.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
- defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
-// Effects:
-#define HAS_ARGBPOLYNOMIALROW_AVX2
-#define HAS_ARGBSHUFFLEROW_AVX2
-#define HAS_ARGBCOPYALPHAROW_AVX2
-#define HAS_ARGBCOPYYTOALPHAROW_AVX2
-#endif
-
-// The following are require VS2012.
-// TODO(fbarchard): Port to gcc.
-#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
-#define HAS_ARGBTOUVROW_AVX2
-#define HAS_ARGBTOYJROW_AVX2
-#define HAS_ARGBTOYROW_AVX2
-#define HAS_HALFROW_AVX2
-#define HAS_I422TOARGBROW_AVX2
-#define HAS_INTERPOLATEROW_AVX2
-#define HAS_MERGEUVROW_AVX2
-#define HAS_MIRRORROW_AVX2
-#define HAS_SPLITUVROW_AVX2
-#define HAS_UYVYTOUV422ROW_AVX2
-#define HAS_UYVYTOUVROW_AVX2
-#define HAS_UYVYTOYROW_AVX2
-#define HAS_YUY2TOUV422ROW_AVX2
-#define HAS_YUY2TOUVROW_AVX2
-#define HAS_YUY2TOYROW_AVX2
-
-// Effects:
-#define HAS_ARGBADDROW_AVX2
-#define HAS_ARGBATTENUATEROW_AVX2
-#define HAS_ARGBMIRRORROW_AVX2
-#define HAS_ARGBMULTIPLYROW_AVX2
-#define HAS_ARGBSUBTRACTROW_AVX2
-#define HAS_ARGBUNATTENUATEROW_AVX2
-#endif // defined(VISUALC_HAS_AVX2)
-
-// The following are Yasm x86 only:
-// TODO(fbarchard): Port AVX2 to inline.
-#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM)
- (defined(_M_IX86) || defined(_M_X64) || \
- defined(__x86_64__) || defined(__i386__))
-#define HAS_MERGEUVROW_AVX2
-#define HAS_MERGEUVROW_MMX
-#define HAS_SPLITUVROW_AVX2
-#define HAS_SPLITUVROW_MMX
-#define HAS_UYVYTOYROW_AVX2
-#define HAS_UYVYTOYROW_MMX
-#define HAS_YUY2TOYROW_AVX2
-#define HAS_YUY2TOYROW_MMX
-#endif
-
-// The following are disabled when SSSE3 is available:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
- !defined(LIBYUV_SSSE3_ONLY)
-#define HAS_ARGBBLENDROW_SSE2
-#define HAS_ARGBATTENUATEROW_SSE2
-#define HAS_MIRRORROW_SSE2
-#endif
-
-// The following are available on Neon platforms:
-#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_ABGRTOUVROW_NEON
-#define HAS_ABGRTOYROW_NEON
-#define HAS_ARGB1555TOARGBROW_NEON
-#define HAS_ARGB1555TOUVROW_NEON
-#define HAS_ARGB1555TOYROW_NEON
-#define HAS_ARGB4444TOARGBROW_NEON
-#define HAS_ARGB4444TOUVROW_NEON
-#define HAS_ARGB4444TOYROW_NEON
-#define HAS_ARGBTOARGB1555ROW_NEON
-#define HAS_ARGBTOARGB4444ROW_NEON
-#define HAS_ARGBTOBAYERROW_NEON
-#define HAS_ARGBTOBAYERGGROW_NEON
-#define HAS_ARGBTORAWROW_NEON
-#define HAS_ARGBTORGB24ROW_NEON
-#define HAS_ARGBTORGB565ROW_NEON
-#define HAS_ARGBTOUV411ROW_NEON
-#define HAS_ARGBTOUV422ROW_NEON
-#define HAS_ARGBTOUV444ROW_NEON
-#define HAS_ARGBTOUVROW_NEON
-#define HAS_ARGBTOUVJROW_NEON
-#define HAS_ARGBTOYROW_NEON
-#define HAS_ARGBTOYJROW_NEON
-#define HAS_BGRATOUVROW_NEON
-#define HAS_BGRATOYROW_NEON
-#define HAS_COPYROW_NEON
-#define HAS_HALFROW_NEON
-#define HAS_I400TOARGBROW_NEON
-#define HAS_I411TOARGBROW_NEON
-#define HAS_I422TOABGRROW_NEON
-#define HAS_I422TOARGB1555ROW_NEON
-#define HAS_I422TOARGB4444ROW_NEON
-#define HAS_I422TOARGBROW_NEON
-#define HAS_I422TOBGRAROW_NEON
-#define HAS_I422TORAWROW_NEON
-#define HAS_I422TORGB24ROW_NEON
-#define HAS_I422TORGB565ROW_NEON
-#define HAS_I422TORGBAROW_NEON
-#define HAS_I422TOUYVYROW_NEON
-#define HAS_I422TOYUY2ROW_NEON
-#define HAS_I444TOARGBROW_NEON
-#define HAS_MERGEUVROW_NEON
-#define HAS_MIRRORROW_NEON
-#define HAS_MIRRORUVROW_NEON
-#define HAS_NV12TOARGBROW_NEON
-#define HAS_NV12TORGB565ROW_NEON
-#define HAS_NV21TOARGBROW_NEON
-#define HAS_NV21TORGB565ROW_NEON
-#define HAS_RAWTOARGBROW_NEON
-#define HAS_RAWTOUVROW_NEON
-#define HAS_RAWTOYROW_NEON
-#define HAS_RGB24TOARGBROW_NEON
-#define HAS_RGB24TOUVROW_NEON
-#define HAS_RGB24TOYROW_NEON
-#define HAS_RGB565TOARGBROW_NEON
-#define HAS_RGB565TOUVROW_NEON
-#define HAS_RGB565TOYROW_NEON
-#define HAS_RGBATOUVROW_NEON
-#define HAS_RGBATOYROW_NEON
-#define HAS_SETROW_NEON
-#define HAS_SPLITUVROW_NEON
-#define HAS_UYVYTOARGBROW_NEON
-#define HAS_UYVYTOUV422ROW_NEON
-#define HAS_UYVYTOUVROW_NEON
-#define HAS_UYVYTOYROW_NEON
-#define HAS_YTOARGBROW_NEON
-#define HAS_YUY2TOARGBROW_NEON
-#define HAS_YUY2TOUV422ROW_NEON
-#define HAS_YUY2TOUVROW_NEON
-#define HAS_YUY2TOYROW_NEON
-
-// Effects:
-#define HAS_ARGBADDROW_NEON
-#define HAS_ARGBATTENUATEROW_NEON
-#define HAS_ARGBBLENDROW_NEON
-#define HAS_ARGBCOLORMATRIXROW_NEON
-#define HAS_ARGBGRAYROW_NEON
-#define HAS_ARGBMIRRORROW_NEON
-#define HAS_ARGBMULTIPLYROW_NEON
-#define HAS_ARGBQUANTIZEROW_NEON
-#define HAS_ARGBSEPIAROW_NEON
-#define HAS_ARGBSHADEROW_NEON
-#define HAS_ARGBSUBTRACTROW_NEON
-#define HAS_SOBELROW_NEON
-#define HAS_SOBELTOPLANEROW_NEON
-#define HAS_SOBELXYROW_NEON
-#define HAS_SOBELXROW_NEON
-#define HAS_SOBELYROW_NEON
-#define HAS_INTERPOLATEROW_NEON
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
-#define HAS_COPYROW_MIPS
-#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_I422TOABGRROW_MIPS_DSPR2
-#define HAS_I422TOARGBROW_MIPS_DSPR2
-#define HAS_I422TOBGRAROW_MIPS_DSPR2
-#define HAS_INTERPOLATEROWS_MIPS_DSPR2
-#define HAS_MIRRORROW_MIPS_DSPR2
-#define HAS_MIRRORUVROW_MIPS_DSPR2
-#define HAS_SPLITUVROW_MIPS_DSPR2
-#endif
-#endif
-
-#if defined(_MSC_VER) && !defined(__CLR_VER)
-#define SIMD_ALIGNED(var) __declspec(align(16)) var
-typedef __declspec(align(16)) int16 vec16[8];
-typedef __declspec(align(16)) int32 vec32[4];
-typedef __declspec(align(16)) int8 vec8[16];
-typedef __declspec(align(16)) uint16 uvec16[8];
-typedef __declspec(align(16)) uint32 uvec32[4];
-typedef __declspec(align(16)) uint8 uvec8[16];
-typedef __declspec(align(32)) int16 lvec16[16];
-typedef __declspec(align(32)) int32 lvec32[8];
-typedef __declspec(align(32)) int8 lvec8[32];
-typedef __declspec(align(32)) uint16 ulvec16[16];
-typedef __declspec(align(32)) uint32 ulvec32[8];
-typedef __declspec(align(32)) uint8 ulvec8[32];
-
-#elif defined(__GNUC__)
-// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
-#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
-typedef int16 __attribute__((vector_size(16))) vec16;
-typedef int32 __attribute__((vector_size(16))) vec32;
-typedef int8 __attribute__((vector_size(16))) vec8;
-typedef uint16 __attribute__((vector_size(16))) uvec16;
-typedef uint32 __attribute__((vector_size(16))) uvec32;
-typedef uint8 __attribute__((vector_size(16))) uvec8;
-#else
-#define SIMD_ALIGNED(var) var
-typedef int16 vec16[8];
-typedef int32 vec32[4];
-typedef int8 vec8[16];
-typedef uint16 uvec16[8];
-typedef uint32 uvec32[4];
-typedef uint8 uvec8[16];
-#endif
-
-#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
-#define OMITFP
-#else
-#define OMITFP __attribute__((optimize("omit-frame-pointer")))
-#endif
-
-// NaCL macros for GCC x86 and x64.
-
-// TODO(nfullagar): When pepper_33 toolchain is distributed, default to
-// NEW_BINUTILS and remove all BUNDLEALIGN occurances.
-#if defined(__native_client__)
-#define LABELALIGN ".p2align 5\n"
-#else
-#define LABELALIGN ".p2align 2\n"
-#endif
-#if defined(__native_client__) && defined(__x86_64__)
-#if defined(NEW_BINUTILS)
-#define BUNDLELOCK ".bundle_lock\n"
-#define BUNDLEUNLOCK ".bundle_unlock\n"
-#define BUNDLEALIGN "\n"
-#else
-#define BUNDLELOCK "\n"
-#define BUNDLEUNLOCK "\n"
-#define BUNDLEALIGN ".p2align 5\n"
-#endif
-#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
-#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
-#define MEMLEA(offset, base) #offset "(%q" #base ")"
-#define MEMLEA3(offset, index, scale) \
- #offset "(,%q" #index "," #scale ")"
-#define MEMLEA4(offset, base, index, scale) \
- #offset "(%q" #base ",%q" #index "," #scale ")"
-#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
-#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
- #opcode " (%%r15,%%r14),%%" #reg "\n" \
- BUNDLEUNLOCK
-#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
- #opcode " %%" #reg ",(%%r15,%%r14)\n" \
- BUNDLEUNLOCK
-#define MEMOPARG(opcode, offset, base, index, scale, arg) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
- #opcode " (%%r15,%%r14),%" #arg "\n" \
- BUNDLEUNLOCK
-#else
-#define BUNDLEALIGN "\n"
-#define MEMACCESS(base) "(%" #base ")"
-#define MEMACCESS2(offset, base) #offset "(%" #base ")"
-#define MEMLEA(offset, base) #offset "(%" #base ")"
-#define MEMLEA3(offset, index, scale) \
- #offset "(,%" #index "," #scale ")"
-#define MEMLEA4(offset, base, index, scale) \
- #offset "(%" #base ",%" #index "," #scale ")"
-#define MEMMOVESTRING(s, d)
-#define MEMSTORESTRING(reg, d)
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
- #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
-#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
- #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
-#define MEMOPARG(opcode, offset, base, index, scale, arg) \
- #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
-#endif
-
-void I444ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToRGB24Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width);
-void I422ToRAWRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
-void I422ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width);
-void I422ToARGB1555Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width);
-void I422ToARGB4444Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width);
-void NV12ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width);
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_rgb565,
- int width);
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int pix);
-void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int pix);
-void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
-void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
-void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
-
-void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int pix);
-void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int pix);
-void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
- int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
- int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width);
-void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int width);
-void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int width);
-void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBToUV444Row_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBToUV422Row_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBToUV444Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV422Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV411Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-
-void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
-void MirrorRow_C(const uint8* src, uint8* dst, int width);
-
-void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width);
-void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width);
-void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width);
-void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width);
-
-void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
-
-void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
- uint8* dst_v, int pix);
-void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-
-void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
- uint8* dst_uv, int width);
-void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
-void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
-void CopyRow_X86(const uint8* src, uint8* dst, int count);
-void CopyRow_NEON(const uint8* src, uint8* dst, int count);
-void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
-void CopyRow_C(const uint8* src, uint8* dst, int count);
-
-void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-
-void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-
-void SetRow_X86(uint8* dst, uint32 v32, int count);
-void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
- int dst_stride, int height);
-void SetRow_NEON(uint8* dst, uint32 v32, int count);
-void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
- int dst_stride, int height);
-void SetRow_C(uint8* dst, uint32 v32, int count);
-void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
- int height);
-
-// ARGBShufflers for BGRAToARGB etc.
-void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
-void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
-void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
- int pix);
-void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
- int pix);
-void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
-
-void I444ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_C(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_C(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToRGB24Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width);
-void I422ToRAWRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
-void I422ToARGB4444Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width);
-void I422ToARGB1555Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width);
-void I422ToRGB565Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width);
-void YToARGBRow_C(const uint8* src_y,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I444ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-// RGB24/RAW are unaligned.
-void I422ToRGB24Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width);
-void I422ToRAWRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
-
-void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-// RGB24/RAW are unaligned.
-void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void YToARGBRow_SSE2(const uint8* src_y,
- uint8* dst_argb,
- int width);
-void YToARGBRow_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width);
-void YToARGBRow_Any_SSE2(const uint8* src_y,
- uint8* dst_argb,
- int width);
-void YToARGBRow_Any_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width);
-
-// ARGB preattenuated alpha blend.
-void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-// ARGB multiply images. Same API as Blend, but these require
-// pointer and width alignment for SSE2.
-void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-// ARGB add images.
-void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-// ARGB subtract images. Same API as Blend, but these require
-// pointer and width alignment for SSE2.
-void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void I444ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGBARow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGB24Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRAWRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-
-void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_y, int pix);
-void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_C(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_y, int pix);
-void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_NEON(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-
-void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_C(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-
-void HalfRow_C(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix);
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix);
-void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix);
-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix);
-
-void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix);
-void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 /* selector */, int pix);
-
-void I422ToYUY2Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-void I422ToYUY2Row_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-void I422ToYUY2Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-void I422ToYUY2Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-
-// Effects related row functions.
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
- int width);
-void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
- int width);
-void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
- int width);
-void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
- int width);
-
-// Inverse table for unattenuate, shared by C and SSE2.
-extern const uint32 fixed_invtbl8[256];
-void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
- int width);
-void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
- int width);
-
-void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
-
-void ARGBSepiaRow_C(uint8* dst_argb, int width);
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
-
-void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width);
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width);
-void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width);
-
-void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
-
-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
-
-void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width);
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width);
-void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width);
-
-void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value);
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value);
-void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value);
-
-// Used for blur.
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst, int count);
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width);
-
-void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst, int count);
-void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width);
-
-LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width);
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width);
-
-// Used for I420Scale, ARGBScale, and ARGBInterpolate.
-void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr,
- int width, int source_y_fraction);
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-
-// Sobel images.
-void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
- uint8* dst_sobelx, int width);
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width);
-void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width);
-void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width);
-void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width);
-void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width);
-void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width);
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width);
-void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width);
-void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-
-void ARGBPolynomialRow_C(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width);
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width);
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width);
-
-void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
- const uint8* luma, uint32 lumacoeff);
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- int width,
- const uint8* luma, uint32 lumacoeff);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_ROW_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h
deleted file mode 100755
index 592b8ed5fa..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_H_ // NOLINT
-#define INCLUDE_LIBYUV_SCALE_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Supported filtering.
-typedef enum FilterMode {
- kFilterNone = 0, // Point sample; Fastest.
- kFilterLinear = 1, // Filter horizontally only.
- kFilterBilinear = 2, // Faster than box, but lower quality scaling down.
- kFilterBox = 3 // Highest quality.
-} FilterModeEnum;
-
-// Scale a YUV plane.
-LIBYUV_API
-void ScalePlane(const uint8* src, int src_stride,
- int src_width, int src_height,
- uint8* dst, int dst_stride,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-// Scales a YUV 4:2:0 image from the src width and height to the
-// dst width and height.
-// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
-// used. This produces basic (blocky) quality at the fastest speed.
-// If filtering is kFilterBilinear, interpolation is used to produce a better
-// quality image, at the expense of speed.
-// If filtering is kFilterBox, averaging is used to produce ever better
-// quality image, at further expense of speed.
-// Returns 0 if successful.
-
-LIBYUV_API
-int I420Scale(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-#ifdef __cplusplus
-// Legacy API. Deprecated.
-LIBYUV_API
-int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
- int src_stride_y, int src_stride_u, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_y, uint8* dst_u, uint8* dst_v,
- int dst_stride_y, int dst_stride_u, int dst_stride_v,
- int dst_width, int dst_height,
- LIBYUV_BOOL interpolate);
-
-// Legacy API. Deprecated.
-LIBYUV_API
-int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
- uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
- LIBYUV_BOOL interpolate);
-
-// For testing, allow disabling of specialized scalers.
-LIBYUV_API
-void SetUseReferenceImpl(LIBYUV_BOOL use);
-#endif // __cplusplus
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_SCALE_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h
deleted file mode 100755
index 0c9b362575..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ // NOLINT
-#define INCLUDE_LIBYUV_SCALE_ARGB_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/scale.h" // For FilterMode
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-LIBYUV_API
-int ARGBScale(const uint8* src_argb, int src_stride_argb,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-// Clipped scale takes destination rectangle coordinates for clip values.
-LIBYUV_API
-int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- int clip_x, int clip_y, int clip_width, int clip_height,
- enum FilterMode filtering);
-
-// TODO(fbarchard): Implement this.
-// Scale with YUV conversion to ARGB and clipping.
-LIBYUV_API
-int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint32 src_fourcc,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- uint32 dst_fourcc,
- int dst_width, int dst_height,
- int clip_x, int clip_y, int clip_width, int clip_height,
- enum FilterMode filtering);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h
deleted file mode 100644
index 13eccc4d77..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ // NOLINT
-#define INCLUDE_LIBYUV_SCALE_ROW_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
- defined(TARGET_IPHONE_SIMULATOR)
-#define LIBYUV_DISABLE_X86
-#endif
-
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SCALEROWDOWN2_SSE2
-#define HAS_SCALEROWDOWN4_SSE2
-#define HAS_SCALEROWDOWN34_SSSE3
-#define HAS_SCALEROWDOWN38_SSSE3
-#define HAS_SCALEADDROWS_SSE2
-#define HAS_SCALEFILTERCOLS_SSSE3
-#define HAS_SCALECOLSUP2_SSE2
-#define HAS_SCALEARGBROWDOWN2_SSE2
-#define HAS_SCALEARGBROWDOWNEVEN_SSE2
-#define HAS_SCALEARGBCOLS_SSE2
-#define HAS_SCALEARGBFILTERCOLS_SSSE3
-#define HAS_SCALEARGBCOLSUP2_SSE2
-#define HAS_FIXEDDIV_X86
-#define HAS_FIXEDDIV1_X86
-#endif
-
-// The following are available on Neon platforms:
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_SCALEROWDOWN2_NEON
-#define HAS_SCALEROWDOWN4_NEON
-#define HAS_SCALEROWDOWN34_NEON
-#define HAS_SCALEROWDOWN38_NEON
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
-#define HAS_SCALEARGBROWDOWN2_NEON
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
- defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_SCALEROWDOWN2_MIPS_DSPR2
-#define HAS_SCALEROWDOWN4_MIPS_DSPR2
-#define HAS_SCALEROWDOWN34_MIPS_DSPR2
-#define HAS_SCALEROWDOWN38_MIPS_DSPR2
-#endif
-
-// Scale ARGB vertically with bilinear interpolation.
-void ScalePlaneVertical(int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int y, int dy,
- int bpp, enum FilterMode filtering);
-
-// Simplify the filtering based on scale factors.
-enum FilterMode ScaleFilterReduce(int src_width, int src_height,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv_C(int num, int div);
-int FixedDiv_X86(int num, int div);
-// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
-int FixedDiv1_C(int num, int div);
-int FixedDiv1_X86(int num, int div);
-#ifdef HAS_FIXEDDIV_X86
-#define FixedDiv FixedDiv_X86
-#define FixedDiv1 FixedDiv1_X86
-#else
-#define FixedDiv FixedDiv_C
-#define FixedDiv1 FixedDiv1_C
-#endif
-
-// Compute slope values for stepping.
-void ScaleSlope(int src_width, int src_height,
- int dst_width, int dst_height,
- enum FilterMode filtering,
- int* x, int* y, int* dx, int* dy);
-
-void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width);
-void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width);
-void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int, int);
-void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height);
-void ScaleARGBRowDown2_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int, int);
-void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width,
- int src_height);
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-// Row functions.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, int src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, int src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-
-// ScaleRowDown2Box also used by planar functions
-// NEON downscalers with interpolation.
-
-// Note - not static due to reuse in convert for 444 to 420.
-void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-
-void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-
-void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-// Down scale from 4 to 3 pixels. Use the neon multilane read/write
-// to load up the every 4th pixel into a 4 different registers.
-// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-// 32 -> 12
-void ScaleRowDown38_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-// 32x3 -> 12x1
-void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width);
-void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width);
-void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_SCALE_ROW_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h
deleted file mode 100755
index 4881861866..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
-#define INCLUDE_LIBYUV_VERSION_H_
-
-#define LIBYUV_VERSION 998
-
-#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h
deleted file mode 100755
index 039efb96d1..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-// Common definitions for video, including fourcc and VideoFormat.
-
-#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT
-#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-// Definition of FourCC codes
-//////////////////////////////////////////////////////////////////////////////
-
-// Convert four characters to a FourCC code.
-// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
-// constants are used in a switch.
-#ifdef __cplusplus
-#define FOURCC(a, b, c, d) ( \
- (static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
- (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
-#else
-#define FOURCC(a, b, c, d) ( \
- ((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
- ((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
-#endif
-
-// Some pages discussing FourCC codes:
-// http://www.fourcc.org/yuv.php
-// http://v4l2spec.bytesex.org/spec/book1.htm
-// http://developer.apple.com/quicktime/icefloe/dispatch020.html
-// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
-// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
-
-// FourCC codes grouped according to implementation efficiency.
-// Primary formats should convert in 1 efficient step.
-// Secondary formats are converted in 2 steps.
-// Auxilliary formats call primary converters.
-enum FourCC {
- // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
- FOURCC_I420 = FOURCC('I', '4', '2', '0'),
- FOURCC_I422 = FOURCC('I', '4', '2', '2'),
- FOURCC_I444 = FOURCC('I', '4', '4', '4'),
- FOURCC_I411 = FOURCC('I', '4', '1', '1'),
- FOURCC_I400 = FOURCC('I', '4', '0', '0'),
- FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
- FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
- FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
- FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
-
- // 2 Secondary YUV formats: row biplanar.
- FOURCC_M420 = FOURCC('M', '4', '2', '0'),
- FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
-
- // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
- FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
- FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
- FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
- FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
- FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
- FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
- FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
- FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
- FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
-
- // 4 Secondary RGB formats: 4 Bayer Patterns.
- FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
- FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
- FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
- FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
-
- // 1 Primary Compressed YUV format.
- FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
-
- // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
- FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
- FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
- FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
- FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
- FOURCC_J420 = FOURCC('J', '4', '2', '0'),
- FOURCC_J400 = FOURCC('J', '4', '0', '0'),
-
- // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
- FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
- FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
- FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
- FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
- FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
- FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
- FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
- FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
- FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
- FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
- FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
- FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
- FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
- FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
- FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
- FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
- FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
-
- // 1 Auxiliary compressed YUV format set aside for capturer.
- FOURCC_H264 = FOURCC('H', '2', '6', '4'),
-
- // Match any fourcc.
- FOURCC_ANY = 0xFFFFFFFF,
-};
-
-enum FourCCBpp {
- // Canonical fourcc codes used in our code.
- FOURCC_BPP_I420 = 12,
- FOURCC_BPP_I422 = 16,
- FOURCC_BPP_I444 = 24,
- FOURCC_BPP_I411 = 12,
- FOURCC_BPP_I400 = 8,
- FOURCC_BPP_NV21 = 12,
- FOURCC_BPP_NV12 = 12,
- FOURCC_BPP_YUY2 = 16,
- FOURCC_BPP_UYVY = 16,
- FOURCC_BPP_M420 = 12,
- FOURCC_BPP_Q420 = 12,
- FOURCC_BPP_ARGB = 32,
- FOURCC_BPP_BGRA = 32,
- FOURCC_BPP_ABGR = 32,
- FOURCC_BPP_RGBA = 32,
- FOURCC_BPP_24BG = 24,
- FOURCC_BPP_RAW = 24,
- FOURCC_BPP_RGBP = 16,
- FOURCC_BPP_RGBO = 16,
- FOURCC_BPP_R444 = 16,
- FOURCC_BPP_RGGB = 8,
- FOURCC_BPP_BGGR = 8,
- FOURCC_BPP_GRBG = 8,
- FOURCC_BPP_GBRG = 8,
- FOURCC_BPP_YV12 = 12,
- FOURCC_BPP_YV16 = 16,
- FOURCC_BPP_YV24 = 24,
- FOURCC_BPP_YU12 = 12,
- FOURCC_BPP_J420 = 12,
- FOURCC_BPP_J400 = 8,
- FOURCC_BPP_MJPG = 0, // 0 means unknown.
- FOURCC_BPP_H264 = 0,
- FOURCC_BPP_IYUV = 12,
- FOURCC_BPP_YU16 = 16,
- FOURCC_BPP_YU24 = 24,
- FOURCC_BPP_YUYV = 16,
- FOURCC_BPP_YUVS = 16,
- FOURCC_BPP_HDYC = 16,
- FOURCC_BPP_2VUY = 16,
- FOURCC_BPP_JPEG = 1,
- FOURCC_BPP_DMB1 = 1,
- FOURCC_BPP_BA81 = 8,
- FOURCC_BPP_RGB3 = 24,
- FOURCC_BPP_BGR3 = 24,
- FOURCC_BPP_CM32 = 32,
- FOURCC_BPP_CM24 = 24,
-
- // Match any fourcc.
- FOURCC_BPP_ANY = 0, // 0 means unknown.
-};
-
-// Converts fourcc aliases into canonical ones.
-LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt b/drivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt
deleted file mode 100755
index 680e4a1c36..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-libyuv's source code is here provided in minimalist distribution format
-with all source files not needed for compiling libtheoraplayer removed.
-
-- The project files were modified to fit libtheoraplayer's binary output
- folder structure.
-- Some project files missing in the original source distibution were added to support
- compiling the libtheoraplayer on those platforms.
-- Also, some code may have been changed to address certain compiler/platform
- specific problems and is so indicated in the source code.
-
-libyuv is owned and maintained by the Google Inc. and this distribution
-is present here only for convenience and easier compilation of libtheoraplayer.
-
-If you want to use libyuv outside of libtheoraplayer, it is encouraged to use the
-original source distribution by Google Inc: https://code.google.com/p/libyuv/ \ No newline at end of file
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare.cc
deleted file mode 100755
index 9ea81b4e21..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare.cc
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/compare.h"
-
-#include <float.h>
-#include <math.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
-
-// This module is for Visual C x86
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || \
- (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
-#define HAS_HASHDJB2_SSE41
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
-
-#if _MSC_VER >= 1700
-#define HAS_HASHDJB2_AVX2
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
-#endif
-
-#endif // HAS_HASHDJB2_SSE41
-
-// hash seed of 5381 recommended.
-LIBYUV_API
-uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
- const int kBlockSize = 1 << 15; // 32768;
- int remainder;
- uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
-#if defined(HAS_HASHDJB2_SSE41)
- if (TestCpuFlag(kCpuHasSSE41)) {
- HashDjb2_SSE = HashDjb2_SSE41;
- }
-#endif
-#if defined(HAS_HASHDJB2_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- HashDjb2_SSE = HashDjb2_AVX2;
- }
-#endif
-
- while (count >= (uint64)(kBlockSize)) {
- seed = HashDjb2_SSE(src, kBlockSize, seed);
- src += kBlockSize;
- count -= kBlockSize;
- }
- remainder = (int)(count) & ~15;
- if (remainder) {
- seed = HashDjb2_SSE(src, remainder, seed);
- src += remainder;
- count -= remainder;
- }
- remainder = (int)(count) & 15;
- if (remainder) {
- seed = HashDjb2_C(src, remainder, seed);
- }
- return seed;
-}
-
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
-#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_SUMSQUAREERROR_NEON
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
-#endif
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SUMSQUAREERROR_SSE2
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-// Visual C 2012 required for AVX2.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
-#define HAS_SUMSQUAREERROR_AVX2
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-
-// TODO(fbarchard): Refactor into row function.
-LIBYUV_API
-uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
- int count) {
- // SumSquareError returns values 0 to 65535 for each squared difference.
- // Up to 65536 of those can be summed and remain within a uint32.
- // After each block of 65536 pixels, accumulate into a uint64.
- const int kBlockSize = 65536;
- int remainder = count & (kBlockSize - 1) & ~31;
- uint64 sse = 0;
- int i;
- uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
- SumSquareError_C;
-#if defined(HAS_SUMSQUAREERROR_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SumSquareError = SumSquareError_NEON;
- }
-#endif
-#if defined(HAS_SUMSQUAREERROR_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
- // Note only used for multiples of 16 so count is not checked.
- SumSquareError = SumSquareError_SSE2;
- }
-#endif
-#if defined(HAS_SUMSQUAREERROR_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- // Note only used for multiples of 32 so count is not checked.
- SumSquareError = SumSquareError_AVX2;
- }
-#endif
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sse)
-#endif
- for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
- sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
- }
- src_a += count & ~(kBlockSize - 1);
- src_b += count & ~(kBlockSize - 1);
- if (remainder) {
- sse += SumSquareError(src_a, src_b, remainder);
- src_a += remainder;
- src_b += remainder;
- }
- remainder = count & 31;
- if (remainder) {
- sse += SumSquareError_C(src_a, src_b, remainder);
- }
- return sse;
-}
-
-LIBYUV_API
-uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height) {
- uint64 sse = 0;
- int h;
- // Coalesce rows.
- if (stride_a == width &&
- stride_b == width) {
- width *= height;
- height = 1;
- stride_a = stride_b = 0;
- }
- for (h = 0; h < height; ++h) {
- sse += ComputeSumSquareError(src_a, src_b, width);
- src_a += stride_a;
- src_b += stride_b;
- }
- return sse;
-}
-
-LIBYUV_API
-double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
- double psnr;
- if (sse > 0) {
- double mse = (double)(count) / (double)(sse);
- psnr = 10.0 * log10(255.0 * 255.0 * mse);
- } else {
- psnr = kMaxPsnr; // Limit to prevent divide by 0
- }
-
- if (psnr > kMaxPsnr)
- psnr = kMaxPsnr;
-
- return psnr;
-}
-
-LIBYUV_API
-double CalcFramePsnr(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height) {
- const uint64 samples = width * height;
- const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
- src_b, stride_b,
- width, height);
- return SumSquareErrorToPsnr(sse, samples);
-}
-
-LIBYUV_API
-double I420Psnr(const uint8* src_y_a, int stride_y_a,
- const uint8* src_u_a, int stride_u_a,
- const uint8* src_v_a, int stride_v_a,
- const uint8* src_y_b, int stride_y_b,
- const uint8* src_u_b, int stride_u_b,
- const uint8* src_v_b, int stride_v_b,
- int width, int height) {
- const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
- src_y_b, stride_y_b,
- width, height);
- const int width_uv = (width + 1) >> 1;
- const int height_uv = (height + 1) >> 1;
- const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
- src_u_b, stride_u_b,
- width_uv, height_uv);
- const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
- src_v_b, stride_v_b,
- width_uv, height_uv);
- const uint64 samples = width * height + 2 * (width_uv * height_uv);
- const uint64 sse = sse_y + sse_u + sse_v;
- return SumSquareErrorToPsnr(sse, samples);
-}
-
-static const int64 cc1 = 26634; // (64^2*(.01*255)^2
-static const int64 cc2 = 239708; // (64^2*(.03*255)^2
-
-static double Ssim8x8_C(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b) {
- int64 sum_a = 0;
- int64 sum_b = 0;
- int64 sum_sq_a = 0;
- int64 sum_sq_b = 0;
- int64 sum_axb = 0;
-
- int i;
- for (i = 0; i < 8; ++i) {
- int j;
- for (j = 0; j < 8; ++j) {
- sum_a += src_a[j];
- sum_b += src_b[j];
- sum_sq_a += src_a[j] * src_a[j];
- sum_sq_b += src_b[j] * src_b[j];
- sum_axb += src_a[j] * src_b[j];
- }
-
- src_a += stride_a;
- src_b += stride_b;
- }
-
- {
- const int64 count = 64;
- // scale the constants by number of pixels
- const int64 c1 = (cc1 * count * count) >> 12;
- const int64 c2 = (cc2 * count * count) >> 12;
-
- const int64 sum_a_x_sum_b = sum_a * sum_b;
-
- const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
- (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
-
- const int64 sum_a_sq = sum_a*sum_a;
- const int64 sum_b_sq = sum_b*sum_b;
-
- const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
- (count * sum_sq_a - sum_a_sq +
- count * sum_sq_b - sum_b_sq + c2);
-
- if (ssim_d == 0.0) {
- return DBL_MAX;
- }
- return ssim_n * 1.0 / ssim_d;
- }
-}
-
-// We are using a 8x8 moving window with starting location of each 8x8 window
-// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
-// block boundaries to penalize blocking artifacts.
-LIBYUV_API
-double CalcFrameSsim(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height) {
- int samples = 0;
- double ssim_total = 0;
- double (*Ssim8x8)(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b) = Ssim8x8_C;
-
- // sample point start with each 4x4 location
- int i;
- for (i = 0; i < height - 8; i += 4) {
- int j;
- for (j = 0; j < width - 8; j += 4) {
- ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
- samples++;
- }
-
- src_a += stride_a * 4;
- src_b += stride_b * 4;
- }
-
- ssim_total /= samples;
- return ssim_total;
-}
-
-LIBYUV_API
-double I420Ssim(const uint8* src_y_a, int stride_y_a,
- const uint8* src_u_a, int stride_u_a,
- const uint8* src_v_a, int stride_v_a,
- const uint8* src_y_b, int stride_y_b,
- const uint8* src_u_b, int stride_u_b,
- const uint8* src_v_b, int stride_v_b,
- int width, int height) {
- const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
- src_y_b, stride_y_b, width, height);
- const int width_uv = (width + 1) >> 1;
- const int height_uv = (height + 1) >> 1;
- const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
- src_u_b, stride_u_b,
- width_uv, height_uv);
- const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
- src_v_b, stride_v_b,
- width_uv, height_uv);
- return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc
deleted file mode 100755
index c546b51829..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
- uint32 sse = 0u;
- int i;
- for (i = 0; i < count; ++i) {
- int diff = src_a[i] - src_b[i];
- sse += (uint32)(diff * diff);
- }
- return sse;
-}
-
-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
- uint32 hash = seed;
- int i;
- for (i = 0; i < count; ++i) {
- hash += (hash << 5) + src[i];
- }
- return hash;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc
deleted file mode 100755
index bb843a6ab8..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
- volatile uint32 sse;
- asm volatile (
-#ifdef _ANDROID
- ".fpu neon\n"
-#endif
- "vmov.u8 q8, #0 \n"
- "vmov.u8 q10, #0 \n"
- "vmov.u8 q9, #0 \n"
- "vmov.u8 q11, #0 \n"
-
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n"
- "vld1.8 {q1}, [%1]! \n"
- "subs %2, %2, #16 \n"
- "vsubl.u8 q2, d0, d2 \n"
- "vsubl.u8 q3, d1, d3 \n"
- "vmlal.s16 q8, d4, d4 \n"
- "vmlal.s16 q9, d6, d6 \n"
- "vmlal.s16 q10, d5, d5 \n"
- "vmlal.s16 q11, d7, d7 \n"
- "bgt 1b \n"
-
- "vadd.u32 q8, q8, q9 \n"
- "vadd.u32 q10, q10, q11 \n"
- "vadd.u32 q11, q8, q10 \n"
- "vpaddl.u32 q1, q11 \n"
- "vadd.u64 d0, d2, d3 \n"
- "vmov.32 %3, d0[0] \n"
- : "+r"(src_a),
- "+r"(src_b),
- "+r"(count),
- "=r"(sse)
- :
- : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
- return sse;
-}
-
-#endif // __ARM_NEON__
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc
deleted file mode 100755
index ac361190e8..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
- uint32 sse;
- asm volatile ( // NOLINT
- "pxor %%xmm0,%%xmm0 \n"
- "pxor %%xmm5,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm1 \n"
- "lea " MEMLEA(0x10, 0) ",%0 \n"
- "movdqa " MEMACCESS(1) ",%%xmm2 \n"
- "lea " MEMLEA(0x10, 1) ",%1 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psubusb %%xmm2,%%xmm1 \n"
- "psubusb %%xmm3,%%xmm2 \n"
- "por %%xmm2,%%xmm1 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpckhbw %%xmm5,%%xmm2 \n"
- "pmaddwd %%xmm1,%%xmm1 \n"
- "pmaddwd %%xmm2,%%xmm2 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "paddd %%xmm2,%%xmm0 \n"
- "jg 1b \n"
-
- "pshufd $0xee,%%xmm0,%%xmm1 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "pshufd $0x1,%%xmm0,%%xmm1 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "movd %%xmm0,%3 \n"
-
- : "+r"(src_a), // %0
- "+r"(src_b), // %1
- "+r"(count), // %2
- "=g"(sse) // %3
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- ); // NOLINT
- return sse;
-}
-
-#endif // defined(__x86_64__) || defined(__i386__)
-
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
-#define HAS_HASHDJB2_SSE41
-static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
-static uvec32 kHashMul0 = {
- 0x0c3525e1, // 33 ^ 15
- 0xa3476dc1, // 33 ^ 14
- 0x3b4039a1, // 33 ^ 13
- 0x4f5f0981, // 33 ^ 12
-};
-static uvec32 kHashMul1 = {
- 0x30f35d61, // 33 ^ 11
- 0x855cb541, // 33 ^ 10
- 0x040a9121, // 33 ^ 9
- 0x747c7101, // 33 ^ 8
-};
-static uvec32 kHashMul2 = {
- 0xec41d4e1, // 33 ^ 7
- 0x4cfa3cc1, // 33 ^ 6
- 0x025528a1, // 33 ^ 5
- 0x00121881, // 33 ^ 4
-};
-static uvec32 kHashMul3 = {
- 0x00008c61, // 33 ^ 3
- 0x00000441, // 33 ^ 2
- 0x00000021, // 33 ^ 1
- 0x00000001, // 33 ^ 0
-};
-
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
- uint32 hash;
- asm volatile ( // NOLINT
- "movd %2,%%xmm0 \n"
- "pxor %%xmm7,%%xmm7 \n"
- "movdqa %4,%%xmm6 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "lea " MEMLEA(0x10, 0) ",%0 \n"
- "pmulld %%xmm6,%%xmm0 \n"
- "movdqa %5,%%xmm5 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm7,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklwd %%xmm7,%%xmm3 \n"
- "pmulld %%xmm5,%%xmm3 \n"
- "movdqa %6,%%xmm5 \n"
- "movdqa %%xmm2,%%xmm4 \n"
- "punpckhwd %%xmm7,%%xmm4 \n"
- "pmulld %%xmm5,%%xmm4 \n"
- "movdqa %7,%%xmm5 \n"
- "punpckhbw %%xmm7,%%xmm1 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklwd %%xmm7,%%xmm2 \n"
- "pmulld %%xmm5,%%xmm2 \n"
- "movdqa %8,%%xmm5 \n"
- "punpckhwd %%xmm7,%%xmm1 \n"
- "pmulld %%xmm5,%%xmm1 \n"
- "paddd %%xmm4,%%xmm3 \n"
- "paddd %%xmm2,%%xmm1 \n"
- "sub $0x10,%1 \n"
- "paddd %%xmm3,%%xmm1 \n"
- "pshufd $0xe,%%xmm1,%%xmm2 \n"
- "paddd %%xmm2,%%xmm1 \n"
- "pshufd $0x1,%%xmm1,%%xmm2 \n"
- "paddd %%xmm2,%%xmm1 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "jg 1b \n"
- "movd %%xmm0,%3 \n"
- : "+r"(src), // %0
- "+r"(count), // %1
- "+rm"(seed), // %2
- "=g"(hash) // %3
- : "m"(kHash16x33), // %4
- "m"(kHashMul0), // %5
- "m"(kHashMul1), // %6
- "m"(kHashMul2), // %7
- "m"(kHashMul3) // %8
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- ); // NOLINT
- return hash;
-}
-#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc
deleted file mode 100755
index 99831651f5..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-__declspec(naked) __declspec(align(16))
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
- __asm {
- mov eax, [esp + 4] // src_a
- mov edx, [esp + 8] // src_b
- mov ecx, [esp + 12] // count
- pxor xmm0, xmm0
- pxor xmm5, xmm5
-
- align 4
- wloop:
- movdqa xmm1, [eax]
- lea eax, [eax + 16]
- movdqa xmm2, [edx]
- lea edx, [edx + 16]
- sub ecx, 16
- movdqa xmm3, xmm1 // abs trick
- psubusb xmm1, xmm2
- psubusb xmm2, xmm3
- por xmm1, xmm2
- movdqa xmm2, xmm1
- punpcklbw xmm1, xmm5
- punpckhbw xmm2, xmm5
- pmaddwd xmm1, xmm1
- pmaddwd xmm2, xmm2
- paddd xmm0, xmm1
- paddd xmm0, xmm2
- jg wloop
-
- pshufd xmm1, xmm0, 0xee
- paddd xmm0, xmm1
- pshufd xmm1, xmm0, 0x01
- paddd xmm0, xmm1
- movd eax, xmm0
- ret
- }
-}
-
-// Visual C 2012 required for AVX2.
-#if _MSC_VER >= 1700
-// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
-#pragma warning(disable: 4752)
-__declspec(naked) __declspec(align(16))
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
- __asm {
- mov eax, [esp + 4] // src_a
- mov edx, [esp + 8] // src_b
- mov ecx, [esp + 12] // count
- vpxor ymm0, ymm0, ymm0 // sum
- vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
- sub edx, eax
-
- align 4
- wloop:
- vmovdqu ymm1, [eax]
- vmovdqu ymm2, [eax + edx]
- lea eax, [eax + 32]
- sub ecx, 32
- vpsubusb ymm3, ymm1, ymm2 // abs difference trick
- vpsubusb ymm2, ymm2, ymm1
- vpor ymm1, ymm2, ymm3
- vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order.
- vpunpckhbw ymm1, ymm1, ymm5
- vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32.
- vpmaddwd ymm1, ymm1, ymm1
- vpaddd ymm0, ymm0, ymm1
- vpaddd ymm0, ymm0, ymm2
- jg wloop
-
- vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
- vpaddd ymm0, ymm0, ymm1
- vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes.
- vpaddd ymm0, ymm0, ymm1
- vpermq ymm1, ymm0, 0x02 // high + low lane.
- vpaddd ymm0, ymm0, ymm1
- vmovd eax, xmm0
- vzeroupper
- ret
- }
-}
-#endif // _MSC_VER >= 1700
-
-#define HAS_HASHDJB2_SSE41
-static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
-static uvec32 kHashMul0 = {
- 0x0c3525e1, // 33 ^ 15
- 0xa3476dc1, // 33 ^ 14
- 0x3b4039a1, // 33 ^ 13
- 0x4f5f0981, // 33 ^ 12
-};
-static uvec32 kHashMul1 = {
- 0x30f35d61, // 33 ^ 11
- 0x855cb541, // 33 ^ 10
- 0x040a9121, // 33 ^ 9
- 0x747c7101, // 33 ^ 8
-};
-static uvec32 kHashMul2 = {
- 0xec41d4e1, // 33 ^ 7
- 0x4cfa3cc1, // 33 ^ 6
- 0x025528a1, // 33 ^ 5
- 0x00121881, // 33 ^ 4
-};
-static uvec32 kHashMul3 = {
- 0x00008c61, // 33 ^ 3
- 0x00000441, // 33 ^ 2
- 0x00000021, // 33 ^ 1
- 0x00000001, // 33 ^ 0
-};
-
-// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6
-// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
-// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
-// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
-// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
-#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
- _asm _emit 0x40 _asm _emit reg
-
-__declspec(naked) __declspec(align(16))
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
- __asm {
- mov eax, [esp + 4] // src
- mov ecx, [esp + 8] // count
- movd xmm0, [esp + 12] // seed
-
- pxor xmm7, xmm7 // constant 0 for unpck
- movdqa xmm6, kHash16x33
-
- align 4
- wloop:
- movdqu xmm1, [eax] // src[0-15]
- lea eax, [eax + 16]
- pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
- movdqa xmm5, kHashMul0
- movdqa xmm2, xmm1
- punpcklbw xmm2, xmm7 // src[0-7]
- movdqa xmm3, xmm2
- punpcklwd xmm3, xmm7 // src[0-3]
- pmulld(0xdd) // pmulld xmm3, xmm5
- movdqa xmm5, kHashMul1
- movdqa xmm4, xmm2
- punpckhwd xmm4, xmm7 // src[4-7]
- pmulld(0xe5) // pmulld xmm4, xmm5
- movdqa xmm5, kHashMul2
- punpckhbw xmm1, xmm7 // src[8-15]
- movdqa xmm2, xmm1
- punpcklwd xmm2, xmm7 // src[8-11]
- pmulld(0xd5) // pmulld xmm2, xmm5
- movdqa xmm5, kHashMul3
- punpckhwd xmm1, xmm7 // src[12-15]
- pmulld(0xcd) // pmulld xmm1, xmm5
- paddd xmm3, xmm4 // add 16 results
- paddd xmm1, xmm2
- sub ecx, 16
- paddd xmm1, xmm3
-
- pshufd xmm2, xmm1, 0x0e // upper 2 dwords
- paddd xmm1, xmm2
- pshufd xmm2, xmm1, 0x01
- paddd xmm1, xmm2
- paddd xmm0, xmm1
- jg wloop
-
- movd eax, xmm0 // return hash
- ret
- }
-}
-
-// Visual C 2012 required for AVX2.
-#if _MSC_VER >= 1700
-__declspec(naked) __declspec(align(16))
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
- __asm {
- mov eax, [esp + 4] // src
- mov ecx, [esp + 8] // count
- movd xmm0, [esp + 12] // seed
- movdqa xmm6, kHash16x33
-
- align 4
- wloop:
- vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
- pmulld xmm0, xmm6 // hash *= 33 ^ 16
- vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
- pmulld xmm3, kHashMul0
- vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
- pmulld xmm4, kHashMul1
- vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
- pmulld xmm2, kHashMul2
- lea eax, [eax + 16]
- pmulld xmm1, kHashMul3
- paddd xmm3, xmm4 // add 16 results
- paddd xmm1, xmm2
- sub ecx, 16
- paddd xmm1, xmm3
- pshufd xmm2, xmm1, 0x0e // upper 2 dwords
- paddd xmm1, xmm2
- pshufd xmm2, xmm1, 0x01
- paddd xmm1, xmm2
- paddd xmm0, xmm1
- jg wloop
-
- movd eax, xmm0 // return hash
- ret
- }
-}
-#endif // _MSC_VER >= 1700
-
-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert.cc
deleted file mode 100755
index c8408dc798..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert.cc
+++ /dev/null
@@ -1,1491 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-#include "libyuv/scale.h" // For ScalePlane()
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-// Any I4xx To I420 format with mirroring.
-static int I4xxToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_y_width, int src_y_height,
- int src_uv_width, int src_uv_height) {
- if (src_y_width == 0 || src_y_height == 0 ||
- src_uv_width == 0 || src_uv_height == 0) {
- return -1;
- }
- const int dst_y_width = Abs(src_y_width);
- const int dst_y_height = Abs(src_y_height);
- const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
- const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
- ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
- dst_y, dst_stride_y, dst_y_width, dst_y_height,
- kFilterBilinear);
- ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
- dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
- kFilterBilinear);
- ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
- dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
- kFilterBilinear);
- return 0;
-}
-
-// Copy I420 with optional flipping
-// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure
-// is does row coalescing.
-LIBYUV_API
-int I420Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_y || !src_u || !src_v ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- const int halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (halfheight - 1) * src_stride_u;
- src_v = src_v + (halfheight - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- if (dst_y) {
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- }
- // Copy UV planes.
- const int halfwidth = (width + 1) >> 1;
- const int halfheight = (height + 1) >> 1;
- CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
- CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
- return 0;
-}
-
-// 422 chroma is 1/2 width, 1x height
-// 420 chroma is 1/2 width, 1/2 height
-LIBYUV_API
-int I422ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int src_uv_width = SUBSAMPLE(width, 1, 1);
- return I4xxToI420(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- src_uv_width, height);
-}
-
-// 444 chroma is 1x width, 1x height
-// 420 chroma is 1/2 width, 1/2 height
-LIBYUV_API
-int I444ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- return I4xxToI420(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- width, height);
-}
-
-// 411 chroma is 1/4 width, 1x height
-// 420 chroma is 1/2 width, 1/2 height
-LIBYUV_API
-int I411ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int src_uv_width = SUBSAMPLE(width, 3, 2);
- return I4xxToI420(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- src_uv_width, height);
-}
-
-// I400 is greyscale typically used in MJPG
-LIBYUV_API
-int I400ToI420(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_y || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128);
- SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128);
- return 0;
-}
-
-static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
- uint8* dst, int dst_stride,
- int width, int height) {
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_COPYROW_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_X86;
- }
-#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src, 16) &&
- IS_ALIGNED(src_stride_0, 16) && IS_ALIGNED(src_stride_1, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- CopyRow = CopyRow_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
- // Copy plane
- for (int y = 0; y < height - 1; y += 2) {
- CopyRow(src, dst, width);
- CopyRow(src + src_stride_0, dst + dst_stride, width);
- src += src_stride_0 + src_stride_1;
- dst += dst_stride * 2;
- }
- if (height & 1) {
- CopyRow(src, dst, width);
- }
-}
-
-// Support converting from FOURCC_M420
-// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
-// easy conversion to I420.
-// M420 format description:
-// M420 is row biplanar 420: 2 rows of Y and 1 row of UV.
-// Chroma is half width / half height. (420)
-// src_stride_m420 is row planar. Normally this will be the width in pixels.
-// The UV plane is half width, but 2 values, so src_stride_m420 applies to
-// this as well as the two Y planes.
-static int X420ToI420(const uint8* src_y,
- int src_stride_y0, int src_stride_y1,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_y || !src_uv ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- int halfheight = (height + 1) >> 1;
- dst_y = dst_y + (height - 1) * dst_stride_y;
- dst_u = dst_u + (halfheight - 1) * dst_stride_u;
- dst_v = dst_v + (halfheight - 1) * dst_stride_v;
- dst_stride_y = -dst_stride_y;
- dst_stride_u = -dst_stride_u;
- dst_stride_v = -dst_stride_v;
- }
- // Coalesce rows.
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (src_stride_y0 == width &&
- src_stride_y1 == width &&
- dst_stride_y == width) {
- width *= height;
- height = 1;
- src_stride_y0 = src_stride_y1 = dst_stride_y = 0;
- }
- // Coalesce rows.
- if (src_stride_uv == halfwidth * 2 &&
- dst_stride_u == halfwidth &&
- dst_stride_v == halfwidth) {
- halfwidth *= halfheight;
- halfheight = 1;
- src_stride_uv = dst_stride_u = dst_stride_v = 0;
- }
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
- SplitUVRow_C;
-#if defined(HAS_SPLITUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
- SplitUVRow = SplitUVRow_Any_SSE2;
- if (IS_ALIGNED(halfwidth, 16)) {
- SplitUVRow = SplitUVRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
- IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
- IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
- SplitUVRow = SplitUVRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
- SplitUVRow = SplitUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
- SplitUVRow = SplitUVRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
- SplitUVRow = SplitUVRow_Any_NEON;
- if (IS_ALIGNED(halfwidth, 16)) {
- SplitUVRow = SplitUVRow_NEON;
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
- SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
- if (IS_ALIGNED(halfwidth, 16)) {
- SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2;
- if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
- IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
- IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
- SplitUVRow = SplitUVRow_MIPS_DSPR2;
- }
- }
- }
-#endif
-
- if (dst_y) {
- if (src_stride_y0 == src_stride_y1) {
- CopyPlane(src_y, src_stride_y0, dst_y, dst_stride_y, width, height);
- } else {
- CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
- width, height);
- }
- }
-
- for (int y = 0; y < halfheight; ++y) {
- // Copy a row of UV.
- SplitUVRow(src_uv, dst_u, dst_v, halfwidth);
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- src_uv += src_stride_uv;
- }
- return 0;
-}
-
-// Convert NV12 to I420.
-LIBYUV_API
-int NV12ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- return X420ToI420(src_y, src_stride_y, src_stride_y,
- src_uv, src_stride_uv,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height);
-}
-
-// Convert NV21 to I420. Same as NV12 but u and v pointers swapped.
-LIBYUV_API
-int NV21ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_vu, int src_stride_vu,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- return X420ToI420(src_y, src_stride_y, src_stride_y,
- src_vu, src_stride_vu,
- dst_y, dst_stride_y,
- dst_v, dst_stride_v,
- dst_u, dst_stride_u,
- width, height);
-}
-
-// Convert M420 to I420.
-LIBYUV_API
-int M420ToI420(const uint8* src_m420, int src_stride_m420,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2,
- src_m420 + src_stride_m420 * 2, src_stride_m420 * 3,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height);
-}
-
-// Convert Q420 to I420.
-// Format is rows of YY/YUYV
-LIBYUV_API
-int Q420ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_y || !src_yuy2 ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- int halfheight = (height + 1) >> 1;
- dst_y = dst_y + (height - 1) * dst_stride_y;
- dst_u = dst_u + (halfheight - 1) * dst_stride_u;
- dst_v = dst_v + (halfheight - 1) * dst_stride_v;
- dst_stride_y = -dst_stride_y;
- dst_stride_u = -dst_stride_u;
- dst_stride_v = -dst_stride_v;
- }
- // CopyRow for rows of just Y in Q420 copied to Y plane of I420.
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_X86)
- if (IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_X86;
- }
-#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- CopyRow = CopyRow_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
- void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
- int pix) = YUY2ToUV422Row_C;
- void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
- YUY2ToYRow_C;
-#if defined(HAS_YUY2TOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
- YUY2ToYRow = YUY2ToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
- YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
- YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- YUY2ToYRow = YUY2ToYRow_SSE2;
- }
- }
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
- YUY2ToYRow = YUY2ToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
- YUY2ToYRow = YUY2ToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- YUY2ToYRow = YUY2ToYRow_Any_NEON;
- if (width >= 16) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
- }
- if (IS_ALIGNED(width, 16)) {
- YUY2ToYRow = YUY2ToYRow_NEON;
- YUY2ToUV422Row = YUY2ToUV422Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- CopyRow(src_y, dst_y, width);
- src_y += src_stride_y;
- dst_y += dst_stride_y;
-
- YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
- YUY2ToYRow(src_yuy2, dst_y, width);
- src_yuy2 += src_stride_yuy2;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- CopyRow(src_y, dst_y, width);
- YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
- }
- return 0;
-}
-
-// Convert YUY2 to I420.
-LIBYUV_API
-int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
- src_stride_yuy2 = -src_stride_yuy2;
- }
- void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
- void (*YUY2ToYRow)(const uint8* src_yuy2,
- uint8* dst_y, int pix);
- YUY2ToYRow = YUY2ToYRow_C;
- YUY2ToUVRow = YUY2ToUVRow_C;
-#if defined(HAS_YUY2TOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
- YUY2ToYRow = YUY2ToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2;
- YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
- YUY2ToUVRow = YUY2ToUVRow_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- YUY2ToYRow = YUY2ToYRow_SSE2;
- }
- }
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- YUY2ToUVRow = YUY2ToUVRow_Any_AVX2;
- YUY2ToYRow = YUY2ToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- YUY2ToUVRow = YUY2ToUVRow_AVX2;
- YUY2ToYRow = YUY2ToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- YUY2ToYRow = YUY2ToYRow_Any_NEON;
- if (width >= 16) {
- YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
- }
- if (IS_ALIGNED(width, 16)) {
- YUY2ToYRow = YUY2ToYRow_NEON;
- YUY2ToUVRow = YUY2ToUVRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width);
- YUY2ToYRow(src_yuy2, dst_y, width);
- YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
- src_yuy2 += src_stride_yuy2 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- YUY2ToUVRow(src_yuy2, 0, dst_u, dst_v, width);
- YUY2ToYRow(src_yuy2, dst_y, width);
- }
- return 0;
-}
-
-// Convert UYVY to I420.
-LIBYUV_API
-int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
- src_stride_uyvy = -src_stride_uyvy;
- }
- void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
- void (*UYVYToYRow)(const uint8* src_uyvy,
- uint8* dst_y, int pix);
- UYVYToYRow = UYVYToYRow_C;
- UYVYToUVRow = UYVYToUVRow_C;
-#if defined(HAS_UYVYTOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- UYVYToUVRow = UYVYToUVRow_Any_SSE2;
- UYVYToYRow = UYVYToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2;
- UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
- UYVYToUVRow = UYVYToUVRow_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- UYVYToYRow = UYVYToYRow_SSE2;
- }
- }
- }
- }
-#endif
-#if defined(HAS_UYVYTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- UYVYToUVRow = UYVYToUVRow_Any_AVX2;
- UYVYToYRow = UYVYToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- UYVYToUVRow = UYVYToUVRow_AVX2;
- UYVYToYRow = UYVYToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_UYVYTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- UYVYToYRow = UYVYToYRow_Any_NEON;
- if (width >= 16) {
- UYVYToUVRow = UYVYToUVRow_Any_NEON;
- }
- if (IS_ALIGNED(width, 16)) {
- UYVYToYRow = UYVYToYRow_NEON;
- UYVYToUVRow = UYVYToUVRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width);
- UYVYToYRow(src_uyvy, dst_y, width);
- UYVYToYRow(src_uyvy + src_stride_uyvy, dst_y + dst_stride_y, width);
- src_uyvy += src_stride_uyvy * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width);
- UYVYToYRow(src_uyvy, dst_y, width);
- }
- return 0;
-}
-
-// Convert ARGB to I420.
-LIBYUV_API
-int ARGBToI420(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_argb ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
- ARGBToYRow(src_argb, dst_y, width);
- }
- return 0;
-}
-
-// Convert BGRA to I420.
-LIBYUV_API
-int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_bgra ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_bgra = src_bgra + (height - 1) * src_stride_bgra;
- src_stride_bgra = -src_stride_bgra;
- }
- void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C;
- void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) =
- BGRAToYRow_C;
-#if defined(HAS_BGRATOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
- BGRAToYRow = BGRAToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3;
- BGRAToYRow = BGRAToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16)) {
- BGRAToUVRow = BGRAToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- BGRAToYRow = BGRAToYRow_SSSE3;
- }
- }
- }
- }
-#elif defined(HAS_BGRATOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- BGRAToYRow = BGRAToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- BGRAToYRow = BGRAToYRow_NEON;
- }
- if (width >= 16) {
- BGRAToUVRow = BGRAToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- BGRAToUVRow = BGRAToUVRow_NEON;
- }
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
- BGRAToYRow(src_bgra, dst_y, width);
- BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width);
- src_bgra += src_stride_bgra * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width);
- BGRAToYRow(src_bgra, dst_y, width);
- }
- return 0;
-}
-
-// Convert ABGR to I420.
-LIBYUV_API
-int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_abgr ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_abgr = src_abgr + (height - 1) * src_stride_abgr;
- src_stride_abgr = -src_stride_abgr;
- }
- void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C;
- void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) =
- ABGRToYRow_C;
-#if defined(HAS_ABGRTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
- ABGRToYRow = ABGRToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3;
- ABGRToYRow = ABGRToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16)) {
- ABGRToUVRow = ABGRToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ABGRToYRow = ABGRToYRow_SSSE3;
- }
- }
- }
- }
-#elif defined(HAS_ABGRTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ABGRToYRow = ABGRToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ABGRToYRow = ABGRToYRow_NEON;
- }
- if (width >= 16) {
- ABGRToUVRow = ABGRToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ABGRToUVRow = ABGRToUVRow_NEON;
- }
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
- ABGRToYRow(src_abgr, dst_y, width);
- ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
- src_abgr += src_stride_abgr * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width);
- ABGRToYRow(src_abgr, dst_y, width);
- }
- return 0;
-}
-
-// Convert RGBA to I420.
-LIBYUV_API
-int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_rgba ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgba = src_rgba + (height - 1) * src_stride_rgba;
- src_stride_rgba = -src_stride_rgba;
- }
- void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C;
- void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) =
- RGBAToYRow_C;
-#if defined(HAS_RGBATOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
- RGBAToYRow = RGBAToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3;
- RGBAToYRow = RGBAToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_rgba, 16) && IS_ALIGNED(src_stride_rgba, 16)) {
- RGBAToUVRow = RGBAToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- RGBAToYRow = RGBAToYRow_SSSE3;
- }
- }
- }
- }
-#elif defined(HAS_RGBATOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- RGBAToYRow = RGBAToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGBAToYRow = RGBAToYRow_NEON;
- }
- if (width >= 16) {
- RGBAToUVRow = RGBAToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- RGBAToUVRow = RGBAToUVRow_NEON;
- }
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
- RGBAToYRow(src_rgba, dst_y, width);
- RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width);
- src_rgba += src_stride_rgba * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width);
- RGBAToYRow(src_rgba, dst_y, width);
- }
- return 0;
-}
-
-// Convert RGB24 to I420.
-LIBYUV_API
-int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
- src_stride_rgb24 = -src_stride_rgb24;
- }
-
-#if defined(HAS_RGB24TOYROW_NEON)
- void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C;
- void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) =
- RGB24ToYRow_C;
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- RGB24ToYRow = RGB24ToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB24ToYRow = RGB24ToYRow_NEON;
- }
- if (width >= 16) {
- RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- RGB24ToUVRow = RGB24ToUVRow_NEON;
- }
- }
- }
-#else // HAS_RGB24TOYROW_NEON
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
-
- void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RGB24ToARGBRow_C;
-#if defined(HAS_RGB24TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
- }
- }
-#endif
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- }
- }
-#endif
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#endif // HAS_ARGBTOUVROW_SSSE3
-#endif // HAS_RGB24TOYROW_NEON
-
- for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB24TOYROW_NEON)
- RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
- RGB24ToYRow(src_rgb24, dst_y, width);
- RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
-#else
- RGB24ToARGBRow(src_rgb24, row, width);
- RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_rgb24 += src_stride_rgb24 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RGB24TOYROW_NEON)
- RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
- RGB24ToYRow(src_rgb24, dst_y, width);
-#else
- RGB24ToARGBRow(src_rgb24, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_RGB24TOYROW_NEON)
- free_aligned_buffer_64(row);
-#endif
- return 0;
-}
-
-// Convert RAW to I420.
-LIBYUV_API
-int RAWToI420(const uint8* src_raw, int src_stride_raw,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_raw || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_raw = src_raw + (height - 1) * src_stride_raw;
- src_stride_raw = -src_stride_raw;
- }
-
-#if defined(HAS_RAWTOYROW_NEON)
- void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C;
- void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) =
- RAWToYRow_C;
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- RAWToYRow = RAWToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RAWToYRow = RAWToYRow_NEON;
- }
- if (width >= 16) {
- RAWToUVRow = RAWToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- RAWToUVRow = RAWToUVRow_NEON;
- }
- }
- }
-#else // HAS_RAWTOYROW_NEON
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
-
- void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RAWToARGBRow_C;
-#if defined(HAS_RAWTOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RAWToARGBRow = RAWToARGBRow_SSSE3;
- }
- }
-#endif
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- }
- }
-#endif
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#endif // HAS_ARGBTOUVROW_SSSE3
-#endif // HAS_RAWTOYROW_NEON
-
- for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RAWTOYROW_NEON)
- RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
- RAWToYRow(src_raw, dst_y, width);
- RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
-#else
- RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_raw += src_stride_raw * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RAWTOYROW_NEON)
- RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
- RAWToYRow(src_raw, dst_y, width);
-#else
- RAWToARGBRow(src_raw, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_RAWTOYROW_NEON)
- free_aligned_buffer_64(row);
-#endif
- return 0;
-}
-
-// Convert RGB565 to I420.
-LIBYUV_API
-int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
- src_stride_rgb565 = -src_stride_rgb565;
- }
-
-#if defined(HAS_RGB565TOYROW_NEON)
- void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C;
- void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) =
- RGB565ToYRow_C;
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- RGB565ToYRow = RGB565ToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToYRow = RGB565ToYRow_NEON;
- }
- if (width >= 16) {
- RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- RGB565ToUVRow = RGB565ToUVRow_NEON;
- }
- }
- }
-#else // HAS_RGB565TOYROW_NEON
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
-
- void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RGB565ToARGBRow_C;
-#if defined(HAS_RGB565TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
- }
- }
-#endif
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- }
- }
-#endif
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#endif // HAS_ARGBTOUVROW_SSSE3
-#endif // HAS_RGB565TOYROW_NEON
-
- for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB565TOYROW_NEON)
- RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
- RGB565ToYRow(src_rgb565, dst_y, width);
- RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
-#else
- RGB565ToARGBRow(src_rgb565, row, width);
- RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_rgb565 += src_stride_rgb565 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RGB565TOYROW_NEON)
- RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
- RGB565ToYRow(src_rgb565, dst_y, width);
-#else
- RGB565ToARGBRow(src_rgb565, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_RGB565TOYROW_NEON)
- free_aligned_buffer_64(row);
-#endif
- return 0;
-}
-
-// Convert ARGB1555 to I420.
-LIBYUV_API
-int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
- src_stride_argb1555 = -src_stride_argb1555;
- }
-
-#if defined(HAS_ARGB1555TOYROW_NEON)
- void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C;
- void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) =
- ARGB1555ToYRow_C;
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGB1555ToYRow = ARGB1555ToYRow_NEON;
- }
- if (width >= 16) {
- ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
- }
- }
- }
-#else // HAS_ARGB1555TOYROW_NEON
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
-
- void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- ARGB1555ToARGBRow_C;
-#if defined(HAS_ARGB1555TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
- }
- }
-#endif
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- }
- }
-#endif
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#endif // HAS_ARGBTOUVROW_SSSE3
-#endif // HAS_ARGB1555TOYROW_NEON
-
- for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB1555TOYROW_NEON)
- ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
- ARGB1555ToYRow(src_argb1555, dst_y, width);
- ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
- width);
-#else
- ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
- width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_argb1555 += src_stride_argb1555 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_ARGB1555TOYROW_NEON)
- ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
- ARGB1555ToYRow(src_argb1555, dst_y, width);
-#else
- ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_ARGB1555TOYROW_NEON)
- free_aligned_buffer_64(row);
-#endif
- return 0;
-}
-
-// Convert ARGB4444 to I420.
-LIBYUV_API
-int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
- src_stride_argb4444 = -src_stride_argb4444;
- }
-
-#if defined(HAS_ARGB4444TOYROW_NEON)
- void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C;
- void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) =
- ARGB4444ToYRow_C;
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGB4444ToYRow = ARGB4444ToYRow_NEON;
- }
- if (width >= 16) {
- ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
- }
- }
- }
-#else // HAS_ARGB4444TOYROW_NEON
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
-
- void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- ARGB4444ToARGBRow_C;
-#if defined(HAS_ARGB4444TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
- }
- }
-#endif
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- }
- }
-#endif
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#endif // HAS_ARGBTOUVROW_SSSE3
-#endif // HAS_ARGB4444TOYROW_NEON
-
- for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
- ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
- ARGB4444ToYRow(src_argb4444, dst_y, width);
- ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
- width);
-#else
- ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
- width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_argb4444 += src_stride_argb4444 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
- ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
- ARGB4444ToYRow(src_argb4444, dst_y, width);
-#else
- ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_ARGB4444TOYROW_NEON)
- free_aligned_buffer_64(row);
-#endif
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc
deleted file mode 100755
index a8aab91478..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc
+++ /dev/null
@@ -1,901 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_argb.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-#include "libyuv/rotate_argb.h"
-#include "libyuv/row.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy ARGB with optional flipping
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_argb || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-
- CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
- width * 4, height);
- return 0;
-}
-
-// Convert I444 to ARGB.
-LIBYUV_API
-int I444ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_y || !src_u || !src_v ||
- !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u == width &&
- src_stride_v == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
- }
- void (*I444ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I444ToARGBRow_C;
-#if defined(HAS_I444TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I444ToARGBRow = I444ToARGBRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_I444TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I444ToARGBRow = I444ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I444ToARGBRow = I444ToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I422 to ARGB.
-LIBYUV_API
-int I422ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_y || !src_u || !src_v ||
- !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
- }
- void (*I422ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
- I422ToARGBRow = I422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I422ToARGBRow = I422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I411 to ARGB.
-LIBYUV_API
-int I411ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_y || !src_u || !src_v ||
- !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 4 == width &&
- src_stride_v * 4 == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
- }
- void (*I411ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I411ToARGBRow_C;
-#if defined(HAS_I411TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I411ToARGBRow = I411ToARGBRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_I411TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I411ToARGBRow = I411ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I411ToARGBRow = I411ToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I400 to ARGB.
-LIBYUV_API
-int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_y || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_argb = 0;
- }
- void (*YToARGBRow)(const uint8* y_buf,
- uint8* rgb_buf,
- int width) = YToARGBRow_C;
-#if defined(HAS_YTOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- YToARGBRow = YToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- YToARGBRow = YToARGBRow_SSE2;
- }
- }
-#elif defined(HAS_YTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- YToARGBRow = YToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- YToARGBRow = YToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- YToARGBRow(src_y, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- }
- return 0;
-}
-
-// Convert I400 to ARGB.
-LIBYUV_API
-int I400ToARGB(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_y || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_argb = 0;
- }
- void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
- I400ToARGBRow_C;
-#if defined(HAS_I400TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
- I400ToARGBRow = I400ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- I400ToARGBRow = I400ToARGBRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I400ToARGBRow = I400ToARGBRow_SSE2;
- }
- }
- }
-#elif defined(HAS_I400TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I400ToARGBRow = I400ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I400ToARGBRow = I400ToARGBRow_NEON;
- }
- }
-#endif
- for (int y = 0; y < height; ++y) {
- I400ToARGBRow(src_y, dst_argb, width);
- src_y += src_stride_y;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Shuffle table for converting BGRA to ARGB.
-static uvec8 kShuffleMaskBGRAToARGB = {
- 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
-};
-
-// Shuffle table for converting ABGR to ARGB.
-static uvec8 kShuffleMaskABGRToARGB = {
- 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
-};
-
-// Shuffle table for converting RGBA to ARGB.
-static uvec8 kShuffleMaskRGBAToARGB = {
- 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u
-};
-
-// Convert BGRA to ARGB.
-LIBYUV_API
-int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- return ARGBShuffle(src_bgra, src_stride_bgra,
- dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskBGRAToARGB),
- width, height);
-}
-
-// Convert ABGR to ARGB.
-LIBYUV_API
-int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- return ARGBShuffle(src_abgr, src_stride_abgr,
- dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskABGRToARGB),
- width, height);
-}
-
-// Convert RGBA to ARGB.
-LIBYUV_API
-int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- return ARGBShuffle(src_rgba, src_stride_rgba,
- dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskRGBAToARGB),
- width, height);
-}
-
-// Convert RGB24 to ARGB.
-LIBYUV_API
-int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_rgb24 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
- src_stride_rgb24 = -src_stride_rgb24;
- }
- // Coalesce rows.
- if (src_stride_rgb24 == width * 3 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_rgb24 = dst_stride_argb = 0;
- }
- void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RGB24ToARGBRow_C;
-#if defined(HAS_RGB24TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
- }
- }
-#elif defined(HAS_RGB24TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB24ToARGBRow = RGB24ToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- RGB24ToARGBRow(src_rgb24, dst_argb, width);
- src_rgb24 += src_stride_rgb24;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert RAW to ARGB.
-LIBYUV_API
-int RAWToARGB(const uint8* src_raw, int src_stride_raw,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_raw || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_raw = src_raw + (height - 1) * src_stride_raw;
- src_stride_raw = -src_stride_raw;
- }
- // Coalesce rows.
- if (src_stride_raw == width * 3 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_raw = dst_stride_argb = 0;
- }
- void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RAWToARGBRow_C;
-#if defined(HAS_RAWTOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RAWToARGBRow = RAWToARGBRow_SSSE3;
- }
- }
-#elif defined(HAS_RAWTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- RAWToARGBRow = RAWToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RAWToARGBRow = RAWToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- RAWToARGBRow(src_raw, dst_argb, width);
- src_raw += src_stride_raw;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert RGB565 to ARGB.
-LIBYUV_API
-int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_rgb565 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
- src_stride_rgb565 = -src_stride_rgb565;
- }
- // Coalesce rows.
- if (src_stride_rgb565 == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_rgb565 = dst_stride_argb = 0;
- }
- void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
- RGB565ToARGBRow_C;
-#if defined(HAS_RGB565TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
- }
- }
-#elif defined(HAS_RGB565TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToARGBRow = RGB565ToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- RGB565ToARGBRow(src_rgb565, dst_argb, width);
- src_rgb565 += src_stride_rgb565;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert ARGB1555 to ARGB.
-LIBYUV_API
-int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_argb1555 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
- src_stride_argb1555 = -src_stride_argb1555;
- }
- // Coalesce rows.
- if (src_stride_argb1555 == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb1555 = dst_stride_argb = 0;
- }
- void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
- int pix) = ARGB1555ToARGBRow_C;
-#if defined(HAS_ARGB1555TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
- }
- }
-#elif defined(HAS_ARGB1555TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
- src_argb1555 += src_stride_argb1555;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert ARGB4444 to ARGB.
-LIBYUV_API
-int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_argb4444 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
- src_stride_argb4444 = -src_stride_argb4444;
- }
- // Coalesce rows.
- if (src_stride_argb4444 == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb4444 = dst_stride_argb = 0;
- }
- void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
- int pix) = ARGB4444ToARGBRow_C;
-#if defined(HAS_ARGB4444TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
- }
- }
-#elif defined(HAS_ARGB4444TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGB4444ToARGBRow(src_argb4444, dst_argb, width);
- src_argb4444 += src_stride_argb4444;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert NV12 to ARGB.
-LIBYUV_API
-int NV12ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_y || !src_uv || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- void (*NV12ToARGBRow)(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* rgb_buf,
- int width) = NV12ToARGBRow_C;
-#if defined(HAS_NV12TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- NV12ToARGBRow = NV12ToARGBRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_NV12TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- NV12ToARGBRow(src_y, src_uv, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- if (y & 1) {
- src_uv += src_stride_uv;
- }
- }
- return 0;
-}
-
-// Convert NV21 to ARGB.
-LIBYUV_API
-int NV21ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_y || !src_uv || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- void (*NV21ToARGBRow)(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* rgb_buf,
- int width) = NV21ToARGBRow_C;
-#if defined(HAS_NV21TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- NV21ToARGBRow = NV21ToARGBRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_NV21TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV21ToARGBRow = NV21ToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- NV21ToARGBRow(src_y, src_uv, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- if (y & 1) {
- src_uv += src_stride_uv;
- }
- }
- return 0;
-}
-
-// Convert M420 to ARGB.
-LIBYUV_API
-int M420ToARGB(const uint8* src_m420, int src_stride_m420,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_m420 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- void (*NV12ToARGBRow)(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* rgb_buf,
- int width) = NV12ToARGBRow_C;
-#if defined(HAS_NV12TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- NV12ToARGBRow = NV12ToARGBRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_NV12TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
- NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
- dst_argb + dst_stride_argb, width);
- dst_argb += dst_stride_argb * 2;
- src_m420 += src_stride_m420 * 3;
- }
- if (height & 1) {
- NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
- }
- return 0;
-}
-
-// Convert YUY2 to ARGB.
-LIBYUV_API
-int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_yuy2 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
- src_stride_yuy2 = -src_stride_yuy2;
- }
- // Coalesce rows.
- if (src_stride_yuy2 == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_yuy2 = dst_stride_argb = 0;
- }
- void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
- YUY2ToARGBRow_C;
-#if defined(HAS_YUY2TOARGBROW_SSSE3)
- // Posix is 16, Windows is 8.
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_YUY2TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- YUY2ToARGBRow = YUY2ToARGBRow_NEON;
- }
- }
-#endif
- for (int y = 0; y < height; ++y) {
- YUY2ToARGBRow(src_yuy2, dst_argb, width);
- src_yuy2 += src_stride_yuy2;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert UYVY to ARGB.
-LIBYUV_API
-int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_uyvy || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
- src_stride_uyvy = -src_stride_uyvy;
- }
- // Coalesce rows.
- if (src_stride_uyvy == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_uyvy = dst_stride_argb = 0;
- }
- void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
- UYVYToARGBRow_C;
-#if defined(HAS_UYVYTOARGBROW_SSSE3)
- // Posix is 16, Windows is 8.
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- UYVYToARGBRow = UYVYToARGBRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_UYVYTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- UYVYToARGBRow = UYVYToARGBRow_NEON;
- }
- }
-#endif
- for (int y = 0; y < height; ++y) {
- UYVYToARGBRow(src_uyvy, dst_argb, width);
- src_uyvy += src_stride_uyvy;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc
deleted file mode 100755
index 1e10832856..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc
+++ /dev/null
@@ -1,1196 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_from.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/convert.h" // For I420Copy
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-#include "libyuv/scale.h" // For ScalePlane()
-#include "libyuv/video_common.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-// I420 To any I4xx YUV format with mirroring.
-static int I420ToI4xx(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_y_width, int src_y_height,
- int dst_uv_width, int dst_uv_height) {
- if (src_y_width == 0 || src_y_height == 0 ||
- dst_uv_width <= 0 || dst_uv_height <= 0) {
- return -1;
- }
- const int dst_y_width = Abs(src_y_width);
- const int dst_y_height = Abs(src_y_height);
- const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
- const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
- ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
- dst_y, dst_stride_y, dst_y_width, dst_y_height,
- kFilterBilinear);
- ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
- dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
- kFilterBilinear);
- ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
- dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
- kFilterBilinear);
- return 0;
-}
-
-// 420 chroma is 1/2 width, 1/2 height
-// 422 chroma is 1/2 width, 1x height
-LIBYUV_API
-int I420ToI422(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int dst_uv_width = (Abs(width) + 1) >> 1;
- const int dst_uv_height = Abs(height);
- return I420ToI4xx(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- dst_uv_width, dst_uv_height);
-}
-
-// 420 chroma is 1/2 width, 1/2 height
-// 444 chroma is 1x width, 1x height
-LIBYUV_API
-int I420ToI444(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int dst_uv_width = Abs(width);
- const int dst_uv_height = Abs(height);
- return I420ToI4xx(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- dst_uv_width, dst_uv_height);
-}
-
-// 420 chroma is 1/2 width, 1/2 height
-// 411 chroma is 1/4 width, 1x height
-LIBYUV_API
-int I420ToI411(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int dst_uv_width = (Abs(width) + 3) >> 2;
- const int dst_uv_height = Abs(height);
- return I420ToI4xx(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- dst_uv_width, dst_uv_height);
-}
-
-// Copy to I400. Source can be I420,422,444,400,NV12,NV21
-LIBYUV_API
-int I400Copy(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- if (!src_y || !dst_y ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- return 0;
-}
-
-LIBYUV_API
-int I422ToYUY2(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_yuy2, int dst_stride_yuy2,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_yuy2 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
- dst_stride_yuy2 = -dst_stride_yuy2;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_yuy2 == width * 2) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
- }
- void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_yuy2, int width) =
- I422ToYUY2Row_C;
-#if defined(HAS_I422TOYUY2ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_SSE2;
- }
- }
-#elif defined(HAS_I422TOYUY2ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
- I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- dst_yuy2 += dst_stride_yuy2;
- }
- return 0;
-}
-
-LIBYUV_API
-int I420ToYUY2(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_yuy2, int dst_stride_yuy2,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_yuy2 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
- dst_stride_yuy2 = -dst_stride_yuy2;
- }
- void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_yuy2, int width) =
- I422ToYUY2Row_C;
-#if defined(HAS_I422TOYUY2ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_SSE2;
- }
- }
-#elif defined(HAS_I422TOYUY2ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
- I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
- I422ToYUY2Row(src_y + src_stride_y, src_u, src_v,
- dst_yuy2 + dst_stride_yuy2, width);
- src_y += src_stride_y * 2;
- src_u += src_stride_u;
- src_v += src_stride_v;
- dst_yuy2 += dst_stride_yuy2 * 2;
- }
- if (height & 1) {
- I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
- }
- return 0;
-}
-
-LIBYUV_API
-int I422ToUYVY(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_uyvy, int dst_stride_uyvy,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_uyvy ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
- dst_stride_uyvy = -dst_stride_uyvy;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_uyvy == width * 2) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
- }
- void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_uyvy, int width) =
- I422ToUYVYRow_C;
-#if defined(HAS_I422TOUYVYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_SSE2;
- }
- }
-#elif defined(HAS_I422TOUYVYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
- I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- dst_uyvy += dst_stride_uyvy;
- }
- return 0;
-}
-
-LIBYUV_API
-int I420ToUYVY(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_uyvy, int dst_stride_uyvy,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_uyvy ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
- dst_stride_uyvy = -dst_stride_uyvy;
- }
- void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_uyvy, int width) =
- I422ToUYVYRow_C;
-#if defined(HAS_I422TOUYVYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_SSE2;
- }
- }
-#elif defined(HAS_I422TOUYVYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
- I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
- I422ToUYVYRow(src_y + src_stride_y, src_u, src_v,
- dst_uyvy + dst_stride_uyvy, width);
- src_y += src_stride_y * 2;
- src_u += src_stride_u;
- src_v += src_stride_v;
- dst_uyvy += dst_stride_uyvy * 2;
- }
- if (height & 1) {
- I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
- }
- return 0;
-}
-
-LIBYUV_API
-int I420ToNV12(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_y || !dst_uv ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- int halfheight = (height + 1) >> 1;
- dst_y = dst_y + (height - 1) * dst_stride_y;
- dst_uv = dst_uv + (halfheight - 1) * dst_stride_uv;
- dst_stride_y = -dst_stride_y;
- dst_stride_uv = -dst_stride_uv;
- }
- // Coalesce rows.
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (src_stride_y == width &&
- dst_stride_y == width) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_y = 0;
- }
- // Coalesce rows.
- if (src_stride_u == halfwidth &&
- src_stride_v == halfwidth &&
- dst_stride_uv == halfwidth * 2) {
- halfwidth *= halfheight;
- halfheight = 1;
- src_stride_u = src_stride_v = dst_stride_uv = 0;
- }
- void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) = MergeUVRow_C;
-#if defined(HAS_MERGEUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
- MergeUVRow_ = MergeUVRow_Any_SSE2;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
- IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
- IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
- MergeUVRow_ = MergeUVRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
- MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
- MergeUVRow_ = MergeUVRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
- MergeUVRow_ = MergeUVRow_Any_NEON;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_NEON;
- }
- }
-#endif
-
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- for (int y = 0; y < halfheight; ++y) {
- // Merge a row of U and V into a row of UV.
- MergeUVRow_(src_u, src_v, dst_uv, halfwidth);
- src_u += src_stride_u;
- src_v += src_stride_v;
- dst_uv += dst_stride_uv;
- }
- return 0;
-}
-
-LIBYUV_API
-int I420ToNV21(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_vu, int dst_stride_vu,
- int width, int height) {
- return I420ToNV12(src_y, src_stride_y,
- src_v, src_stride_v,
- src_u, src_stride_u,
- dst_y, src_stride_y,
- dst_vu, dst_stride_vu,
- width, height);
-}
-
-// Convert I420 to ARGB.
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- void (*I422ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
- I422ToARGBRow = I422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I422ToARGBRow = I422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to BGRA.
-LIBYUV_API
-int I420ToBGRA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_bgra, int dst_stride_bgra,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_bgra ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
- dst_stride_bgra = -dst_stride_bgra;
- }
- void (*I422ToBGRARow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToBGRARow_C;
-#if defined(HAS_I422TOBGRAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
- I422ToBGRARow = I422ToBGRARow_SSSE3;
- }
- }
- }
-#elif defined(HAS_I422TOBGRAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToBGRARow = I422ToBGRARow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToBGRARow = I422ToBGRARow_NEON;
- }
- }
-#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
- I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
- dst_bgra += dst_stride_bgra;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to ABGR.
-LIBYUV_API
-int I420ToABGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_abgr, int dst_stride_abgr,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_abgr ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
- dst_stride_abgr = -dst_stride_abgr;
- }
- void (*I422ToABGRRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToABGRRow_C;
-#if defined(HAS_I422TOABGRROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
- I422ToABGRRow = I422ToABGRRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_I422TOABGRROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToABGRRow = I422ToABGRRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToABGRRow = I422ToABGRRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
- dst_abgr += dst_stride_abgr;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to RGBA.
-LIBYUV_API
-int I420ToRGBA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_rgba ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
- dst_stride_rgba = -dst_stride_rgba;
- }
- void (*I422ToRGBARow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRGBARow_C;
-#if defined(HAS_I422TORGBAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
- I422ToRGBARow = I422ToRGBARow_SSSE3;
- }
- }
- }
-#elif defined(HAS_I422TORGBAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToRGBARow = I422ToRGBARow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGBARow = I422ToRGBARow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
- dst_rgba += dst_stride_rgba;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to RGB24.
-LIBYUV_API
-int I420ToRGB24(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_rgb24, int dst_stride_rgb24,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_rgb24 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
- dst_stride_rgb24 = -dst_stride_rgb24;
- }
- void (*I422ToRGB24Row)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRGB24Row_C;
-#if defined(HAS_I422TORGB24ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGB24Row = I422ToRGB24Row_SSSE3;
- }
- }
-#elif defined(HAS_I422TORGB24ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGB24Row = I422ToRGB24Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
- dst_rgb24 += dst_stride_rgb24;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to RAW.
-LIBYUV_API
-int I420ToRAW(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_raw, int dst_stride_raw,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_raw ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_raw = dst_raw + (height - 1) * dst_stride_raw;
- dst_stride_raw = -dst_stride_raw;
- }
- void (*I422ToRAWRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRAWRow_C;
-#if defined(HAS_I422TORAWROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToRAWRow = I422ToRAWRow_SSSE3;
- }
- }
-#elif defined(HAS_I422TORAWROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToRAWRow = I422ToRAWRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToRAWRow = I422ToRAWRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
- dst_raw += dst_stride_raw;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to ARGB1555.
-LIBYUV_API
-int I420ToARGB1555(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb1555, int dst_stride_argb1555,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_argb1555 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb1555 = dst_argb1555 + (height - 1) * dst_stride_argb1555;
- dst_stride_argb1555 = -dst_stride_argb1555;
- }
- void (*I422ToARGB1555Row)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToARGB1555Row_C;
-#if defined(HAS_I422TOARGB1555ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGB1555Row = I422ToARGB1555Row_SSSE3;
- }
- }
-#elif defined(HAS_I422TOARGB1555ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGB1555Row = I422ToARGB1555Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
- dst_argb1555 += dst_stride_argb1555;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-
-// Convert I420 to ARGB4444.
-LIBYUV_API
-int I420ToARGB4444(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb4444, int dst_stride_argb4444,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_argb4444 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb4444 = dst_argb4444 + (height - 1) * dst_stride_argb4444;
- dst_stride_argb4444 = -dst_stride_argb4444;
- }
- void (*I422ToARGB4444Row)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToARGB4444Row_C;
-#if defined(HAS_I422TOARGB4444ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGB4444Row = I422ToARGB4444Row_SSSE3;
- }
- }
-#elif defined(HAS_I422TOARGB4444ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGB4444Row = I422ToARGB4444Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
- dst_argb4444 += dst_stride_argb4444;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to RGB565.
-LIBYUV_API
-int I420ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height) {
- if (!src_y || !src_u || !src_v || !dst_rgb565 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
- dst_stride_rgb565 = -dst_stride_rgb565;
- }
- void (*I422ToRGB565Row)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRGB565Row_C;
-#if defined(HAS_I422TORGB565ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGB565Row = I422ToRGB565Row_SSSE3;
- }
- }
-#elif defined(HAS_I422TORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToRGB565Row = I422ToRGB565Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGB565Row = I422ToRGB565Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
- dst_rgb565 += dst_stride_rgb565;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert I420 to specified format
-LIBYUV_API
-int ConvertFromI420(const uint8* y, int y_stride,
- const uint8* u, int u_stride,
- const uint8* v, int v_stride,
- uint8* dst_sample, int dst_sample_stride,
- int width, int height,
- uint32 fourcc) {
- uint32 format = CanonicalFourCC(fourcc);
- if (!y || !u|| !v || !dst_sample ||
- width <= 0 || height == 0) {
- return -1;
- }
- int r = 0;
- switch (format) {
- // Single plane formats
- case FOURCC_YUY2:
- r = I420ToYUY2(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 2,
- width, height);
- break;
- case FOURCC_UYVY:
- r = I420ToUYVY(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 2,
- width, height);
- break;
- case FOURCC_RGBP:
- r = I420ToRGB565(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 2,
- width, height);
- break;
- case FOURCC_RGBO:
- r = I420ToARGB1555(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 2,
- width, height);
- break;
- case FOURCC_R444:
- r = I420ToARGB4444(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 2,
- width, height);
- break;
- case FOURCC_24BG:
- r = I420ToRGB24(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 3,
- width, height);
- break;
- case FOURCC_RAW:
- r = I420ToRAW(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 3,
- width, height);
- break;
- case FOURCC_ARGB:
- r = I420ToARGB(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 4,
- width, height);
- break;
- case FOURCC_BGRA:
- r = I420ToBGRA(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 4,
- width, height);
- break;
- case FOURCC_ABGR:
- r = I420ToABGR(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 4,
- width, height);
- break;
- case FOURCC_RGBA:
- r = I420ToRGBA(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width * 4,
- width, height);
- break;
- case FOURCC_BGGR:
- r = I420ToBayerBGGR(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- case FOURCC_GBRG:
- r = I420ToBayerGBRG(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- case FOURCC_GRBG:
- r = I420ToBayerGRBG(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- case FOURCC_RGGB:
- r = I420ToBayerRGGB(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- case FOURCC_I400:
- r = I400Copy(y, y_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- case FOURCC_NV12: {
- uint8* dst_uv = dst_sample + width * height;
- r = I420ToNV12(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- dst_uv,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- }
- case FOURCC_NV21: {
- uint8* dst_vu = dst_sample + width * height;
- r = I420ToNV21(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample,
- dst_sample_stride ? dst_sample_stride : width,
- dst_vu,
- dst_sample_stride ? dst_sample_stride : width,
- width, height);
- break;
- }
- // TODO(fbarchard): Add M420 and Q420.
- // Triplanar formats
- // TODO(fbarchard): halfstride instead of halfwidth
- case FOURCC_I420:
- case FOURCC_YU12:
- case FOURCC_YV12: {
- int halfwidth = (width + 1) / 2;
- int halfheight = (height + 1) / 2;
- uint8* dst_u;
- uint8* dst_v;
- if (format == FOURCC_YV12) {
- dst_v = dst_sample + width * height;
- dst_u = dst_v + halfwidth * halfheight;
- } else {
- dst_u = dst_sample + width * height;
- dst_v = dst_u + halfwidth * halfheight;
- }
- r = I420Copy(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample, width,
- dst_u, halfwidth,
- dst_v, halfwidth,
- width, height);
- break;
- }
- case FOURCC_I422:
- case FOURCC_YV16: {
- int halfwidth = (width + 1) / 2;
- uint8* dst_u;
- uint8* dst_v;
- if (format == FOURCC_YV16) {
- dst_v = dst_sample + width * height;
- dst_u = dst_v + halfwidth * height;
- } else {
- dst_u = dst_sample + width * height;
- dst_v = dst_u + halfwidth * height;
- }
- r = I420ToI422(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample, width,
- dst_u, halfwidth,
- dst_v, halfwidth,
- width, height);
- break;
- }
- case FOURCC_I444:
- case FOURCC_YV24: {
- uint8* dst_u;
- uint8* dst_v;
- if (format == FOURCC_YV24) {
- dst_v = dst_sample + width * height;
- dst_u = dst_v + width * height;
- } else {
- dst_u = dst_sample + width * height;
- dst_v = dst_u + width * height;
- }
- r = I420ToI444(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample, width,
- dst_u, width,
- dst_v, width,
- width, height);
- break;
- }
- case FOURCC_I411: {
- int quarterwidth = (width + 3) / 4;
- uint8* dst_u = dst_sample + width * height;
- uint8* dst_v = dst_u + quarterwidth * height;
- r = I420ToI411(y, y_stride,
- u, u_stride,
- v, v_stride,
- dst_sample, width,
- dst_u, quarterwidth,
- dst_v, quarterwidth,
- width, height);
- break;
- }
-
- // Formats not supported - MJPG, biplanar, some rgb formats.
- default:
- return -1; // unknown fourcc - return failure code.
- }
- return r;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc
deleted file mode 100755
index 41421fb30b..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc
+++ /dev/null
@@ -1,1096 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_from_argb.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// ARGB little endian (bgra in memory) to I444
-LIBYUV_API
-int ARGBToI444(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_y == width &&
- dst_stride_u == width &&
- dst_stride_v == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
- }
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
- void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV444Row_C;
-#if defined(HAS_ARGBTOUV444ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUV444Row = ARGBToUV444Row_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- ARGBToUV444Row = ARGBToUV444Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToUV444Row(src_argb, dst_u, dst_v, width);
- ARGBToYRow(src_argb, dst_y, width);
- src_argb += src_stride_argb;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- return 0;
-}
-
-// ARGB little endian (bgra in memory) to I422
-LIBYUV_API
-int ARGBToI422(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_y == width &&
- dst_stride_u * 2 == width &&
- dst_stride_v * 2 == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
- }
- void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV422Row_C;
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_SSSE3;
- }
- }
- }
-#endif
-
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- if (width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToUV422Row(src_argb, dst_u, dst_v, width);
- ARGBToYRow(src_argb, dst_y, width);
- src_argb += src_stride_argb;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- return 0;
-}
-
-// ARGB little endian (bgra in memory) to I411
-LIBYUV_API
-int ARGBToI411(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_y == width &&
- dst_stride_u * 4 == width &&
- dst_stride_v * 4 == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
- }
- void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV411Row_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- if (width >= 32) {
- ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUV411Row = ARGBToUV411Row_NEON;
- }
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToUV411Row(src_argb, dst_u, dst_v, width);
- ARGBToYRow(src_argb, dst_y, width);
- src_argb += src_stride_argb;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- return 0;
-}
-
-LIBYUV_API
-int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height) {
- if (!src_argb ||
- !dst_y || !dst_uv ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
- }
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
- }
-#endif
- int halfwidth = (width + 1) >> 1;
- void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) = MergeUVRow_C;
-#if defined(HAS_MERGEUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
- MergeUVRow_ = MergeUVRow_Any_SSE2;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
- MergeUVRow_ = MergeUVRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
- MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
- MergeUVRow_ = MergeUVRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
- MergeUVRow_ = MergeUVRow_Any_NEON;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_NEON;
- }
- }
-#endif
-
- // Allocate a rows of uv.
- align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
- uint8* row_v = row_u + ((halfwidth + 15) & ~15);
-
- for (int y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
- MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, row_u, row_v, width);
- MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- }
- free_aligned_buffer_64(row_u);
- return 0;
-}
-
-// Same as NV12 but U and V swapped.
-LIBYUV_API
-int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height) {
- if (!src_argb ||
- !dst_y || !dst_uv ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
- }
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
- }
-#endif
- int halfwidth = (width + 1) >> 1;
- void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) = MergeUVRow_C;
-#if defined(HAS_MERGEUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
- MergeUVRow_ = MergeUVRow_Any_SSE2;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
- MergeUVRow_ = MergeUVRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
- MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
- MergeUVRow_ = MergeUVRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
- MergeUVRow_ = MergeUVRow_Any_NEON;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_NEON;
- }
- }
-#endif
-
- // Allocate a rows of uv.
- align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
- uint8* row_v = row_u + ((halfwidth + 15) & ~15);
-
- for (int y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- }
- free_aligned_buffer_64(row_u);
- return 0;
-}
-
-// Convert ARGB to YUY2.
-LIBYUV_API
-int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yuy2, int dst_stride_yuy2,
- int width, int height) {
- if (!src_argb || !dst_yuy2 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
- dst_stride_yuy2 = -dst_stride_yuy2;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_yuy2 == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_yuy2 = 0;
- }
- void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV422Row_C;
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_SSSE3;
- }
- }
- }
-#endif
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- if (width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
- }
-#endif
-
- void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_yuy2, int width) =
- I422ToYUY2Row_C;
-#if defined(HAS_I422TOYUY2ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_SSE2;
- }
- }
-#elif defined(HAS_I422TOYUY2ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
- I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_NEON;
- }
- }
-#endif
-
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-
- for (int y = 0; y < height; ++y) {
- ARGBToUV422Row(src_argb, row_u, row_v, width);
- ARGBToYRow(src_argb, row_y, width);
- I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width);
- src_argb += src_stride_argb;
- dst_yuy2 += dst_stride_yuy2;
- }
-
- free_aligned_buffer_64(row_y);
- return 0;
-}
-
-// Convert ARGB to UYVY.
-LIBYUV_API
-int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
- uint8* dst_uyvy, int dst_stride_uyvy,
- int width, int height) {
- if (!src_argb || !dst_uyvy ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
- dst_stride_uyvy = -dst_stride_uyvy;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_uyvy == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_uyvy = 0;
- }
- void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV422Row_C;
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_SSSE3;
- }
- }
- }
-#endif
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- if (width >= 16) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
- }
-#endif
-
- void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_uyvy, int width) =
- I422ToUYVYRow_C;
-#if defined(HAS_I422TOUYVYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_SSE2;
- }
- }
-#elif defined(HAS_I422TOUYVYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
- I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_NEON;
- }
- }
-#endif
-
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-
- for (int y = 0; y < height; ++y) {
- ARGBToUV422Row(src_argb, row_u, row_v, width);
- ARGBToYRow(src_argb, row_y, width);
- I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width);
- src_argb += src_stride_argb;
- dst_uyvy += dst_stride_uyvy;
- }
-
- free_aligned_buffer_64(row_y);
- return 0;
-}
-
-// Convert ARGB to I400.
-LIBYUV_API
-int ARGBToI400(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- if (!src_argb || !dst_y || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_y == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_y = 0;
- }
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToYRow(src_argb, dst_y, width);
- src_argb += src_stride_argb;
- dst_y += dst_stride_y;
- }
- return 0;
-}
-
-// Shuffle table for converting ARGB to RGBA.
-static uvec8 kShuffleMaskARGBToRGBA = {
- 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u
-};
-
-// Convert ARGB to RGBA.
-LIBYUV_API
-int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height) {
- return ARGBShuffle(src_argb, src_stride_argb,
- dst_rgba, dst_stride_rgba,
- (const uint8*)(&kShuffleMaskARGBToRGBA),
- width, height);
-}
-
-// Convert ARGB To RGB24.
-LIBYUV_API
-int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb24, int dst_stride_rgb24,
- int width, int height) {
- if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_rgb24 == width * 3) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_rgb24 = 0;
- }
- void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToRGB24Row_C;
-#if defined(HAS_ARGBTORGB24ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
- }
- }
-#elif defined(HAS_ARGBTORGB24ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToRGB24Row = ARGBToRGB24Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToRGB24Row(src_argb, dst_rgb24, width);
- src_argb += src_stride_argb;
- dst_rgb24 += dst_stride_rgb24;
- }
- return 0;
-}
-
-// Convert ARGB To RAW.
-LIBYUV_API
-int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
- uint8* dst_raw, int dst_stride_raw,
- int width, int height) {
- if (!src_argb || !dst_raw || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_raw == width * 3) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_raw = 0;
- }
- void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToRAWRow_C;
-#if defined(HAS_ARGBTORAWROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToRAWRow = ARGBToRAWRow_SSSE3;
- }
- }
-#elif defined(HAS_ARGBTORAWROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToRAWRow = ARGBToRAWRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToRAWRow(src_argb, dst_raw, width);
- src_argb += src_stride_argb;
- dst_raw += dst_stride_raw;
- }
- return 0;
-}
-
-// Convert ARGB To RGB565.
-LIBYUV_API
-int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height) {
- if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_rgb565 == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_rgb565 = 0;
- }
- void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToRGB565Row_C;
-#if defined(HAS_ARGBTORGB565ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
- }
- }
-#elif defined(HAS_ARGBTORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToRGB565Row = ARGBToRGB565Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToRGB565Row(src_argb, dst_rgb565, width);
- src_argb += src_stride_argb;
- dst_rgb565 += dst_stride_rgb565;
- }
- return 0;
-}
-
-// Convert ARGB To ARGB1555.
-LIBYUV_API
-int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb1555, int dst_stride_argb1555,
- int width, int height) {
- if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb1555 == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb1555 = 0;
- }
- void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToARGB1555Row_C;
-#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
- }
- }
-#elif defined(HAS_ARGBTOARGB1555ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToARGB1555Row(src_argb, dst_argb1555, width);
- src_argb += src_stride_argb;
- dst_argb1555 += dst_stride_argb1555;
- }
- return 0;
-}
-
-// Convert ARGB To ARGB4444.
-LIBYUV_API
-int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb4444, int dst_stride_argb4444,
- int width, int height) {
- if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb4444 == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb4444 = 0;
- }
- void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToARGB4444Row_C;
-#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
- }
- }
-#elif defined(HAS_ARGBTOARGB4444ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToARGB4444Row(src_argb, dst_argb4444, width);
- src_argb += src_stride_argb;
- dst_argb4444 += dst_stride_argb4444;
- }
- return 0;
-}
-
-// Convert ARGB to J420. (JPeg full range I420).
-LIBYUV_API
-int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yj, int dst_stride_yj,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_argb ||
- !dst_yj || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
- ARGBToYJRow_C;
-#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
- ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3;
- ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_SSSE3;
- if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
- ARGBToYJRow = ARGBToYJRow_SSSE3;
- }
- }
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- ARGBToYJRow = ARGBToYJRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYJRow = ARGBToYJRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- }
- if (width >= 16) {
- ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_NEON;
- }
- }
- }
-#endif
-
- for (int y = 0; y < height - 1; y += 2) {
- ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
- ARGBToYJRow(src_argb, dst_yj, width);
- ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
- src_argb += src_stride_argb * 2;
- dst_yj += dst_stride_yj * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
- ARGBToYJRow(src_argb, dst_yj, width);
- }
- return 0;
-}
-
-// Convert ARGB to J400.
-LIBYUV_API
-int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yj, int dst_stride_yj,
- int width, int height) {
- if (!src_argb || !dst_yj || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_yj == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_yj = 0;
- }
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
- ARGBToYJRow_C;
-#if defined(HAS_ARGBTOYJROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
- ARGBToYJRow = ARGBToYJRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- ARGBToYJRow = ARGBToYJRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYJRow = ARGBToYJRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- }
- }
-#endif
-
- for (int y = 0; y < height; ++y) {
- ARGBToYJRow(src_argb, dst_yj, width);
- src_argb += src_stride_argb;
- dst_yj += dst_stride_yj;
- }
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc
deleted file mode 100755
index bcb980f7f1..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert.h"
-
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#ifdef HAVE_JPEG
-struct I420Buffers {
- uint8* y;
- int y_stride;
- uint8* u;
- int u_stride;
- uint8* v;
- int v_stride;
- int w;
- int h;
-};
-
-static void JpegCopyI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I420Copy(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-static void JpegI422ToI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I422ToI420(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-static void JpegI444ToI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I444ToI420(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-static void JpegI411ToI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I411ToI420(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-static void JpegI400ToI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I400ToI420(data[0], strides[0],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-// Query size of MJPG in pixels.
-LIBYUV_API
-int MJPGSize(const uint8* sample, size_t sample_size,
- int* width, int* height) {
- MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
- if (ret) {
- *width = mjpeg_decoder.GetWidth();
- *height = mjpeg_decoder.GetHeight();
- }
- mjpeg_decoder.UnloadFrame();
- return ret ? 0 : -1; // -1 for runtime failure.
-}
-
-// MJPG (Motion JPeg) to I420
-// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
-LIBYUV_API
-int MJPGToI420(const uint8* sample,
- size_t sample_size,
- uint8* y, int y_stride,
- uint8* u, int u_stride,
- uint8* v, int v_stride,
- int w, int h,
- int dw, int dh) {
- if (sample_size == kUnknownDataSize) {
- // ERROR: MJPEG frame size unknown
- return -1;
- }
-
- // TODO(fbarchard): Port MJpeg to C.
- MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
- if (ret && (mjpeg_decoder.GetWidth() != w ||
- mjpeg_decoder.GetHeight() != h)) {
- // ERROR: MJPEG frame has unexpected dimensions
- mjpeg_decoder.UnloadFrame();
- return 1; // runtime failure
- }
- if (ret) {
- I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
- // YUV420
- if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 2 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh);
- // YUV422
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh);
- // YUV444
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
- // YUV411
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh);
- // YUV400
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceGrayscale &&
- mjpeg_decoder.GetNumComponents() == 1 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh);
- } else {
- // TODO(fbarchard): Implement conversion for any other colorspace/sample
- // factors that occur in practice. 411 is supported by libjpeg
- // ERROR: Unable to convert MJPEG frame because format is not supported
- mjpeg_decoder.UnloadFrame();
- return 1;
- }
- }
- return ret ? 0 : 1;
-}
-
-#ifdef HAVE_JPEG
-struct ARGBBuffers {
- uint8* argb;
- int argb_stride;
- int w;
- int h;
-};
-
-static void JpegI420ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I420ToARGB(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-static void JpegI422ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I422ToARGB(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-static void JpegI444ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I444ToARGB(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-static void JpegI411ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I411ToARGB(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-static void JpegI400ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I400ToARGB(data[0], strides[0],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-// MJPG (Motion JPeg) to ARGB
-// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample,
- size_t sample_size,
- uint8* argb, int argb_stride,
- int w, int h,
- int dw, int dh) {
- if (sample_size == kUnknownDataSize) {
- // ERROR: MJPEG frame size unknown
- return -1;
- }
-
- // TODO(fbarchard): Port MJpeg to C.
- MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
- if (ret && (mjpeg_decoder.GetWidth() != w ||
- mjpeg_decoder.GetHeight() != h)) {
- // ERROR: MJPEG frame has unexpected dimensions
- mjpeg_decoder.UnloadFrame();
- return 1; // runtime failure
- }
- if (ret) {
- ARGBBuffers bufs = { argb, argb_stride, dw, dh };
- // YUV420
- if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 2 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh);
- // YUV422
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh);
- // YUV444
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
- // YUV411
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh);
- // YUV400
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceGrayscale &&
- mjpeg_decoder.GetNumComponents() == 1 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh);
- } else {
- // TODO(fbarchard): Implement conversion for any other colorspace/sample
- // factors that occur in practice. 411 is supported by libjpeg
- // ERROR: Unable to convert MJPEG frame because format is not supported
- mjpeg_decoder.UnloadFrame();
- return 1;
- }
- }
- return ret ? 0 : 1;
-}
-#endif
-
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc
deleted file mode 100755
index 1b228a7b4d..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_argb.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-#include "libyuv/rotate_argb.h"
-#include "libyuv/row.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// src_width is used for source stride computation
-// src_height is used to compute location of planes, and indicate inversion
-// sample_size is measured in bytes and is the size of the frame.
-// With MJPEG it is the compressed size of the frame.
-LIBYUV_API
-int ConvertToARGB(const uint8* sample, size_t sample_size,
- uint8* crop_argb, int argb_stride,
- int crop_x, int crop_y,
- int src_width, int src_height,
- int crop_width, int crop_height,
- enum RotationMode rotation,
- uint32 fourcc) {
- uint32 format = CanonicalFourCC(fourcc);
- int aligned_src_width = (src_width + 1) & ~1;
- const uint8* src;
- const uint8* src_uv;
- int abs_src_height = (src_height < 0) ? -src_height : src_height;
- int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
- int r = 0;
-
- // One pass rotation is available for some formats. For the rest, convert
- // to I420 (with optional vertical flipping) into a temporary I420 buffer,
- // and then rotate the I420 to the final destination buffer.
- // For in-place conversion, if destination crop_argb is same as source sample,
- // also enable temporary buffer.
- LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
- crop_argb == sample;
- uint8* tmp_argb = crop_argb;
- int tmp_argb_stride = argb_stride;
- uint8* rotate_buffer = NULL;
- int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
-
- if (crop_argb == NULL || sample == NULL ||
- src_width <= 0 || crop_width <= 0 ||
- src_height == 0 || crop_height == 0) {
- return -1;
- }
- if (src_height < 0) {
- inv_crop_height = -inv_crop_height;
- }
-
- if (need_buf) {
- int argb_size = crop_width * abs_crop_height * 4;
- rotate_buffer = (uint8*)malloc(argb_size);
- if (!rotate_buffer) {
- return 1; // Out of memory runtime error.
- }
- crop_argb = rotate_buffer;
- argb_stride = crop_width;
- }
-
- switch (format) {
- // Single plane formats
- case FOURCC_YUY2:
- src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = YUY2ToARGB(src, aligned_src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_UYVY:
- src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = UYVYToARGB(src, aligned_src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_24BG:
- src = sample + (src_width * crop_y + crop_x) * 3;
- r = RGB24ToARGB(src, src_width * 3,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RAW:
- src = sample + (src_width * crop_y + crop_x) * 3;
- r = RAWToARGB(src, src_width * 3,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_ARGB:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = ARGBToARGB(src, src_width * 4,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_BGRA:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = BGRAToARGB(src, src_width * 4,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_ABGR:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = ABGRToARGB(src, src_width * 4,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBA:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = RGBAToARGB(src, src_width * 4,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBP:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = RGB565ToARGB(src, src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBO:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB1555ToARGB(src, src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_R444:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB4444ToARGB(src, src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- // TODO(fbarchard): Support cropping Bayer by odd numbers
- // by adjusting fourcc.
- case FOURCC_BGGR:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerBGGRToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
- case FOURCC_GBRG:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerGBRGToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
- case FOURCC_GRBG:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerGRBGToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
- case FOURCC_RGGB:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerRGGBToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
- case FOURCC_I400:
- src = sample + src_width * crop_y + crop_x;
- r = I400ToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
- // Biplanar formats
- case FOURCC_NV12:
- src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
- r = NV12ToARGB(src, src_width,
- src_uv, aligned_src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_NV21:
- src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
- // Call NV12 but with u and v parameters swapped.
- r = NV21ToARGB(src, src_width,
- src_uv, aligned_src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_M420:
- src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
- r = M420ToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-// case FOURCC_Q420:
-// src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
-// src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
-// src_width + crop_x * 2;
-// r = Q420ToARGB(src, src_width * 3,
-// src_uv, src_width * 3,
-// crop_argb, argb_stride,
-// crop_width, inv_crop_height);
-// break;
- // Triplanar formats
- case FOURCC_I420:
- case FOURCC_YU12:
- case FOURCC_YV12: {
- const uint8* src_y = sample + (src_width * crop_y + crop_x);
- const uint8* src_u;
- const uint8* src_v;
- int halfwidth = (src_width + 1) / 2;
- int halfheight = (abs_src_height + 1) / 2;
- if (format == FOURCC_YV12) {
- src_v = sample + src_width * abs_src_height +
- (halfwidth * crop_y + crop_x) / 2;
- src_u = sample + src_width * abs_src_height +
- halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
- } else {
- src_u = sample + src_width * abs_src_height +
- (halfwidth * crop_y + crop_x) / 2;
- src_v = sample + src_width * abs_src_height +
- halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
- }
- r = I420ToARGB(src_y, src_width,
- src_u, halfwidth,
- src_v, halfwidth,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- }
- case FOURCC_I422:
- case FOURCC_YV16: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
- int halfwidth = (src_width + 1) / 2;
- if (format == FOURCC_YV16) {
- src_v = sample + src_width * abs_src_height +
- halfwidth * crop_y + crop_x / 2;
- src_u = sample + src_width * abs_src_height +
- halfwidth * (abs_src_height + crop_y) + crop_x / 2;
- } else {
- src_u = sample + src_width * abs_src_height +
- halfwidth * crop_y + crop_x / 2;
- src_v = sample + src_width * abs_src_height +
- halfwidth * (abs_src_height + crop_y) + crop_x / 2;
- }
- r = I422ToARGB(src_y, src_width,
- src_u, halfwidth,
- src_v, halfwidth,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- }
- case FOURCC_I444:
- case FOURCC_YV24: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
- if (format == FOURCC_YV24) {
- src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
- src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
- } else {
- src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
- src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
- }
- r = I444ToARGB(src_y, src_width,
- src_u, src_width,
- src_v, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- }
- case FOURCC_I411: {
- int quarterwidth = (src_width + 3) / 4;
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u = sample + src_width * abs_src_height +
- quarterwidth * crop_y + crop_x / 4;
- const uint8* src_v = sample + src_width * abs_src_height +
- quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
- r = I411ToARGB(src_y, src_width,
- src_u, quarterwidth,
- src_v, quarterwidth,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- }
-#ifdef HAVE_JPEG
- case FOURCC_MJPG:
- r = MJPGToARGB(sample, sample_size,
- crop_argb, argb_stride,
- src_width, abs_src_height, crop_width, inv_crop_height);
- break;
-#endif
- default:
- r = -1; // unknown fourcc - return failure code.
- }
-
- if (need_buf) {
- if (!r) {
- r = ARGBRotate(crop_argb, argb_stride,
- tmp_argb, tmp_argb_stride,
- crop_width, abs_crop_height, rotation);
- }
- free(rotate_buffer);
- }
-
- return r;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc
deleted file mode 100755
index 7b194fff72..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-
-#include "libyuv/convert.h"
-
-#include "libyuv/format_conversion.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// src_width is used for source stride computation
-// src_height is used to compute location of planes, and indicate inversion
-// sample_size is measured in bytes and is the size of the frame.
-// With MJPEG it is the compressed size of the frame.
-LIBYUV_API
-int ConvertToI420(const uint8* sample,
- size_t sample_size,
- uint8* y, int y_stride,
- uint8* u, int u_stride,
- uint8* v, int v_stride,
- int crop_x, int crop_y,
- int src_width, int src_height,
- int crop_width, int crop_height,
- enum RotationMode rotation,
- uint32 fourcc) {
- uint32 format = CanonicalFourCC(fourcc);
- int aligned_src_width = (src_width + 1) & ~1;
- const uint8* src;
- const uint8* src_uv;
- int abs_src_height = (src_height < 0) ? -src_height : src_height;
- int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
- int r = 0;
- LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
- format != FOURCC_NV12 && format != FOURCC_NV21 &&
- format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
- uint8* tmp_y = y;
- uint8* tmp_u = u;
- uint8* tmp_v = v;
- int tmp_y_stride = y_stride;
- int tmp_u_stride = u_stride;
- int tmp_v_stride = v_stride;
- uint8* rotate_buffer = NULL;
- int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
-
- if (!y || !u || !v || !sample ||
- src_width <= 0 || crop_width <= 0 ||
- src_height == 0 || crop_height == 0) {
- return -1;
- }
- if (src_height < 0) {
- inv_crop_height = -inv_crop_height;
- }
-
- // One pass rotation is available for some formats. For the rest, convert
- // to I420 (with optional vertical flipping) into a temporary I420 buffer,
- // and then rotate the I420 to the final destination buffer.
- // For in-place conversion, if destination y is same as source sample,
- // also enable temporary buffer.
- if (need_buf) {
- int y_size = crop_width * abs_crop_height;
- int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
- rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
- if (!rotate_buffer) {
- return 1; // Out of memory runtime error.
- }
- y = rotate_buffer;
- u = y + y_size;
- v = u + uv_size;
- y_stride = crop_width;
- u_stride = v_stride = ((crop_width + 1) / 2);
- }
-
- switch (format) {
- // Single plane formats
- case FOURCC_YUY2:
- src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = YUY2ToI420(src, aligned_src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_UYVY:
- src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = UYVYToI420(src, aligned_src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBP:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = RGB565ToI420(src, src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBO:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB1555ToI420(src, src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_R444:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB4444ToI420(src, src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_24BG:
- src = sample + (src_width * crop_y + crop_x) * 3;
- r = RGB24ToI420(src, src_width * 3,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RAW:
- src = sample + (src_width * crop_y + crop_x) * 3;
- r = RAWToI420(src, src_width * 3,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_ARGB:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = ARGBToI420(src, src_width * 4,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_BGRA:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = BGRAToI420(src, src_width * 4,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_ABGR:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = ABGRToI420(src, src_width * 4,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBA:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = RGBAToI420(src, src_width * 4,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- // TODO(fbarchard): Support cropping Bayer by odd numbers
- // by adjusting fourcc.
- case FOURCC_BGGR:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerBGGRToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_GBRG:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerGBRGToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_GRBG:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerGRBGToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGGB:
- src = sample + (src_width * crop_y + crop_x);
- r = BayerRGGBToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_I400:
- src = sample + src_width * crop_y + crop_x;
- r = I400ToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- // Biplanar formats
- case FOURCC_NV12:
- src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
- r = NV12ToI420Rotate(src, src_width,
- src_uv, aligned_src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height, rotation);
- break;
- case FOURCC_NV21:
- src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
- // Call NV12 but with u and v parameters swapped.
- r = NV12ToI420Rotate(src, src_width,
- src_uv, aligned_src_width,
- y, y_stride,
- v, v_stride,
- u, u_stride,
- crop_width, inv_crop_height, rotation);
- break;
- case FOURCC_M420:
- src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
- r = M420ToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_Q420:
- src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
- src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
- src_width + crop_x * 2;
- r = Q420ToI420(src, src_width * 3,
- src_uv, src_width * 3,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- // Triplanar formats
- case FOURCC_I420:
- case FOURCC_YU12:
- case FOURCC_YV12: {
- const uint8* src_y = sample + (src_width * crop_y + crop_x);
- const uint8* src_u;
- const uint8* src_v;
- int halfwidth = (src_width + 1) / 2;
- int halfheight = (abs_src_height + 1) / 2;
- if (format == FOURCC_YV12) {
- src_v = sample + src_width * abs_src_height +
- (halfwidth * crop_y + crop_x) / 2;
- src_u = sample + src_width * abs_src_height +
- halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
- } else {
- src_u = sample + src_width * abs_src_height +
- (halfwidth * crop_y + crop_x) / 2;
- src_v = sample + src_width * abs_src_height +
- halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
- }
- r = I420Rotate(src_y, src_width,
- src_u, halfwidth,
- src_v, halfwidth,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height, rotation);
- break;
- }
- case FOURCC_I422:
- case FOURCC_YV16: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
- int halfwidth = (src_width + 1) / 2;
- if (format == FOURCC_YV16) {
- src_v = sample + src_width * abs_src_height +
- halfwidth * crop_y + crop_x / 2;
- src_u = sample + src_width * abs_src_height +
- halfwidth * (abs_src_height + crop_y) + crop_x / 2;
- } else {
- src_u = sample + src_width * abs_src_height +
- halfwidth * crop_y + crop_x / 2;
- src_v = sample + src_width * abs_src_height +
- halfwidth * (abs_src_height + crop_y) + crop_x / 2;
- }
- r = I422ToI420(src_y, src_width,
- src_u, halfwidth,
- src_v, halfwidth,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- }
- case FOURCC_I444:
- case FOURCC_YV24: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
- if (format == FOURCC_YV24) {
- src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
- src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
- } else {
- src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
- src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
- }
- r = I444ToI420(src_y, src_width,
- src_u, src_width,
- src_v, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- }
- case FOURCC_I411: {
- int quarterwidth = (src_width + 3) / 4;
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u = sample + src_width * abs_src_height +
- quarterwidth * crop_y + crop_x / 4;
- const uint8* src_v = sample + src_width * abs_src_height +
- quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
- r = I411ToI420(src_y, src_width,
- src_u, quarterwidth,
- src_v, quarterwidth,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- }
-#ifdef HAVE_JPEG
- case FOURCC_MJPG:
- r = MJPGToI420(sample, sample_size,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- src_width, abs_src_height, crop_width, inv_crop_height);
- break;
-#endif
- default:
- r = -1; // unknown fourcc - return failure code.
- }
-
- if (need_buf) {
- if (!r) {
- r = I420Rotate(y, y_stride,
- u, u_stride,
- v, v_stride,
- tmp_y, tmp_y_stride,
- tmp_u, tmp_u_stride,
- tmp_v, tmp_v_stride,
- crop_width, abs_crop_height, rotation);
- }
- free(rotate_buffer);
- }
-
- return r;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc b/drivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc
deleted file mode 100755
index f52bd95551..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/cpu_id.h"
-
-#ifdef _ANDROID //libtheoraplayer addition for cpu feature detection
-#include "cpu-features.h"
-#endif
-
-#ifdef _MSC_VER
-#include <intrin.h> // For __cpuidex()
-#endif
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
- !defined(__native_client__) && defined(_M_X64) && \
- defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
-#include <immintrin.h> // For _xgetbv()
-#endif
-
-#if !defined(__native_client__)
-#include <stdlib.h> // For getenv()
-#endif
-
-// For ArmCpuCaps() but unittested on all platforms
-#include <stdio.h>
-#include <string.h>
-
-#include "libyuv/basic_types.h" // For CPU_X86
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// For functions that use the stack and have runtime checks for overflow,
-// use SAFEBUFFERS to avoid additional check.
-#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
-#define SAFEBUFFERS __declspec(safebuffers)
-#else
-#define SAFEBUFFERS
-#endif
-
-// Low level cpuid for X86. Returns zeros on other CPUs.
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
- (defined(_M_IX86) || defined(_M_X64) || \
- defined(__i386__) || defined(__x86_64__))
-LIBYUV_API
-void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
-#if defined(_MSC_VER)
-#if (_MSC_FULL_VER >= 160040219)
- __cpuidex((int*)(cpu_info), info_eax, info_ecx);
-#elif defined(_M_IX86)
- __asm {
- mov eax, info_eax
- mov ecx, info_ecx
- mov edi, cpu_info
- cpuid
- mov [edi], eax
- mov [edi + 4], ebx
- mov [edi + 8], ecx
- mov [edi + 12], edx
- }
-#else
- if (info_ecx == 0) {
- __cpuid((int*)(cpu_info), info_eax);
- } else {
- cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
- }
-#endif
-#else // defined(_MSC_VER)
- uint32 info_ebx, info_edx;
- asm volatile ( // NOLINT
-#if defined( __i386__) && defined(__PIC__)
- // Preserve ebx for fpic 32 bit.
- "mov %%ebx, %%edi \n"
- "cpuid \n"
- "xchg %%edi, %%ebx \n"
- : "=D" (info_ebx),
-#else
- "cpuid \n"
- : "=b" (info_ebx),
-#endif // defined( __i386__) && defined(__PIC__)
- "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx));
- cpu_info[0] = info_eax;
- cpu_info[1] = info_ebx;
- cpu_info[2] = info_ecx;
- cpu_info[3] = info_edx;
-#endif // defined(_MSC_VER)
-}
-
-#if !defined(__native_client__)
-#define HAS_XGETBV
-// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
-int TestOsSaveYmm() {
- uint32 xcr0 = 0u;
-#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
- xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
-#elif defined(_M_IX86)
- __asm {
- xor ecx, ecx // xcr 0
- _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
- mov xcr0, eax
- }
-#elif defined(__i386__) || defined(__x86_64__)
- asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
-#endif // defined(_MSC_VER)
- return((xcr0 & 6) == 6); // Is ymm saved?
-}
-#endif // !defined(__native_client__)
-#else
-LIBYUV_API
-void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
- cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
-}
-#endif
-
-// based on libvpx arm_cpudetect.c
-// For Arm, but public to allow testing on any CPU
-LIBYUV_API SAFEBUFFERS
-int ArmCpuCaps(const char* cpuinfo_name) {
- char cpuinfo_line[512];
- FILE* f = fopen(cpuinfo_name, "r");
- if (!f) {
- // Assume Neon if /proc/cpuinfo is unavailable.
- // This will occur for Chrome sandbox for Pepper or Render process.
- return kCpuHasNEON;
- }
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
- if (memcmp(cpuinfo_line, "Features", 8) == 0) {
- char* p = strstr(cpuinfo_line, " neon");
- if (p && (p[5] == ' ' || p[5] == '\n')) {
- fclose(f);
- return kCpuHasNEON;
- }
- }
- }
- fclose(f);
- return 0;
-}
-
-#if defined(__mips__) && defined(__linux__)
-static int MipsCpuCaps(const char* search_string) {
- char cpuinfo_line[512];
- const char* file_name = "/proc/cpuinfo";
- FILE* f = fopen(file_name, "r");
- if (!f) {
- // Assume DSP if /proc/cpuinfo is unavailable.
- // This will occur for Chrome sandbox for Pepper or Render process.
- return kCpuHasMIPS_DSP;
- }
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) {
- if (strstr(cpuinfo_line, search_string) != NULL) {
- fclose(f);
- return kCpuHasMIPS_DSP;
- }
- }
- fclose(f);
- return 0;
-}
-#endif
-
-// CPU detect function for SIMD instruction sets.
-LIBYUV_API
-int cpu_info_ = kCpuInit; // cpu_info is not initialized yet.
-
-// Test environment variable for disabling CPU features. Any non-zero value
-// to disable. Zero ignored to make it easy to set the variable on/off.
-#if !defined(__native_client__) && !defined(_M_ARM)
-
-static LIBYUV_BOOL TestEnv(const char* name) {
-#ifndef _WINRT
- const char* var = getenv(name);
- if (var) {
- if (var[0] != '0') {
- return LIBYUV_TRUE;
- }
- }
-#endif
- return LIBYUV_FALSE;
-}
-#else // nacl does not support getenv().
-static LIBYUV_BOOL TestEnv(const char*) {
- return LIBYUV_FALSE;
-}
-#endif
-
-LIBYUV_API SAFEBUFFERS
-int InitCpuFlags(void) {
-#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
-
- uint32 cpu_info1[4] = { 0, 0, 0, 0 };
- uint32 cpu_info7[4] = { 0, 0, 0, 0 };
- CpuId(1, 0, cpu_info1);
- CpuId(7, 0, cpu_info7);
- cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
- ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
- ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
- ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
- ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
- ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
- kCpuHasX86;
-#ifdef HAS_XGETBV
- if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave
- TestOsSaveYmm()) { // Saves YMM.
- cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
- kCpuHasAVX;
- }
-#endif
- // Environment variable overrides for testing.
- if (TestEnv("LIBYUV_DISABLE_X86")) {
- cpu_info_ &= ~kCpuHasX86;
- }
- if (TestEnv("LIBYUV_DISABLE_SSE2")) {
- cpu_info_ &= ~kCpuHasSSE2;
- }
- if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
- cpu_info_ &= ~kCpuHasSSSE3;
- }
- if (TestEnv("LIBYUV_DISABLE_SSE41")) {
- cpu_info_ &= ~kCpuHasSSE41;
- }
- if (TestEnv("LIBYUV_DISABLE_SSE42")) {
- cpu_info_ &= ~kCpuHasSSE42;
- }
- if (TestEnv("LIBYUV_DISABLE_AVX")) {
- cpu_info_ &= ~kCpuHasAVX;
- }
- if (TestEnv("LIBYUV_DISABLE_AVX2")) {
- cpu_info_ &= ~kCpuHasAVX2;
- }
- if (TestEnv("LIBYUV_DISABLE_ERMS")) {
- cpu_info_ &= ~kCpuHasERMS;
- }
- if (TestEnv("LIBYUV_DISABLE_FMA3")) {
- cpu_info_ &= ~kCpuHasFMA3;
- }
-#elif defined(__mips__) && defined(__linux__)
- // Linux mips parse text file for dsp detect.
- cpu_info_ = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP.
-#if defined(__mips_dspr2)
- cpu_info_ |= kCpuHasMIPS_DSPR2;
-#endif
- cpu_info_ |= kCpuHasMIPS;
-
- if (getenv("LIBYUV_DISABLE_MIPS")) {
- cpu_info_ &= ~kCpuHasMIPS;
- }
- if (getenv("LIBYUV_DISABLE_MIPS_DSP")) {
- cpu_info_ &= ~kCpuHasMIPS_DSP;
- }
- if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
- cpu_info_ &= ~kCpuHasMIPS_DSPR2;
- }
-#elif defined(__arm__)
-// gcc -mfpu=neon defines __ARM_NEON__
-// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
-// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
-#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
-#ifdef _ANDROID
- cpu_info_ = ArmCpuCaps("/proc/cpuinfo"); // libtheoraplayer #ifdef addition, just in case, android gave us troubles
-#else
- cpu_info_ = kCpuHasNEON;
-#endif
-#else
- // Linux arm parse text file for neon detect.
- cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
-#endif
- cpu_info_ |= kCpuHasARM;
- if (TestEnv("LIBYUV_DISABLE_NEON")) {
- cpu_info_ &= ~kCpuHasNEON;
- }
-#ifdef _ANDROID
- // libtheoraplayer addition to disable NEON support on android devices that don't support it, once again, just in case
- if ((android_getCpuFeaturesExt() & ANDROID_CPU_ARM_FEATURE_NEON) == 0)
- {
- cpu_info_ = kCpuHasARM;
- }
-#endif
-#endif // __arm__
- if (TestEnv("LIBYUV_DISABLE_ASM")) {
- cpu_info_ = 0;
- }
- return cpu_info_;
-}
-
-LIBYUV_API
-void MaskCpuFlags(int enable_flags) {
- cpu_info_ = InitCpuFlags() & enable_flags;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc b/drivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc
deleted file mode 100755
index a3daf96a98..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc
+++ /dev/null
@@ -1,552 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/format_conversion.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/video_common.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// generate a selector mask useful for pshufb
-static uint32 GenerateSelector(int select0, int select1) {
- return (uint32)(select0) |
- (uint32)((select1 + 4) << 8) |
- (uint32)((select0 + 8) << 16) |
- (uint32)((select1 + 12) << 24);
-}
-
-static int MakeSelectors(const int blue_index,
- const int green_index,
- const int red_index,
- uint32 dst_fourcc_bayer,
- uint32* index_map) {
- // Now build a lookup table containing the indices for the four pixels in each
- // 2x2 Bayer grid.
- switch (dst_fourcc_bayer) {
- case FOURCC_BGGR:
- index_map[0] = GenerateSelector(blue_index, green_index);
- index_map[1] = GenerateSelector(green_index, red_index);
- break;
- case FOURCC_GBRG:
- index_map[0] = GenerateSelector(green_index, blue_index);
- index_map[1] = GenerateSelector(red_index, green_index);
- break;
- case FOURCC_RGGB:
- index_map[0] = GenerateSelector(red_index, green_index);
- index_map[1] = GenerateSelector(green_index, blue_index);
- break;
- case FOURCC_GRBG:
- index_map[0] = GenerateSelector(green_index, red_index);
- index_map[1] = GenerateSelector(blue_index, green_index);
- break;
- default:
- return -1; // Bad FourCC
- }
- return 0;
-}
-
-// Converts 32 bit ARGB to Bayer RGB formats.
-LIBYUV_API
-int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height,
- uint32 dst_fourcc_bayer) {
- int y;
- const int blue_index = 0; // Offsets for ARGB format
- const int green_index = 1;
- const int red_index = 2;
- uint32 index_map[2];
- void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) = ARGBToBayerRow_C;
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_SSSE3;
- }
- }
-#elif defined(HAS_ARGBTOBAYERROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_NEON;
- }
- }
-#endif
- if (MakeSelectors(blue_index, green_index, red_index,
- dst_fourcc_bayer, index_map)) {
- return -1; // Bad FourCC
- }
-
- for (y = 0; y < height; ++y) {
- ARGBToBayerRow(src_argb, dst_bayer, index_map[y & 1], width);
- src_argb += src_stride_argb;
- dst_bayer += dst_stride_bayer;
- }
- return 0;
-}
-
-#define AVG(a, b) (((a) + (b)) >> 1)
-
-static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
- uint8* dst_argb, int pix) {
- const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
- uint8 g = src_bayer0[1];
- uint8 r = src_bayer1[1];
- int x;
- for (x = 0; x < pix - 2; x += 2) {
- dst_argb[0] = src_bayer0[0];
- dst_argb[1] = AVG(g, src_bayer0[1]);
- dst_argb[2] = AVG(r, src_bayer1[1]);
- dst_argb[3] = 255U;
- dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]);
- dst_argb[5] = src_bayer0[1];
- dst_argb[6] = src_bayer1[1];
- dst_argb[7] = 255U;
- g = src_bayer0[1];
- r = src_bayer1[1];
- src_bayer0 += 2;
- src_bayer1 += 2;
- dst_argb += 8;
- }
- dst_argb[0] = src_bayer0[0];
- dst_argb[1] = AVG(g, src_bayer0[1]);
- dst_argb[2] = AVG(r, src_bayer1[1]);
- dst_argb[3] = 255U;
- if (!(pix & 1)) {
- dst_argb[4] = src_bayer0[0];
- dst_argb[5] = src_bayer0[1];
- dst_argb[6] = src_bayer1[1];
- dst_argb[7] = 255U;
- }
-}
-
-static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
- uint8* dst_argb, int pix) {
- const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
- uint8 g = src_bayer0[1];
- uint8 b = src_bayer1[1];
- int x;
- for (x = 0; x < pix - 2; x += 2) {
- dst_argb[0] = AVG(b, src_bayer1[1]);
- dst_argb[1] = AVG(g, src_bayer0[1]);
- dst_argb[2] = src_bayer0[0];
- dst_argb[3] = 255U;
- dst_argb[4] = src_bayer1[1];
- dst_argb[5] = src_bayer0[1];
- dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]);
- dst_argb[7] = 255U;
- g = src_bayer0[1];
- b = src_bayer1[1];
- src_bayer0 += 2;
- src_bayer1 += 2;
- dst_argb += 8;
- }
- dst_argb[0] = AVG(b, src_bayer1[1]);
- dst_argb[1] = AVG(g, src_bayer0[1]);
- dst_argb[2] = src_bayer0[0];
- dst_argb[3] = 255U;
- if (!(pix & 1)) {
- dst_argb[4] = src_bayer1[1];
- dst_argb[5] = src_bayer0[1];
- dst_argb[6] = src_bayer0[0];
- dst_argb[7] = 255U;
- }
-}
-
-static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
- uint8* dst_argb, int pix) {
- const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
- uint8 b = src_bayer0[1];
- int x;
- for (x = 0; x < pix - 2; x += 2) {
- dst_argb[0] = AVG(b, src_bayer0[1]);
- dst_argb[1] = src_bayer0[0];
- dst_argb[2] = src_bayer1[0];
- dst_argb[3] = 255U;
- dst_argb[4] = src_bayer0[1];
- dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
- dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]);
- dst_argb[7] = 255U;
- b = src_bayer0[1];
- src_bayer0 += 2;
- src_bayer1 += 2;
- dst_argb += 8;
- }
- dst_argb[0] = AVG(b, src_bayer0[1]);
- dst_argb[1] = src_bayer0[0];
- dst_argb[2] = src_bayer1[0];
- dst_argb[3] = 255U;
- if (!(pix & 1)) {
- dst_argb[4] = src_bayer0[1];
- dst_argb[5] = src_bayer0[0];
- dst_argb[6] = src_bayer1[0];
- dst_argb[7] = 255U;
- }
-}
-
-static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
- uint8* dst_argb, int pix) {
- const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
- uint8 r = src_bayer0[1];
- int x;
- for (x = 0; x < pix - 2; x += 2) {
- dst_argb[0] = src_bayer1[0];
- dst_argb[1] = src_bayer0[0];
- dst_argb[2] = AVG(r, src_bayer0[1]);
- dst_argb[3] = 255U;
- dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]);
- dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
- dst_argb[6] = src_bayer0[1];
- dst_argb[7] = 255U;
- r = src_bayer0[1];
- src_bayer0 += 2;
- src_bayer1 += 2;
- dst_argb += 8;
- }
- dst_argb[0] = src_bayer1[0];
- dst_argb[1] = src_bayer0[0];
- dst_argb[2] = AVG(r, src_bayer0[1]);
- dst_argb[3] = 255U;
- if (!(pix & 1)) {
- dst_argb[4] = src_bayer1[0];
- dst_argb[5] = src_bayer0[0];
- dst_argb[6] = src_bayer0[1];
- dst_argb[7] = 255U;
- }
-}
-
-// Converts any Bayer RGB format to ARGB.
-LIBYUV_API
-int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height,
- uint32 src_fourcc_bayer) {
- int y;
- void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int pix);
- void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int pix);
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- switch (src_fourcc_bayer) {
- case FOURCC_BGGR:
- BayerRow0 = BayerRowBG;
- BayerRow1 = BayerRowGR;
- break;
- case FOURCC_GBRG:
- BayerRow0 = BayerRowGB;
- BayerRow1 = BayerRowRG;
- break;
- case FOURCC_GRBG:
- BayerRow0 = BayerRowGR;
- BayerRow1 = BayerRowBG;
- break;
- case FOURCC_RGGB:
- BayerRow0 = BayerRowRG;
- BayerRow1 = BayerRowGB;
- break;
- default:
- return -1; // Bad FourCC
- }
-
- for (y = 0; y < height - 1; y += 2) {
- BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
- BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
- dst_argb + dst_stride_argb, width);
- src_bayer += src_stride_bayer * 2;
- dst_argb += dst_stride_argb * 2;
- }
- if (height & 1) {
- BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
- }
- return 0;
-}
-
-// Converts any Bayer RGB format to ARGB.
-LIBYUV_API
-int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height,
- uint32 src_fourcc_bayer) {
- void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int pix);
- void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
- uint8* dst_argb, int pix);
-
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- int halfheight;
- height = -height;
- halfheight = (height + 1) >> 1;
- dst_y = dst_y + (height - 1) * dst_stride_y;
- dst_u = dst_u + (halfheight - 1) * dst_stride_u;
- dst_v = dst_v + (halfheight - 1) * dst_stride_v;
- dst_stride_y = -dst_stride_y;
- dst_stride_u = -dst_stride_u;
- dst_stride_v = -dst_stride_v;
- }
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
- }
-#elif defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- if (width >= 16) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
- }
-#endif
-
- switch (src_fourcc_bayer) {
- case FOURCC_BGGR:
- BayerRow0 = BayerRowBG;
- BayerRow1 = BayerRowGR;
- break;
- case FOURCC_GBRG:
- BayerRow0 = BayerRowGB;
- BayerRow1 = BayerRowRG;
- break;
- case FOURCC_GRBG:
- BayerRow0 = BayerRowGR;
- BayerRow1 = BayerRowBG;
- break;
- case FOURCC_RGGB:
- BayerRow0 = BayerRowRG;
- BayerRow1 = BayerRowGB;
- break;
- default:
- return -1; // Bad FourCC
- }
-
- {
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
- int y;
- for (y = 0; y < height - 1; y += 2) {
- BayerRow0(src_bayer, src_stride_bayer, row, width);
- BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
- row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
- src_bayer += src_stride_bayer * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- BayerRow0(src_bayer, src_stride_bayer, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- }
- free_aligned_buffer_64(row);
- }
- return 0;
-}
-
-// Convert I420 to Bayer.
-LIBYUV_API
-int I420ToBayer(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_bayer, int dst_stride_bayer,
- int width, int height,
- uint32 dst_fourcc_bayer) {
- void (*I422ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToARGBRow_C;
- void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) = ARGBToBayerRow_C;
- const int blue_index = 0; // Offsets for ARGB format
- const int green_index = 1;
- const int red_index = 2;
- uint32 index_map[2];
- // Negative height means invert the image.
- if (height < 0) {
- int halfheight;
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (halfheight - 1) * src_stride_u;
- src_v = src_v + (halfheight - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
- I422ToARGBRow = I422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I422ToARGBRow = I422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
- I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
- }
-#endif
-
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_SSSE3;
- }
- }
-#elif defined(HAS_ARGBTOBAYERROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_NEON;
- }
- }
-#endif
-
- if (MakeSelectors(blue_index, green_index, red_index,
- dst_fourcc_bayer, index_map)) {
- return -1; // Bad FourCC
- }
- {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- int y;
- for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, row, width);
- ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
- dst_bayer += dst_stride_bayer;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- free_aligned_buffer_64(row);
- }
- return 0;
-}
-
-#define MAKEBAYERFOURCC(BAYER) \
-LIBYUV_API \
-int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer, \
- uint8* dst_y, int dst_stride_y, \
- uint8* dst_u, int dst_stride_u, \
- uint8* dst_v, int dst_stride_v, \
- int width, int height) { \
- return BayerToI420(src_bayer, src_stride_bayer, \
- dst_y, dst_stride_y, \
- dst_u, dst_stride_u, \
- dst_v, dst_stride_v, \
- width, height, \
- FOURCC_##BAYER); \
-} \
- \
-LIBYUV_API \
-int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y, \
- const uint8* src_u, int src_stride_u, \
- const uint8* src_v, int src_stride_v, \
- uint8* dst_bayer, int dst_stride_bayer, \
- int width, int height) { \
- return I420ToBayer(src_y, src_stride_y, \
- src_u, src_stride_u, \
- src_v, src_stride_v, \
- dst_bayer, dst_stride_bayer, \
- width, height, \
- FOURCC_##BAYER); \
-} \
- \
-LIBYUV_API \
-int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb, \
- uint8* dst_bayer, int dst_stride_bayer, \
- int width, int height) { \
- return ARGBToBayer(src_argb, src_stride_argb, \
- dst_bayer, dst_stride_bayer, \
- width, height, \
- FOURCC_##BAYER); \
-} \
- \
-LIBYUV_API \
-int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer, \
- uint8* dst_argb, int dst_stride_argb, \
- int width, int height) { \
- return BayerToARGB(src_bayer, src_stride_bayer, \
- dst_argb, dst_stride_argb, \
- width, height, \
- FOURCC_##BAYER); \
-}
-
-MAKEBAYERFOURCC(BGGR)
-MAKEBAYERFOURCC(GBRG)
-MAKEBAYERFOURCC(GRBG)
-MAKEBAYERFOURCC(RGGB)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc b/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc
deleted file mode 100755
index 193b829ba9..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc
+++ /dev/null
@@ -1,558 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/mjpeg_decoder.h"
-
-#ifdef HAVE_JPEG
-#include <assert.h>
-
-#if !defined(__pnacl__) && !defined(__CLR_VER) && !defined(COVERAGE_ENABLED) &&\
- !defined(TARGET_IPHONE_SIMULATOR)
-// Must be included before jpeglib.
-#include <setjmp.h>
-#define HAVE_SETJMP
-#endif
-struct FILE; // For jpeglib.h.
-
-// C++ build requires extern C for jpeg internals.
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <jpeglib.h>
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#include "libyuv/planar_functions.h" // For CopyPlane().
-
-namespace libyuv {
-
-#ifdef HAVE_SETJMP
-struct SetJmpErrorMgr {
- jpeg_error_mgr base; // Must be at the top
- jmp_buf setjmp_buffer;
-};
-#endif
-
-const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
-const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
-const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
-const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
-const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
-const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
-
-MJpegDecoder::MJpegDecoder()
- : has_scanline_padding_(LIBYUV_FALSE),
- num_outbufs_(0),
- scanlines_(NULL),
- scanlines_sizes_(NULL),
- databuf_(NULL),
- databuf_strides_(NULL) {
- decompress_struct_ = new jpeg_decompress_struct;
- source_mgr_ = new jpeg_source_mgr;
-#ifdef HAVE_SETJMP
- error_mgr_ = new SetJmpErrorMgr;
- decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
- // Override standard exit()-based error handler.
- error_mgr_->base.error_exit = &ErrorHandler;
-#endif
- decompress_struct_->client_data = NULL;
- source_mgr_->init_source = &init_source;
- source_mgr_->fill_input_buffer = &fill_input_buffer;
- source_mgr_->skip_input_data = &skip_input_data;
- source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
- source_mgr_->term_source = &term_source;
- jpeg_create_decompress(decompress_struct_);
- decompress_struct_->src = source_mgr_;
- buf_vec_.buffers = &buf_;
- buf_vec_.len = 1;
-}
-
-MJpegDecoder::~MJpegDecoder() {
- jpeg_destroy_decompress(decompress_struct_);
- delete decompress_struct_;
- delete source_mgr_;
-#ifdef HAVE_SETJMP
- delete error_mgr_;
-#endif
- DestroyOutputBuffers();
-}
-
-LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
- if (!ValidateJpeg(src, src_len)) {
- return LIBYUV_FALSE;
- }
-
- buf_.data = src;
- buf_.len = (int)(src_len);
- buf_vec_.pos = 0;
- decompress_struct_->client_data = &buf_vec_;
-#ifdef HAVE_SETJMP
- if (setjmp(error_mgr_->setjmp_buffer)) {
- // We called jpeg_read_header, it experienced an error, and we called
- // longjmp() and rewound the stack to here. Return error.
- return LIBYUV_FALSE;
- }
-#endif
- if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
- // ERROR: Bad MJPEG header
- return LIBYUV_FALSE;
- }
- AllocOutputBuffers(GetNumComponents());
- for (int i = 0; i < num_outbufs_; ++i) {
- int scanlines_size = GetComponentScanlinesPerImcuRow(i);
- if (scanlines_sizes_[i] != scanlines_size) {
- if (scanlines_[i]) {
- delete scanlines_[i];
- }
- scanlines_[i] = new uint8* [scanlines_size];
- scanlines_sizes_[i] = scanlines_size;
- }
-
- // We allocate padding for the final scanline to pad it up to DCTSIZE bytes
- // to avoid memory errors, since jpeglib only reads full MCUs blocks. For
- // the preceding scanlines, the padding is not needed/wanted because the
- // following addresses will already be valid (they are the initial bytes of
- // the next scanline) and will be overwritten when jpeglib writes out that
- // next scanline.
- int databuf_stride = GetComponentStride(i);
- int databuf_size = scanlines_size * databuf_stride;
- if (databuf_strides_[i] != databuf_stride) {
- if (databuf_[i]) {
- delete databuf_[i];
- }
- databuf_[i] = new uint8[databuf_size];
- databuf_strides_[i] = databuf_stride;
- }
-
- if (GetComponentStride(i) != GetComponentWidth(i)) {
- has_scanline_padding_ = LIBYUV_TRUE;
- }
- }
- return LIBYUV_TRUE;
-}
-
-static int DivideAndRoundUp(int numerator, int denominator) {
- return (numerator + denominator - 1) / denominator;
-}
-
-static int DivideAndRoundDown(int numerator, int denominator) {
- return numerator / denominator;
-}
-
-// Returns width of the last loaded frame.
-int MJpegDecoder::GetWidth() {
- return decompress_struct_->image_width;
-}
-
-// Returns height of the last loaded frame.
-int MJpegDecoder::GetHeight() {
- return decompress_struct_->image_height;
-}
-
-// Returns format of the last loaded frame. The return value is one of the
-// kColorSpace* constants.
-int MJpegDecoder::GetColorSpace() {
- return decompress_struct_->jpeg_color_space;
-}
-
-// Number of color components in the color space.
-int MJpegDecoder::GetNumComponents() {
- return decompress_struct_->num_components;
-}
-
-// Sample factors of the n-th component.
-int MJpegDecoder::GetHorizSampFactor(int component) {
- return decompress_struct_->comp_info[component].h_samp_factor;
-}
-
-int MJpegDecoder::GetVertSampFactor(int component) {
- return decompress_struct_->comp_info[component].v_samp_factor;
-}
-
-int MJpegDecoder::GetHorizSubSampFactor(int component) {
- return decompress_struct_->max_h_samp_factor /
- GetHorizSampFactor(component);
-}
-
-int MJpegDecoder::GetVertSubSampFactor(int component) {
- return decompress_struct_->max_v_samp_factor /
- GetVertSampFactor(component);
-}
-
-int MJpegDecoder::GetImageScanlinesPerImcuRow() {
- return decompress_struct_->max_v_samp_factor * DCTSIZE;
-}
-
-int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
- int vs = GetVertSubSampFactor(component);
- return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs);
-}
-
-int MJpegDecoder::GetComponentWidth(int component) {
- int hs = GetHorizSubSampFactor(component);
- return DivideAndRoundUp(GetWidth(), hs);
-}
-
-int MJpegDecoder::GetComponentHeight(int component) {
- int vs = GetVertSubSampFactor(component);
- return DivideAndRoundUp(GetHeight(), vs);
-}
-
-// Get width in bytes padded out to a multiple of DCTSIZE
-int MJpegDecoder::GetComponentStride(int component) {
- return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1);
-}
-
-int MJpegDecoder::GetComponentSize(int component) {
- return GetComponentWidth(component) * GetComponentHeight(component);
-}
-
-LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
-#ifdef HAVE_SETJMP
- if (setjmp(error_mgr_->setjmp_buffer)) {
- // We called jpeg_abort_decompress, it experienced an error, and we called
- // longjmp() and rewound the stack to here. Return error.
- return LIBYUV_FALSE;
- }
-#endif
- jpeg_abort_decompress(decompress_struct_);
- return LIBYUV_TRUE;
-}
-
-// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
-LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
- uint8** planes, int dst_width, int dst_height) {
- if (dst_width != GetWidth() ||
- dst_height > GetHeight()) {
- // ERROR: Bad dimensions
- return LIBYUV_FALSE;
- }
-#ifdef HAVE_SETJMP
- if (setjmp(error_mgr_->setjmp_buffer)) {
- // We called into jpeglib, it experienced an error sometime during this
- // function call, and we called longjmp() and rewound the stack to here.
- // Return error.
- return LIBYUV_FALSE;
- }
-#endif
- if (!StartDecode()) {
- return LIBYUV_FALSE;
- }
- SetScanlinePointers(databuf_);
- int lines_left = dst_height;
- // Compute amount of lines to skip to implement vertical crop.
- // TODO(fbarchard): Ensure skip is a multiple of maximum component
- // subsample. ie 2
- int skip = (GetHeight() - dst_height) / 2;
- if (skip > 0) {
- // There is no API to skip lines in the output data, so we read them
- // into the temp buffer.
- while (skip >= GetImageScanlinesPerImcuRow()) {
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- skip -= GetImageScanlinesPerImcuRow();
- }
- if (skip > 0) {
- // Have a partial iMCU row left over to skip. Must read it and then
- // copy the parts we want into the destination.
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- for (int i = 0; i < num_outbufs_; ++i) {
- // TODO(fbarchard): Compute skip to avoid this
- assert(skip % GetVertSubSampFactor(i) == 0);
- int rows_to_skip =
- DivideAndRoundDown(skip, GetVertSubSampFactor(i));
- int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
- rows_to_skip;
- int data_to_skip = rows_to_skip * GetComponentStride(i);
- CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
- planes[i], GetComponentWidth(i),
- GetComponentWidth(i), scanlines_to_copy);
- planes[i] += scanlines_to_copy * GetComponentWidth(i);
- }
- lines_left -= (GetImageScanlinesPerImcuRow() - skip);
- }
- }
-
- // Read full MCUs but cropped horizontally
- for (; lines_left > GetImageScanlinesPerImcuRow();
- lines_left -= GetImageScanlinesPerImcuRow()) {
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- for (int i = 0; i < num_outbufs_; ++i) {
- int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
- CopyPlane(databuf_[i], GetComponentStride(i),
- planes[i], GetComponentWidth(i),
- GetComponentWidth(i), scanlines_to_copy);
- planes[i] += scanlines_to_copy * GetComponentWidth(i);
- }
- }
-
- if (lines_left > 0) {
- // Have a partial iMCU row left over to decode.
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- for (int i = 0; i < num_outbufs_; ++i) {
- int scanlines_to_copy =
- DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
- CopyPlane(databuf_[i], GetComponentStride(i),
- planes[i], GetComponentWidth(i),
- GetComponentWidth(i), scanlines_to_copy);
- planes[i] += scanlines_to_copy * GetComponentWidth(i);
- }
- }
- return FinishDecode();
-}
-
-LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
- int dst_width, int dst_height) {
- if (dst_width != GetWidth() ||
- dst_height > GetHeight()) {
- // ERROR: Bad dimensions
- return LIBYUV_FALSE;
- }
-#ifdef HAVE_SETJMP
- if (setjmp(error_mgr_->setjmp_buffer)) {
- // We called into jpeglib, it experienced an error sometime during this
- // function call, and we called longjmp() and rewound the stack to here.
- // Return error.
- return LIBYUV_FALSE;
- }
-#endif
- if (!StartDecode()) {
- return LIBYUV_FALSE;
- }
- SetScanlinePointers(databuf_);
- int lines_left = dst_height;
- // TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
- int skip = (GetHeight() - dst_height) / 2;
- if (skip > 0) {
- while (skip >= GetImageScanlinesPerImcuRow()) {
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- skip -= GetImageScanlinesPerImcuRow();
- }
- if (skip > 0) {
- // Have a partial iMCU row left over to skip.
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- for (int i = 0; i < num_outbufs_; ++i) {
- // TODO(fbarchard): Compute skip to avoid this
- assert(skip % GetVertSubSampFactor(i) == 0);
- int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
- int data_to_skip = rows_to_skip * GetComponentStride(i);
- // Change our own data buffer pointers so we can pass them to the
- // callback.
- databuf_[i] += data_to_skip;
- }
- int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
- (*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
- // Now change them back.
- for (int i = 0; i < num_outbufs_; ++i) {
- int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
- int data_to_skip = rows_to_skip * GetComponentStride(i);
- databuf_[i] -= data_to_skip;
- }
- lines_left -= scanlines_to_copy;
- }
- }
- // Read full MCUs until we get to the crop point.
- for (; lines_left >= GetImageScanlinesPerImcuRow();
- lines_left -= GetImageScanlinesPerImcuRow()) {
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- (*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
- }
- if (lines_left > 0) {
- // Have a partial iMCU row left over to decode.
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- (*fn)(opaque, databuf_, databuf_strides_, lines_left);
- }
- return FinishDecode();
-}
-
-void MJpegDecoder::init_source(j_decompress_ptr cinfo) {
- fill_input_buffer(cinfo);
-}
-
-boolean MJpegDecoder::fill_input_buffer(j_decompress_ptr cinfo) {
- BufferVector* buf_vec = (BufferVector*)(cinfo->client_data);
- if (buf_vec->pos >= buf_vec->len) {
- assert(0 && "No more data");
- // ERROR: No more data
- return FALSE;
- }
- cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
- cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
- ++buf_vec->pos;
- return TRUE;
-}
-
-void MJpegDecoder::skip_input_data(j_decompress_ptr cinfo,
- long num_bytes) { // NOLINT
- cinfo->src->next_input_byte += num_bytes;
-}
-
-void MJpegDecoder::term_source(j_decompress_ptr cinfo) {
- // Nothing to do.
-}
-
-#ifdef HAVE_SETJMP
-void MJpegDecoder::ErrorHandler(j_common_ptr cinfo) {
- // This is called when a jpeglib command experiences an error. Unfortunately
- // jpeglib's error handling model is not very flexible, because it expects the
- // error handler to not return--i.e., it wants the program to terminate. To
- // recover from errors we use setjmp() as shown in their example. setjmp() is
- // C's implementation for the "call with current continuation" functionality
- // seen in some functional programming languages.
- // A formatted message can be output, but is unsafe for release.
-#ifdef DEBUG
- char buf[JMSG_LENGTH_MAX];
- (*cinfo->err->format_message)(cinfo, buf);
- // ERROR: Error in jpeglib: buf
-#endif
-
- SetJmpErrorMgr* mgr = (SetJmpErrorMgr*)(cinfo->err);
- // This rewinds the call stack to the point of the corresponding setjmp()
- // and causes it to return (for a second time) with value 1.
- longjmp(mgr->setjmp_buffer, 1);
-}
-#endif
-
-void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
- if (num_outbufs != num_outbufs_) {
- // We could perhaps optimize this case to resize the output buffers without
- // necessarily having to delete and recreate each one, but it's not worth
- // it.
- DestroyOutputBuffers();
-
- scanlines_ = new uint8** [num_outbufs];
- scanlines_sizes_ = new int[num_outbufs];
- databuf_ = new uint8* [num_outbufs];
- databuf_strides_ = new int[num_outbufs];
-
- for (int i = 0; i < num_outbufs; ++i) {
- scanlines_[i] = NULL;
- scanlines_sizes_[i] = 0;
- databuf_[i] = NULL;
- databuf_strides_[i] = 0;
- }
-
- num_outbufs_ = num_outbufs;
- }
-}
-
-void MJpegDecoder::DestroyOutputBuffers() {
- for (int i = 0; i < num_outbufs_; ++i) {
- delete [] scanlines_[i];
- delete [] databuf_[i];
- }
- delete [] scanlines_;
- delete [] databuf_;
- delete [] scanlines_sizes_;
- delete [] databuf_strides_;
- scanlines_ = NULL;
- databuf_ = NULL;
- scanlines_sizes_ = NULL;
- databuf_strides_ = NULL;
- num_outbufs_ = 0;
-}
-
-// JDCT_IFAST and do_block_smoothing improve performance substantially.
-LIBYUV_BOOL MJpegDecoder::StartDecode() {
- decompress_struct_->raw_data_out = TRUE;
- decompress_struct_->dct_method = JDCT_IFAST; // JDCT_ISLOW is default
- decompress_struct_->dither_mode = JDITHER_NONE;
- // Not applicable to 'raw':
- decompress_struct_->do_fancy_upsampling = LIBYUV_FALSE;
- // Only for buffered mode:
- decompress_struct_->enable_2pass_quant = LIBYUV_FALSE;
- // Blocky but fast:
- decompress_struct_->do_block_smoothing = LIBYUV_FALSE;
-
- if (!jpeg_start_decompress(decompress_struct_)) {
- // ERROR: Couldn't start JPEG decompressor";
- return LIBYUV_FALSE;
- }
- return LIBYUV_TRUE;
-}
-
-LIBYUV_BOOL MJpegDecoder::FinishDecode() {
- // jpeglib considers it an error if we finish without decoding the whole
- // image, so we call "abort" rather than "finish".
- jpeg_abort_decompress(decompress_struct_);
- return LIBYUV_TRUE;
-}
-
-void MJpegDecoder::SetScanlinePointers(uint8** data) {
- for (int i = 0; i < num_outbufs_; ++i) {
- uint8* data_i = data[i];
- for (int j = 0; j < scanlines_sizes_[i]; ++j) {
- scanlines_[i][j] = data_i;
- data_i += GetComponentStride(i);
- }
- }
-}
-
-inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
- return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
- jpeg_read_raw_data(decompress_struct_,
- scanlines_,
- GetImageScanlinesPerImcuRow());
-}
-
-// The helper function which recognizes the jpeg sub-sampling type.
-JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
- int* subsample_x, int* subsample_y, int number_of_components) {
- if (number_of_components == 3) { // Color images.
- if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
- subsample_x[1] == 2 && subsample_y[1] == 2 &&
- subsample_x[2] == 2 && subsample_y[2] == 2) {
- return kJpegYuv420;
- } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
- subsample_x[1] == 2 && subsample_y[1] == 1 &&
- subsample_x[2] == 2 && subsample_y[2] == 1) {
- return kJpegYuv422;
- } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
- subsample_x[1] == 1 && subsample_y[1] == 1 &&
- subsample_x[2] == 1 && subsample_y[2] == 1) {
- return kJpegYuv444;
- }
- } else if (number_of_components == 1) { // Grey-scale images.
- if (subsample_x[0] == 1 && subsample_y[0] == 1) {
- return kJpegYuv400;
- }
- }
- return kJpegUnknown;
-}
-
-} // namespace libyuv
-#endif // HAVE_JPEG
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc b/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc
deleted file mode 100755
index 23d22d099b..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/mjpeg_decoder.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Helper function to validate the jpeg appears intact.
-// TODO(fbarchard): Optimize case where SOI is found but EOI is not.
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
- size_t i;
- if (sample_size < 64) {
- // ERROR: Invalid jpeg size: sample_size
- return LIBYUV_FALSE;
- }
- if (sample[0] != 0xff || sample[1] != 0xd8) { // Start Of Image
- // ERROR: Invalid jpeg initial start code
- return LIBYUV_FALSE;
- }
- for (i = sample_size - 2; i > 1;) {
- if (sample[i] != 0xd9) {
- if (sample[i] == 0xff && sample[i + 1] == 0xd9) { // End Of Image
- return LIBYUV_TRUE; // Success: Valid jpeg.
- }
- --i;
- }
- --i;
- }
- // ERROR: Invalid jpeg end code not found. Size sample_size
- return LIBYUV_FALSE;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc b/drivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc
deleted file mode 100755
index f0a8989051..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc
+++ /dev/null
@@ -1,2238 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/planar_functions.h"
-
-#include <string.h> // for memset()
-
-#include "libyuv/cpu_id.h"
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy a plane of data
-LIBYUV_API
-void CopyPlane(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- int y;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
- // Coalesce rows.
- if (src_stride_y == width &&
- dst_stride_y == width) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_y = 0;
- }
-#if defined(HAS_COPYROW_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_X86;
- }
-#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- CopyRow = CopyRow_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
- // Copy plane
- for (y = 0; y < height; ++y) {
- CopyRow(src_y, dst_y, width);
- src_y += src_stride_y;
- dst_y += dst_stride_y;
- }
-}
-
-// Copy I422.
-LIBYUV_API
-int I422Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int halfwidth = (width + 1) >> 1;
- if (!src_y || !src_u || !src_v ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (height - 1) * src_stride_u;
- src_v = src_v + (height - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
- CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
- return 0;
-}
-
-// Copy I444.
-LIBYUV_API
-int I444Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_y || !src_u || !src_v ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (height - 1) * src_stride_u;
- src_v = src_v + (height - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
- CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
- return 0;
-}
-
-// Copy I400.
-LIBYUV_API
-int I400ToI400(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- if (!src_y || !dst_y || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- return 0;
-}
-
-// Convert I420 to I400.
-LIBYUV_API
-int I420ToI400(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- if (!src_y || !dst_y || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- return 0;
-}
-
-// Mirror a plane of data.
-void MirrorPlane(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- int y;
- void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
-#if defined(HAS_MIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_NEON;
- }
-#endif
-#if defined(HAS_MIRRORROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_SSE2;
- }
-#endif
-#if defined(HAS_MIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- MirrorRow = MirrorRow_SSSE3;
- }
-#endif
-#if defined(HAS_MIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
- MirrorRow = MirrorRow_AVX2;
- }
-#endif
-
- // Mirror plane
- for (y = 0; y < height; ++y) {
- MirrorRow(src_y, dst_y, width);
- src_y += src_stride_y;
- dst_y += dst_stride_y;
- }
-}
-
-// Convert YUY2 to I422.
-LIBYUV_API
-int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*YUY2ToUV422Row)(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) =
- YUY2ToUV422Row_C;
- void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
- YUY2ToYRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
- src_stride_yuy2 = -src_stride_yuy2;
- }
- // Coalesce rows.
- if (src_stride_yuy2 == width * 2 &&
- dst_stride_y == width &&
- dst_stride_u * 2 == width &&
- dst_stride_v * 2 == width) {
- width *= height;
- height = 1;
- src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
- }
-#if defined(HAS_YUY2TOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
- YUY2ToYRow = YUY2ToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
- YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
- YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- YUY2ToYRow = YUY2ToYRow_SSE2;
- }
- }
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
- YUY2ToYRow = YUY2ToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
- YUY2ToYRow = YUY2ToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- YUY2ToYRow = YUY2ToYRow_Any_NEON;
- if (width >= 16) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
- }
- if (IS_ALIGNED(width, 16)) {
- YUY2ToYRow = YUY2ToYRow_NEON;
- YUY2ToUV422Row = YUY2ToUV422Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
- YUY2ToYRow(src_yuy2, dst_y, width);
- src_yuy2 += src_stride_yuy2;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- return 0;
-}
-
-// Convert UYVY to I422.
-LIBYUV_API
-int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*UYVYToUV422Row)(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) =
- UYVYToUV422Row_C;
- void (*UYVYToYRow)(const uint8* src_uyvy,
- uint8* dst_y, int pix) = UYVYToYRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
- src_stride_uyvy = -src_stride_uyvy;
- }
- // Coalesce rows.
- if (src_stride_uyvy == width * 2 &&
- dst_stride_y == width &&
- dst_stride_u * 2 == width &&
- dst_stride_v * 2 == width) {
- width *= height;
- height = 1;
- src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
- }
-#if defined(HAS_UYVYTOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
- UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
- UYVYToYRow = UYVYToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
- UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
- UYVYToUV422Row = UYVYToUV422Row_SSE2;
- if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- UYVYToYRow = UYVYToYRow_SSE2;
- }
- }
- }
- }
-#endif
-#if defined(HAS_UYVYTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
- UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
- UYVYToYRow = UYVYToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- UYVYToUV422Row = UYVYToUV422Row_AVX2;
- UYVYToYRow = UYVYToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_UYVYTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- UYVYToYRow = UYVYToYRow_Any_NEON;
- if (width >= 16) {
- UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
- }
- if (IS_ALIGNED(width, 16)) {
- UYVYToYRow = UYVYToYRow_NEON;
- UYVYToUV422Row = UYVYToUV422Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
- UYVYToYRow(src_uyvy, dst_y, width);
- src_uyvy += src_stride_uyvy;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- return 0;
-}
-
-// Mirror I400 with optional flipping
-LIBYUV_API
-int I400Mirror(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- if (!src_y || !dst_y ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
-
- MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- return 0;
-}
-
-// Mirror I420 with optional flipping
-LIBYUV_API
-int I420Mirror(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (halfheight - 1) * src_stride_u;
- src_v = src_v + (halfheight - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- if (dst_y) {
- MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- }
- MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
- MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
- return 0;
-}
-
-// ARGB mirror.
-LIBYUV_API
-int ARGBMirror(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
- ARGBMirrorRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-
-#if defined(HAS_ARGBMIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBMirrorRow = ARGBMirrorRow_SSSE3;
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
- ARGBMirrorRow = ARGBMirrorRow_AVX2;
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
- ARGBMirrorRow = ARGBMirrorRow_NEON;
- }
-#endif
-
- // Mirror plane
- for (y = 0; y < height; ++y) {
- ARGBMirrorRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Get a blender that optimized for the CPU, alignment and pixel count.
-// As there are 6 blenders to choose from, the caller should try to use
-// the same blend function for all pixels if possible.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend() {
- void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width) = ARGBBlendRow_C;
-#if defined(HAS_ARGBBLENDROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBBlendRow = ARGBBlendRow_SSSE3;
- return ARGBBlendRow;
- }
-#endif
-#if defined(HAS_ARGBBLENDROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBBlendRow = ARGBBlendRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBBLENDROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBBlendRow = ARGBBlendRow_NEON;
- }
-#endif
- return ARGBBlendRow;
-}
-
-// Alpha Blend 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width) = GetARGBBlend();
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-
- for (y = 0; y < height; ++y) {
- ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Multiply 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
- int width) = ARGBMultiplyRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBMULTIPLYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
- ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBMULTIPLYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
- ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBMULTIPLYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBMultiplyRow = ARGBMultiplyRow_NEON;
- }
- }
-#endif
-
- // Multiply plane
- for (y = 0; y < height; ++y) {
- ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Add 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
- int width) = ARGBAddRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBAddRow = ARGBAddRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
- ARGBAddRow = ARGBAddRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBAddRow = ARGBAddRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBADDROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
- ARGBAddRow = ARGBAddRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBAddRow = ARGBAddRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBADDROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBAddRow = ARGBAddRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBAddRow = ARGBAddRow_NEON;
- }
- }
-#endif
-
- // Add plane
- for (y = 0; y < height; ++y) {
- ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Subtract 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
- int width) = ARGBSubtractRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBSUBTRACTROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
- ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBSubtractRow = ARGBSubtractRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBSUBTRACTROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
- ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBSubtractRow = ARGBSubtractRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBSUBTRACTROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBSubtractRow = ARGBSubtractRow_NEON;
- }
- }
-#endif
-
- // Subtract plane
- for (y = 0; y < height; ++y) {
- ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert I422 to BGRA.
-LIBYUV_API
-int I422ToBGRA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_bgra, int dst_stride_bgra,
- int width, int height) {
- int y;
- void (*I422ToBGRARow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToBGRARow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_bgra ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
- dst_stride_bgra = -dst_stride_bgra;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_bgra == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
- }
-#if defined(HAS_I422TOBGRAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToBGRARow = I422ToBGRARow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToBGRARow = I422ToBGRARow_NEON;
- }
- }
-#elif defined(HAS_I422TOBGRAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
- I422ToBGRARow = I422ToBGRARow_SSSE3;
- }
- }
- }
-#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
- I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
- dst_bgra += dst_stride_bgra;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I422 to ABGR.
-LIBYUV_API
-int I422ToABGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_abgr, int dst_stride_abgr,
- int width, int height) {
- int y;
- void (*I422ToABGRRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToABGRRow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_abgr ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
- dst_stride_abgr = -dst_stride_abgr;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_abgr == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
- }
-#if defined(HAS_I422TOABGRROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToABGRRow = I422ToABGRRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToABGRRow = I422ToABGRRow_NEON;
- }
- }
-#elif defined(HAS_I422TOABGRROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
- I422ToABGRRow = I422ToABGRRow_SSSE3;
- }
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
- dst_abgr += dst_stride_abgr;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I422 to RGBA.
-LIBYUV_API
-int I422ToRGBA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height) {
- int y;
- void (*I422ToRGBARow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRGBARow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_rgba ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
- dst_stride_rgba = -dst_stride_rgba;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_rgba == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
- }
-#if defined(HAS_I422TORGBAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToRGBARow = I422ToRGBARow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToRGBARow = I422ToRGBARow_NEON;
- }
- }
-#elif defined(HAS_I422TORGBAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
- I422ToRGBARow = I422ToRGBARow_SSSE3;
- }
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
- dst_rgba += dst_stride_rgba;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert NV12 to RGB565.
-LIBYUV_API
-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height) {
- int y;
- void (*NV12ToRGB565Row)(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* rgb_buf,
- int width) = NV12ToRGB565Row_C;
- if (!src_y || !src_uv || !dst_rgb565 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
- dst_stride_rgb565 = -dst_stride_rgb565;
- }
-#if defined(HAS_NV12TORGB565ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
- }
- }
-#elif defined(HAS_NV12TORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV12ToRGB565Row = NV12ToRGB565Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
- dst_rgb565 += dst_stride_rgb565;
- src_y += src_stride_y;
- if (y & 1) {
- src_uv += src_stride_uv;
- }
- }
- return 0;
-}
-
-// Convert NV21 to RGB565.
-LIBYUV_API
-int NV21ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_vu, int src_stride_vu,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height) {
- int y;
- void (*NV21ToRGB565Row)(const uint8* y_buf,
- const uint8* src_vu,
- uint8* rgb_buf,
- int width) = NV21ToRGB565Row_C;
- if (!src_y || !src_vu || !dst_rgb565 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
- dst_stride_rgb565 = -dst_stride_rgb565;
- }
-#if defined(HAS_NV21TORGB565ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
- }
- }
-#elif defined(HAS_NV21TORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV21ToRGB565Row = NV21ToRGB565Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
- dst_rgb565 += dst_stride_rgb565;
- src_y += src_stride_y;
- if (y & 1) {
- src_vu += src_stride_vu;
- }
- }
- return 0;
-}
-
-LIBYUV_API
-void SetPlane(uint8* dst_y, int dst_stride_y,
- int width, int height,
- uint32 value) {
- int y;
- uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
- void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
- // Coalesce rows.
- if (dst_stride_y == width) {
- width *= height;
- height = 1;
- dst_stride_y = 0;
- }
-#if defined(HAS_SETROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) &&
- IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- SetRow = SetRow_NEON;
- }
-#endif
-#if defined(HAS_SETROW_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- SetRow = SetRow_X86;
- }
-#endif
-
- // Set plane
- for (y = 0; y < height; ++y) {
- SetRow(dst_y, v32, width);
- dst_y += dst_stride_y;
- }
-}
-
-// Draw a rectangle into I420
-LIBYUV_API
-int I420Rect(uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int x, int y,
- int width, int height,
- int value_y, int value_u, int value_v) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- uint8* start_y = dst_y + y * dst_stride_y + x;
- uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
- uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
- if (!dst_y || !dst_u || !dst_v ||
- width <= 0 || height <= 0 ||
- x < 0 || y < 0 ||
- value_y < 0 || value_y > 255 ||
- value_u < 0 || value_u > 255 ||
- value_v < 0 || value_v > 255) {
- return -1;
- }
-
- SetPlane(start_y, dst_stride_y, width, height, value_y);
- SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
- SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
- return 0;
-}
-
-// Draw a rectangle into ARGB
-LIBYUV_API
-int ARGBRect(uint8* dst_argb, int dst_stride_argb,
- int dst_x, int dst_y,
- int width, int height,
- uint32 value) {
- if (!dst_argb ||
- width <= 0 || height <= 0 ||
- dst_x < 0 || dst_y < 0) {
- return -1;
- }
- dst_argb += dst_y * dst_stride_argb + dst_x * 4;
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_SETROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
- return 0;
- }
-#endif
-#if defined(HAS_SETROW_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
- return 0;
- }
-#endif
- ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
- return 0;
-}
-
-// Convert unattentuated ARGB to preattenuated ARGB.
-// An unattenutated ARGB alpha blend uses the formula
-// p = a * f + (1 - a) * b
-// where
-// p is output pixel
-// f is foreground pixel
-// b is background pixel
-// a is alpha value from foreground pixel
-// An preattenutated ARGB alpha blend uses the formula
-// p = f + (1 - a) * b
-// where
-// f is foreground pixel premultiplied by alpha
-
-LIBYUV_API
-int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
- int width) = ARGBAttenuateRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBATTENUATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
- ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
- ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBAttenuateRow = ARGBAttenuateRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBAttenuateRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert preattentuated ARGB to unattenuated ARGB.
-LIBYUV_API
-int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
- int width) = ARGBUnattenuateRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
- ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
- ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
- }
- }
-#endif
-// TODO(fbarchard): Neon version.
-
- for (y = 0; y < height; ++y) {
- ARGBUnattenuateRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert ARGB to Grayed ARGB.
-LIBYUV_API
-int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
- int width) = ARGBGrayRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBGRAYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBGrayRow = ARGBGrayRow_SSSE3;
- }
-#elif defined(HAS_ARGBGRAYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBGrayRow = ARGBGrayRow_NEON;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBGrayRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Make a rectangle of ARGB gray scale.
-LIBYUV_API
-int ARGBGray(uint8* dst_argb, int dst_stride_argb,
- int dst_x, int dst_y,
- int width, int height) {
- int y;
- void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
- int width) = ARGBGrayRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBGRAYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBGrayRow = ARGBGrayRow_SSSE3;
- }
-#elif defined(HAS_ARGBGRAYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBGrayRow = ARGBGrayRow_NEON;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBGrayRow(dst, dst, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// Make a rectangle of ARGB Sepia tone.
-LIBYUV_API
-int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
- int dst_x, int dst_y, int width, int height) {
- int y;
- void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBSEPIAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBSepiaRow = ARGBSepiaRow_SSSE3;
- }
-#elif defined(HAS_ARGBSEPIAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBSepiaRow = ARGBSepiaRow_NEON;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBSepiaRow(dst, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// Apply a 4x4 matrix to each ARGB pixel.
-// Note: Normally for shading, but can be used to swizzle or invert.
-LIBYUV_API
-int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const int8* matrix_argb,
- int width, int height) {
- int y;
- void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
- if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
- }
-#elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Apply a 4x3 matrix to each ARGB pixel.
-// Deprecated.
-LIBYUV_API
-int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
- const int8* matrix_rgb,
- int dst_x, int dst_y, int width, int height) {
- SIMD_ALIGNED(int8 matrix_argb[16]);
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
- dst_x < 0 || dst_y < 0) {
- return -1;
- }
-
- // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
- matrix_argb[0] = matrix_rgb[0] / 2;
- matrix_argb[1] = matrix_rgb[1] / 2;
- matrix_argb[2] = matrix_rgb[2] / 2;
- matrix_argb[3] = matrix_rgb[3] / 2;
- matrix_argb[4] = matrix_rgb[4] / 2;
- matrix_argb[5] = matrix_rgb[5] / 2;
- matrix_argb[6] = matrix_rgb[6] / 2;
- matrix_argb[7] = matrix_rgb[7] / 2;
- matrix_argb[8] = matrix_rgb[8] / 2;
- matrix_argb[9] = matrix_rgb[9] / 2;
- matrix_argb[10] = matrix_rgb[10] / 2;
- matrix_argb[11] = matrix_rgb[11] / 2;
- matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
- matrix_argb[15] = 64; // 1.0
-
- return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
- dst, dst_stride_argb,
- &matrix_argb[0], width, height);
-}
-
-// Apply a color table each ARGB pixel.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
- const uint8* table_argb,
- int dst_x, int dst_y, int width, int height) {
- int y;
- void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
- int width) = ARGBColorTableRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
- dst_x < 0 || dst_y < 0) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBCOLORTABLEROW_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- ARGBColorTableRow = ARGBColorTableRow_X86;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBColorTableRow(dst, table_argb, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// Apply a color table each ARGB pixel but preserve destination alpha.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
- const uint8* table_argb,
- int dst_x, int dst_y, int width, int height) {
- int y;
- void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
- int width) = RGBColorTableRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
- dst_x < 0 || dst_y < 0) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_RGBCOLORTABLEROW_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- RGBColorTableRow = RGBColorTableRow_X86;
- }
-#endif
- for (y = 0; y < height; ++y) {
- RGBColorTableRow(dst, table_argb, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// ARGBQuantize is used to posterize art.
-// e.g. rgb / qvalue * qvalue + qvalue / 2
-// But the low levels implement efficiently with 3 parameters, and could be
-// used for other high level operations.
-// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
-// where scale is 1 / interval_size as a fixed point value.
-// The divide is replaces with a multiply by reciprocal fixed point multiply.
-// Caveat - although SSE2 saturates, the C function does not and should be used
-// with care if doing anything but quantization.
-LIBYUV_API
-int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
- int scale, int interval_size, int interval_offset,
- int dst_x, int dst_y, int width, int height) {
- int y;
- void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) = ARGBQuantizeRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
- interval_size < 1 || interval_size > 255) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBQUANTIZEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
- }
-#elif defined(HAS_ARGBQUANTIZEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBQuantizeRow = ARGBQuantizeRow_NEON;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// Computes table of cumulative sum for image where the value is the sum
-// of all values above and to the left of the entry. Used by ARGBBlur.
-LIBYUV_API
-int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
- int32* dst_cumsum, int dst_stride32_cumsum,
- int width, int height) {
- int y;
- void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
- int32* previous_cumsum = dst_cumsum;
- if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
- return -1;
- }
-#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
- }
-#endif
- memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel.
- for (y = 0; y < height; ++y) {
- ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
- previous_cumsum = dst_cumsum;
- dst_cumsum += dst_stride32_cumsum;
- src_argb += src_stride_argb;
- }
- return 0;
-}
-
-// Blur ARGB image.
-// Caller should allocate CumulativeSum table of width * height * 16 bytes
-// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
-// as the buffer is treated as circular.
-LIBYUV_API
-int ARGBBlur(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int32* dst_cumsum, int dst_stride32_cumsum,
- int width, int height, int radius) {
- int y;
- void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
- const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
- void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
- int32* cumsum_bot_row;
- int32* max_cumsum_bot_row;
- int32* cumsum_top_row;
-
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- if (radius > height) {
- radius = height;
- }
- if (radius > (width / 2 - 1)) {
- radius = width / 2 - 1;
- }
- if (radius <= 0) {
- return -1;
- }
-#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
- CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
- }
-#endif
- // Compute enough CumulativeSum for first row to be blurred. After this
- // one row of CumulativeSum is updated at a time.
- ARGBComputeCumulativeSum(src_argb, src_stride_argb,
- dst_cumsum, dst_stride32_cumsum,
- width, radius);
-
- src_argb = src_argb + radius * src_stride_argb;
- cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
-
- max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
- cumsum_top_row = &dst_cumsum[0];
-
- for (y = 0; y < height; ++y) {
- int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
- int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
- int area = radius * (bot_y - top_y);
- int boxwidth = radius * 4;
- int x;
- int n;
-
- // Increment cumsum_top_row pointer with circular buffer wrap around.
- if (top_y) {
- cumsum_top_row += dst_stride32_cumsum;
- if (cumsum_top_row >= max_cumsum_bot_row) {
- cumsum_top_row = dst_cumsum;
- }
- }
- // Increment cumsum_bot_row pointer with circular buffer wrap around and
- // then fill in a row of CumulativeSum.
- if ((y + radius) < height) {
- const int32* prev_cumsum_bot_row = cumsum_bot_row;
- cumsum_bot_row += dst_stride32_cumsum;
- if (cumsum_bot_row >= max_cumsum_bot_row) {
- cumsum_bot_row = dst_cumsum;
- }
- ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
- width);
- src_argb += src_stride_argb;
- }
-
- // Left clipped.
- for (x = 0; x < radius + 1; ++x) {
- CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
- boxwidth, area, &dst_argb[x * 4], 1);
- area += (bot_y - top_y);
- boxwidth += 4;
- }
-
- // Middle unclipped.
- n = (width - 1) - radius - x + 1;
- CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
- boxwidth, area, &dst_argb[x * 4], n);
-
- // Right clipped.
- for (x += n; x <= width - 1; ++x) {
- area -= (bot_y - top_y);
- boxwidth -= 4;
- CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
- cumsum_bot_row + (x - radius - 1) * 4,
- boxwidth, area, &dst_argb[x * 4], 1);
- }
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Multiply ARGB image by a specified ARGB value.
-LIBYUV_API
-int ARGBShade(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height, uint32 value) {
- int y;
- void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
- int width, uint32 value) = ARGBShadeRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBSHADEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBShadeRow = ARGBShadeRow_SSE2;
- }
-#elif defined(HAS_ARGBSHADEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBShadeRow = ARGBShadeRow_NEON;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBShadeRow(src_argb, dst_argb, width, value);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Interpolate 2 ARGB images by specified amount (0 to 255).
-LIBYUV_API
-int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height, int interpolation) {
- int y;
- void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) = InterpolateRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
- IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
- IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
- IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
- IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
- width * 4, interpolation);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Shuffle ARGB channel order. e.g. BGRA to ARGB.
-LIBYUV_API
-int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_argb, int dst_stride_argb,
- const uint8* shuffler, int width, int height) {
- int y;
- void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
- const uint8* shuffler, int pix) = ARGBShuffleRow_C;
- if (!src_bgra || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_bgra = src_bgra + (height - 1) * src_stride_bgra;
- src_stride_bgra = -src_stride_bgra;
- }
- // Coalesce rows.
- if (src_stride_bgra == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_bgra = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBSHUFFLEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
- ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBShuffleRow = ARGBShuffleRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
- ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- ARGBShuffleRow = ARGBShuffleRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_ARGBSHUFFLEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
- ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- ARGBShuffleRow = ARGBShuffleRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBSHUFFLEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
- ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
- if (IS_ALIGNED(width, 4)) {
- ARGBShuffleRow = ARGBShuffleRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
- src_bgra += src_stride_bgra;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Sobel ARGB effect.
-static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height,
- void (*SobelRow)(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst, int width)) {
- int y;
- void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) = ARGBToBayerGGRow_C;
- void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) = SobelYRow_C;
- void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobely, int width) =
- SobelXRow_C;
- const int kEdge = 16; // Extra pixels at start of row for extrude/align.
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // ARGBToBayer used to select G channel from ARGB.
-#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
- ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOBAYERGGROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToBayerRow = ARGBToBayerGGRow_NEON;
- }
- }
-#endif
-#if defined(HAS_SOBELYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SobelYRow = SobelYRow_SSE2;
- }
-#endif
-#if defined(HAS_SOBELYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SobelYRow = SobelYRow_NEON;
- }
-#endif
-#if defined(HAS_SOBELXROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SobelXRow = SobelXRow_SSE2;
- }
-#endif
-#if defined(HAS_SOBELXROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SobelXRow = SobelXRow_NEON;
- }
-#endif
- {
- // 3 rows with edges before/after.
- const int kRowSize = (width + kEdge + 15) & ~15;
- align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
- uint8* row_sobelx = rows;
- uint8* row_sobely = rows + kRowSize;
- uint8* row_y = rows + kRowSize * 2;
-
- // Convert first row.
- uint8* row_y0 = row_y + kEdge;
- uint8* row_y1 = row_y0 + kRowSize;
- uint8* row_y2 = row_y1 + kRowSize;
- ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
- row_y0[-1] = row_y0[0];
- memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
- ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
- row_y1[-1] = row_y1[0];
- memset(row_y1 + width, row_y1[width - 1], 16);
- memset(row_y2 + width, 0, 16);
-
- for (y = 0; y < height; ++y) {
- // Convert next row of ARGB to Y.
- if (y < (height - 1)) {
- src_argb += src_stride_argb;
- }
- ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
- row_y2[-1] = row_y2[0];
- row_y2[width] = row_y2[width - 1];
-
- SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
- SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
- SobelRow(row_sobelx, row_sobely, dst_argb, width);
-
- // Cycle thru circular queue of 3 row_y buffers.
- {
- uint8* row_yt = row_y0;
- row_y0 = row_y1;
- row_y1 = row_y2;
- row_y2 = row_yt;
- }
-
- dst_argb += dst_stride_argb;
- }
- free_aligned_buffer_64(rows);
- }
- return 0;
-}
-
-// Sobel ARGB effect.
-LIBYUV_API
-int ARGBSobel(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) = SobelRow_C;
-#if defined(HAS_SOBELROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- SobelRow = SobelRow_SSE2;
- }
-#endif
-#if defined(HAS_SOBELROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- SobelRow = SobelRow_NEON;
- }
-#endif
- return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
- width, height, SobelRow);
-}
-
-// Sobel ARGB effect with planar output.
-LIBYUV_API
-int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_, int width) = SobelToPlaneRow_C;
-#if defined(HAS_SOBELTOPLANEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
- SobelToPlaneRow = SobelToPlaneRow_SSE2;
- }
-#endif
-#if defined(HAS_SOBELTOPLANEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
- SobelToPlaneRow = SobelToPlaneRow_NEON;
- }
-#endif
- return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
- width, height, SobelToPlaneRow);
-}
-
-// SobelXY ARGB effect.
-// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
-LIBYUV_API
-int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) = SobelXYRow_C;
-#if defined(HAS_SOBELXYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- SobelXYRow = SobelXYRow_SSE2;
- }
-#endif
-#if defined(HAS_SOBELXYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- SobelXYRow = SobelXYRow_NEON;
- }
-#endif
- return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
- width, height, SobelXYRow);
-}
-
-// Apply a 4x4 polynomial to each ARGB pixel.
-LIBYUV_API
-int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const float* poly,
- int width, int height) {
- int y;
- void (*ARGBPolynomialRow)(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) = ARGBPolynomialRow_C;
- if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
- ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
- IS_ALIGNED(width, 2)) {
- ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBPolynomialRow(src_argb, dst_argb, poly, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Apply a lumacolortable to each ARGB pixel.
-LIBYUV_API
-int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const uint8* luma,
- int width, int height) {
- int y;
- void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
- int width, const uint8* luma, const uint32 lumacoeff) =
- ARGBLumaColorTableRow_C;
- if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
- ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Copy Alpha from one ARGB image to another.
-LIBYUV_API
-int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
- ARGBCopyAlphaRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
- IS_ALIGNED(width, 8)) {
- ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
- ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBCopyAlphaRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Copy a planar Y channel to the alpha channel of a destination ARGB image.
-LIBYUV_API
-int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
- ARGBCopyYToAlphaRow_C;
- if (!src_y || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
- IS_ALIGNED(width, 8)) {
- ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
- ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBCopyYToAlphaRow(src_y, dst_argb, width);
- src_y += src_stride_y;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate.cc
deleted file mode 100755
index b052ac1dc4..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate.cc
+++ /dev/null
@@ -1,1301 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/convert.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#if defined(__APPLE__) && defined(__i386__)
-#define DECLARE_FUNCTION(name) \
- ".text \n" \
- ".private_extern _" #name " \n" \
- ".align 4,0x90 \n" \
-"_" #name ": \n"
-#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
-#define DECLARE_FUNCTION(name) \
- ".text \n" \
- ".align 4,0x90 \n" \
-"_" #name ": \n"
-#else
-#define DECLARE_FUNCTION(name) \
- ".text \n" \
- ".align 4,0x90 \n" \
-#name ": \n"
-#endif
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_MIRRORROW_NEON
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_MIRRORROW_UV_NEON
-void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
-#define HAS_TRANSPOSE_WX8_NEON
-void TransposeWx8_NEON(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-#define HAS_TRANSPOSE_UVWX8_NEON
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width);
-#endif // defined(__ARM_NEON__)
-
-#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
- defined(__mips__) && \
- defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_TRANSPOSE_WX8_MIPS_DSPR2
-void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-
-void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-#define HAS_TRANSPOSE_UVWx8_MIPS_DSPR2
-void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width);
-#endif // defined(__mips__)
-
-#if !defined(LIBYUV_DISABLE_X86) && \
- defined(_M_IX86) && defined(_MSC_VER)
-#define HAS_TRANSPOSE_WX8_SSSE3
-__declspec(naked) __declspec(align(16))
-static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- __asm {
- push edi
- push esi
- push ebp
- mov eax, [esp + 12 + 4] // src
- mov edi, [esp + 12 + 8] // src_stride
- mov edx, [esp + 12 + 12] // dst
- mov esi, [esp + 12 + 16] // dst_stride
- mov ecx, [esp + 12 + 20] // width
-
- // Read in the data from the source pointer.
- // First round of bit swap.
- align 4
- convertloop:
- movq xmm0, qword ptr [eax]
- lea ebp, [eax + 8]
- movq xmm1, qword ptr [eax + edi]
- lea eax, [eax + 2 * edi]
- punpcklbw xmm0, xmm1
- movq xmm2, qword ptr [eax]
- movdqa xmm1, xmm0
- palignr xmm1, xmm1, 8
- movq xmm3, qword ptr [eax + edi]
- lea eax, [eax + 2 * edi]
- punpcklbw xmm2, xmm3
- movdqa xmm3, xmm2
- movq xmm4, qword ptr [eax]
- palignr xmm3, xmm3, 8
- movq xmm5, qword ptr [eax + edi]
- punpcklbw xmm4, xmm5
- lea eax, [eax + 2 * edi]
- movdqa xmm5, xmm4
- movq xmm6, qword ptr [eax]
- palignr xmm5, xmm5, 8
- movq xmm7, qword ptr [eax + edi]
- punpcklbw xmm6, xmm7
- mov eax, ebp
- movdqa xmm7, xmm6
- palignr xmm7, xmm7, 8
- // Second round of bit swap.
- punpcklwd xmm0, xmm2
- punpcklwd xmm1, xmm3
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- palignr xmm2, xmm2, 8
- palignr xmm3, xmm3, 8
- punpcklwd xmm4, xmm6
- punpcklwd xmm5, xmm7
- movdqa xmm6, xmm4
- movdqa xmm7, xmm5
- palignr xmm6, xmm6, 8
- palignr xmm7, xmm7, 8
- // Third round of bit swap.
- // Write to the destination pointer.
- punpckldq xmm0, xmm4
- movq qword ptr [edx], xmm0
- movdqa xmm4, xmm0
- palignr xmm4, xmm4, 8
- movq qword ptr [edx + esi], xmm4
- lea edx, [edx + 2 * esi]
- punpckldq xmm2, xmm6
- movdqa xmm6, xmm2
- palignr xmm6, xmm6, 8
- movq qword ptr [edx], xmm2
- punpckldq xmm1, xmm5
- movq qword ptr [edx + esi], xmm6
- lea edx, [edx + 2 * esi]
- movdqa xmm5, xmm1
- movq qword ptr [edx], xmm1
- palignr xmm5, xmm5, 8
- punpckldq xmm3, xmm7
- movq qword ptr [edx + esi], xmm5
- lea edx, [edx + 2 * esi]
- movq qword ptr [edx], xmm3
- movdqa xmm7, xmm3
- palignr xmm7, xmm7, 8
- sub ecx, 8
- movq qword ptr [edx + esi], xmm7
- lea edx, [edx + 2 * esi]
- jg convertloop
-
- pop ebp
- pop esi
- pop edi
- ret
- }
-}
-
-#define HAS_TRANSPOSE_UVWX8_SSE2
-__declspec(naked) __declspec(align(16))
-static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int w) {
- __asm {
- push ebx
- push esi
- push edi
- push ebp
- mov eax, [esp + 16 + 4] // src
- mov edi, [esp + 16 + 8] // src_stride
- mov edx, [esp + 16 + 12] // dst_a
- mov esi, [esp + 16 + 16] // dst_stride_a
- mov ebx, [esp + 16 + 20] // dst_b
- mov ebp, [esp + 16 + 24] // dst_stride_b
- mov ecx, esp
- sub esp, 4 + 16
- and esp, ~15
- mov [esp + 16], ecx
- mov ecx, [ecx + 16 + 28] // w
-
- align 4
- convertloop:
- // Read in the data from the source pointer.
- // First round of bit swap.
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa xmm7, xmm0 // use xmm7 as temp register.
- punpcklbw xmm0, xmm1
- punpckhbw xmm7, xmm1
- movdqa xmm1, xmm7
- movdqa xmm2, [eax]
- movdqa xmm3, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa xmm7, xmm2
- punpcklbw xmm2, xmm3
- punpckhbw xmm7, xmm3
- movdqa xmm3, xmm7
- movdqa xmm4, [eax]
- movdqa xmm5, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa xmm7, xmm4
- punpcklbw xmm4, xmm5
- punpckhbw xmm7, xmm5
- movdqa xmm5, xmm7
- movdqa xmm6, [eax]
- movdqa xmm7, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa [esp], xmm5 // backup xmm5
- neg edi
- movdqa xmm5, xmm6 // use xmm5 as temp register.
- punpcklbw xmm6, xmm7
- punpckhbw xmm5, xmm7
- movdqa xmm7, xmm5
- lea eax, [eax + 8 * edi + 16]
- neg edi
- // Second round of bit swap.
- movdqa xmm5, xmm0
- punpcklwd xmm0, xmm2
- punpckhwd xmm5, xmm2
- movdqa xmm2, xmm5
- movdqa xmm5, xmm1
- punpcklwd xmm1, xmm3
- punpckhwd xmm5, xmm3
- movdqa xmm3, xmm5
- movdqa xmm5, xmm4
- punpcklwd xmm4, xmm6
- punpckhwd xmm5, xmm6
- movdqa xmm6, xmm5
- movdqa xmm5, [esp] // restore xmm5
- movdqa [esp], xmm6 // backup xmm6
- movdqa xmm6, xmm5 // use xmm6 as temp register.
- punpcklwd xmm5, xmm7
- punpckhwd xmm6, xmm7
- movdqa xmm7, xmm6
- // Third round of bit swap.
- // Write to the destination pointer.
- movdqa xmm6, xmm0
- punpckldq xmm0, xmm4
- punpckhdq xmm6, xmm4
- movdqa xmm4, xmm6
- movdqa xmm6, [esp] // restore xmm6
- movlpd qword ptr [edx], xmm0
- movhpd qword ptr [ebx], xmm0
- movlpd qword ptr [edx + esi], xmm4
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm4
- lea ebx, [ebx + 2 * ebp]
- movdqa xmm0, xmm2 // use xmm0 as the temp register.
- punpckldq xmm2, xmm6
- movlpd qword ptr [edx], xmm2
- movhpd qword ptr [ebx], xmm2
- punpckhdq xmm0, xmm6
- movlpd qword ptr [edx + esi], xmm0
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm0
- lea ebx, [ebx + 2 * ebp]
- movdqa xmm0, xmm1 // use xmm0 as the temp register.
- punpckldq xmm1, xmm5
- movlpd qword ptr [edx], xmm1
- movhpd qword ptr [ebx], xmm1
- punpckhdq xmm0, xmm5
- movlpd qword ptr [edx + esi], xmm0
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm0
- lea ebx, [ebx + 2 * ebp]
- movdqa xmm0, xmm3 // use xmm0 as the temp register.
- punpckldq xmm3, xmm7
- movlpd qword ptr [edx], xmm3
- movhpd qword ptr [ebx], xmm3
- punpckhdq xmm0, xmm7
- sub ecx, 8
- movlpd qword ptr [edx + esi], xmm0
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm0
- lea ebx, [ebx + 2 * ebp]
- jg convertloop
-
- mov esp, [esp + 16]
- pop ebp
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-#elif !defined(LIBYUV_DISABLE_X86) && \
- (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
-#define HAS_TRANSPOSE_WX8_SSSE3
-static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- asm volatile (
- // Read in the data from the source pointer.
- // First round of bit swap.
- ".p2align 2 \n"
- "1: \n"
- "movq (%0),%%xmm0 \n"
- "movq (%0,%3),%%xmm1 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "movq (%0),%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "palignr $0x8,%%xmm1,%%xmm1 \n"
- "movq (%0,%3),%%xmm3 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "movq (%0),%%xmm4 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "movq (%0,%3),%%xmm5 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm5,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "movq (%0),%%xmm6 \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "movq (%0,%3),%%xmm7 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm7,%%xmm6 \n"
- "neg %3 \n"
- "movdqa %%xmm6,%%xmm7 \n"
- "lea 0x8(%0,%3,8),%0 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "neg %3 \n"
- // Second round of bit swap.
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpcklwd %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "palignr $0x8,%%xmm2,%%xmm2 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "punpcklwd %%xmm6,%%xmm4 \n"
- "punpcklwd %%xmm7,%%xmm5 \n"
- "movdqa %%xmm4,%%xmm6 \n"
- "movdqa %%xmm5,%%xmm7 \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- // Third round of bit swap.
- // Write to the destination pointer.
- "punpckldq %%xmm4,%%xmm0 \n"
- "movq %%xmm0,(%1) \n"
- "movdqa %%xmm0,%%xmm4 \n"
- "palignr $0x8,%%xmm4,%%xmm4 \n"
- "movq %%xmm4,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm6,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "movq %%xmm2,(%1) \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "punpckldq %%xmm5,%%xmm1 \n"
- "movq %%xmm6,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "movdqa %%xmm1,%%xmm5 \n"
- "movq %%xmm1,(%1) \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "movq %%xmm5,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm7,%%xmm3 \n"
- "movq %%xmm3,(%1) \n"
- "movdqa %%xmm3,%%xmm7 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "sub $0x8,%2 \n"
- "movq %%xmm7,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "r"((intptr_t)(dst_stride)) // %4
- : "memory", "cc"
- #if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- #endif
- );
-}
-
-#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)
-#define HAS_TRANSPOSE_UVWX8_SSE2
-extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int w);
- asm (
- DECLARE_FUNCTION(TransposeUVWx8_SSE2)
- "push %ebx \n"
- "push %esi \n"
- "push %edi \n"
- "push %ebp \n"
- "mov 0x14(%esp),%eax \n"
- "mov 0x18(%esp),%edi \n"
- "mov 0x1c(%esp),%edx \n"
- "mov 0x20(%esp),%esi \n"
- "mov 0x24(%esp),%ebx \n"
- "mov 0x28(%esp),%ebp \n"
- "mov %esp,%ecx \n"
- "sub $0x14,%esp \n"
- "and $0xfffffff0,%esp \n"
- "mov %ecx,0x10(%esp) \n"
- "mov 0x2c(%ecx),%ecx \n"
-
-"1: \n"
- "movdqa (%eax),%xmm0 \n"
- "movdqa (%eax,%edi,1),%xmm1 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm0,%xmm7 \n"
- "punpcklbw %xmm1,%xmm0 \n"
- "punpckhbw %xmm1,%xmm7 \n"
- "movdqa %xmm7,%xmm1 \n"
- "movdqa (%eax),%xmm2 \n"
- "movdqa (%eax,%edi,1),%xmm3 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm2,%xmm7 \n"
- "punpcklbw %xmm3,%xmm2 \n"
- "punpckhbw %xmm3,%xmm7 \n"
- "movdqa %xmm7,%xmm3 \n"
- "movdqa (%eax),%xmm4 \n"
- "movdqa (%eax,%edi,1),%xmm5 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm4,%xmm7 \n"
- "punpcklbw %xmm5,%xmm4 \n"
- "punpckhbw %xmm5,%xmm7 \n"
- "movdqa %xmm7,%xmm5 \n"
- "movdqa (%eax),%xmm6 \n"
- "movdqa (%eax,%edi,1),%xmm7 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm5,(%esp) \n"
- "neg %edi \n"
- "movdqa %xmm6,%xmm5 \n"
- "punpcklbw %xmm7,%xmm6 \n"
- "punpckhbw %xmm7,%xmm5 \n"
- "movdqa %xmm5,%xmm7 \n"
- "lea 0x10(%eax,%edi,8),%eax \n"
- "neg %edi \n"
- "movdqa %xmm0,%xmm5 \n"
- "punpcklwd %xmm2,%xmm0 \n"
- "punpckhwd %xmm2,%xmm5 \n"
- "movdqa %xmm5,%xmm2 \n"
- "movdqa %xmm1,%xmm5 \n"
- "punpcklwd %xmm3,%xmm1 \n"
- "punpckhwd %xmm3,%xmm5 \n"
- "movdqa %xmm5,%xmm3 \n"
- "movdqa %xmm4,%xmm5 \n"
- "punpcklwd %xmm6,%xmm4 \n"
- "punpckhwd %xmm6,%xmm5 \n"
- "movdqa %xmm5,%xmm6 \n"
- "movdqa (%esp),%xmm5 \n"
- "movdqa %xmm6,(%esp) \n"
- "movdqa %xmm5,%xmm6 \n"
- "punpcklwd %xmm7,%xmm5 \n"
- "punpckhwd %xmm7,%xmm6 \n"
- "movdqa %xmm6,%xmm7 \n"
- "movdqa %xmm0,%xmm6 \n"
- "punpckldq %xmm4,%xmm0 \n"
- "punpckhdq %xmm4,%xmm6 \n"
- "movdqa %xmm6,%xmm4 \n"
- "movdqa (%esp),%xmm6 \n"
- "movlpd %xmm0,(%edx) \n"
- "movhpd %xmm0,(%ebx) \n"
- "movlpd %xmm4,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm4,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "movdqa %xmm2,%xmm0 \n"
- "punpckldq %xmm6,%xmm2 \n"
- "movlpd %xmm2,(%edx) \n"
- "movhpd %xmm2,(%ebx) \n"
- "punpckhdq %xmm6,%xmm0 \n"
- "movlpd %xmm0,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm0,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "movdqa %xmm1,%xmm0 \n"
- "punpckldq %xmm5,%xmm1 \n"
- "movlpd %xmm1,(%edx) \n"
- "movhpd %xmm1,(%ebx) \n"
- "punpckhdq %xmm5,%xmm0 \n"
- "movlpd %xmm0,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm0,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "movdqa %xmm3,%xmm0 \n"
- "punpckldq %xmm7,%xmm3 \n"
- "movlpd %xmm3,(%edx) \n"
- "movhpd %xmm3,(%ebx) \n"
- "punpckhdq %xmm7,%xmm0 \n"
- "sub $0x8,%ecx \n"
- "movlpd %xmm0,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm0,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "jg 1b \n"
- "mov 0x10(%esp),%esp \n"
- "pop %ebp \n"
- "pop %edi \n"
- "pop %esi \n"
- "pop %ebx \n"
-#if defined(__native_client__)
- "pop %ecx \n"
- "and $0xffffffe0,%ecx \n"
- "jmp *%ecx \n"
-#else
- "ret \n"
-#endif
-);
-#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
- defined(__x86_64__)
-// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
-#define HAS_TRANSPOSE_WX8_FAST_SSSE3
-static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- asm volatile (
- // Read in the data from the source pointer.
- // First round of bit swap.
- ".p2align 2 \n"
-"1: \n"
- "movdqa (%0),%%xmm0 \n"
- "movdqa (%0,%3),%%xmm1 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm0,%%xmm8 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm8 \n"
- "movdqa (%0),%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm8,%%xmm9 \n"
- "palignr $0x8,%%xmm1,%%xmm1 \n"
- "palignr $0x8,%%xmm9,%%xmm9 \n"
- "movdqa (%0,%3),%%xmm3 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm2,%%xmm10 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "punpckhbw %%xmm3,%%xmm10 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "movdqa %%xmm10,%%xmm11 \n"
- "movdqa (%0),%%xmm4 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "palignr $0x8,%%xmm11,%%xmm11 \n"
- "movdqa (%0,%3),%%xmm5 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm4,%%xmm12 \n"
- "punpcklbw %%xmm5,%%xmm4 \n"
- "punpckhbw %%xmm5,%%xmm12 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "movdqa %%xmm12,%%xmm13 \n"
- "movdqa (%0),%%xmm6 \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "palignr $0x8,%%xmm13,%%xmm13 \n"
- "movdqa (%0,%3),%%xmm7 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm6,%%xmm14 \n"
- "punpcklbw %%xmm7,%%xmm6 \n"
- "punpckhbw %%xmm7,%%xmm14 \n"
- "neg %3 \n"
- "movdqa %%xmm6,%%xmm7 \n"
- "movdqa %%xmm14,%%xmm15 \n"
- "lea 0x10(%0,%3,8),%0 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "palignr $0x8,%%xmm15,%%xmm15 \n"
- "neg %3 \n"
- // Second round of bit swap.
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpcklwd %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "palignr $0x8,%%xmm2,%%xmm2 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "punpcklwd %%xmm6,%%xmm4 \n"
- "punpcklwd %%xmm7,%%xmm5 \n"
- "movdqa %%xmm4,%%xmm6 \n"
- "movdqa %%xmm5,%%xmm7 \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "punpcklwd %%xmm10,%%xmm8 \n"
- "punpcklwd %%xmm11,%%xmm9 \n"
- "movdqa %%xmm8,%%xmm10 \n"
- "movdqa %%xmm9,%%xmm11 \n"
- "palignr $0x8,%%xmm10,%%xmm10 \n"
- "palignr $0x8,%%xmm11,%%xmm11 \n"
- "punpcklwd %%xmm14,%%xmm12 \n"
- "punpcklwd %%xmm15,%%xmm13 \n"
- "movdqa %%xmm12,%%xmm14 \n"
- "movdqa %%xmm13,%%xmm15 \n"
- "palignr $0x8,%%xmm14,%%xmm14 \n"
- "palignr $0x8,%%xmm15,%%xmm15 \n"
- // Third round of bit swap.
- // Write to the destination pointer.
- "punpckldq %%xmm4,%%xmm0 \n"
- "movq %%xmm0,(%1) \n"
- "movdqa %%xmm0,%%xmm4 \n"
- "palignr $0x8,%%xmm4,%%xmm4 \n"
- "movq %%xmm4,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm6,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "movq %%xmm2,(%1) \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "punpckldq %%xmm5,%%xmm1 \n"
- "movq %%xmm6,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "movdqa %%xmm1,%%xmm5 \n"
- "movq %%xmm1,(%1) \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "movq %%xmm5,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm7,%%xmm3 \n"
- "movq %%xmm3,(%1) \n"
- "movdqa %%xmm3,%%xmm7 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "movq %%xmm7,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm12,%%xmm8 \n"
- "movq %%xmm8,(%1) \n"
- "movdqa %%xmm8,%%xmm12 \n"
- "palignr $0x8,%%xmm12,%%xmm12 \n"
- "movq %%xmm12,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm14,%%xmm10 \n"
- "movdqa %%xmm10,%%xmm14 \n"
- "movq %%xmm10,(%1) \n"
- "palignr $0x8,%%xmm14,%%xmm14 \n"
- "punpckldq %%xmm13,%%xmm9 \n"
- "movq %%xmm14,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "movdqa %%xmm9,%%xmm13 \n"
- "movq %%xmm9,(%1) \n"
- "palignr $0x8,%%xmm13,%%xmm13 \n"
- "movq %%xmm13,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm15,%%xmm11 \n"
- "movq %%xmm11,(%1) \n"
- "movdqa %%xmm11,%%xmm15 \n"
- "palignr $0x8,%%xmm15,%%xmm15 \n"
- "sub $0x10,%2 \n"
- "movq %%xmm15,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "r"((intptr_t)(dst_stride)) // %4
- : "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
- "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
-);
-}
-
-#define HAS_TRANSPOSE_UVWX8_SSE2
-static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int w) {
- asm volatile (
- // Read in the data from the source pointer.
- // First round of bit swap.
- ".p2align 2 \n"
-"1: \n"
- "movdqa (%0),%%xmm0 \n"
- "movdqa (%0,%4),%%xmm1 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm0,%%xmm8 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm1 \n"
- "movdqa (%0),%%xmm2 \n"
- "movdqa (%0,%4),%%xmm3 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm2,%%xmm8 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "punpckhbw %%xmm3,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm3 \n"
- "movdqa (%0),%%xmm4 \n"
- "movdqa (%0,%4),%%xmm5 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm4,%%xmm8 \n"
- "punpcklbw %%xmm5,%%xmm4 \n"
- "punpckhbw %%xmm5,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm5 \n"
- "movdqa (%0),%%xmm6 \n"
- "movdqa (%0,%4),%%xmm7 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm6,%%xmm8 \n"
- "punpcklbw %%xmm7,%%xmm6 \n"
- "neg %4 \n"
- "lea 0x10(%0,%4,8),%0 \n"
- "punpckhbw %%xmm7,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm7 \n"
- "neg %4 \n"
- // Second round of bit swap.
- "movdqa %%xmm0,%%xmm8 \n"
- "movdqa %%xmm1,%%xmm9 \n"
- "punpckhwd %%xmm2,%%xmm8 \n"
- "punpckhwd %%xmm3,%%xmm9 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpcklwd %%xmm3,%%xmm1 \n"
- "movdqa %%xmm8,%%xmm2 \n"
- "movdqa %%xmm9,%%xmm3 \n"
- "movdqa %%xmm4,%%xmm8 \n"
- "movdqa %%xmm5,%%xmm9 \n"
- "punpckhwd %%xmm6,%%xmm8 \n"
- "punpckhwd %%xmm7,%%xmm9 \n"
- "punpcklwd %%xmm6,%%xmm4 \n"
- "punpcklwd %%xmm7,%%xmm5 \n"
- "movdqa %%xmm8,%%xmm6 \n"
- "movdqa %%xmm9,%%xmm7 \n"
- // Third round of bit swap.
- // Write to the destination pointer.
- "movdqa %%xmm0,%%xmm8 \n"
- "punpckldq %%xmm4,%%xmm0 \n"
- "movlpd %%xmm0,(%1) \n" // Write back U channel
- "movhpd %%xmm0,(%2) \n" // Write back V channel
- "punpckhdq %%xmm4,%%xmm8 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "movdqa %%xmm2,%%xmm8 \n"
- "punpckldq %%xmm6,%%xmm2 \n"
- "movlpd %%xmm2,(%1) \n"
- "movhpd %%xmm2,(%2) \n"
- "punpckhdq %%xmm6,%%xmm8 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "movdqa %%xmm1,%%xmm8 \n"
- "punpckldq %%xmm5,%%xmm1 \n"
- "movlpd %%xmm1,(%1) \n"
- "movhpd %%xmm1,(%2) \n"
- "punpckhdq %%xmm5,%%xmm8 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "movdqa %%xmm3,%%xmm8 \n"
- "punpckldq %%xmm7,%%xmm3 \n"
- "movlpd %%xmm3,(%1) \n"
- "movhpd %%xmm3,(%2) \n"
- "punpckhdq %%xmm7,%%xmm8 \n"
- "sub $0x8,%3 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst_a), // %1
- "+r"(dst_b), // %2
- "+r"(w) // %3
- : "r"((intptr_t)(src_stride)), // %4
- "r"((intptr_t)(dst_stride_a)), // %5
- "r"((intptr_t)(dst_stride_b)) // %6
- : "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
- "xmm8", "xmm9"
-);
-}
-#endif
-#endif
-
-static void TransposeWx8_C(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) {
- int i;
- for (i = 0; i < width; ++i) {
- dst[0] = src[0 * src_stride];
- dst[1] = src[1 * src_stride];
- dst[2] = src[2 * src_stride];
- dst[3] = src[3 * src_stride];
- dst[4] = src[4 * src_stride];
- dst[5] = src[5 * src_stride];
- dst[6] = src[6 * src_stride];
- dst[7] = src[7 * src_stride];
- ++src;
- dst += dst_stride;
- }
-}
-
-static void TransposeWxH_C(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- int i;
- for (i = 0; i < width; ++i) {
- int j;
- for (j = 0; j < height; ++j) {
- dst[i * dst_stride + j] = src[j * src_stride + i];
- }
- }
-}
-
-LIBYUV_API
-void TransposePlane(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- int i = height;
- void (*TransposeWx8)(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) = TransposeWx8_C;
-#if defined(HAS_TRANSPOSE_WX8_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- TransposeWx8 = TransposeWx8_NEON;
- }
-#endif
-#if defined(HAS_TRANSPOSE_WX8_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
- TransposeWx8 = TransposeWx8_SSSE3;
- }
-#endif
-#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) &&
- IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
- TransposeWx8 = TransposeWx8_FAST_SSSE3;
- }
-#endif
-#if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
- if (IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
- TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2;
- } else {
- TransposeWx8 = TransposeWx8_MIPS_DSPR2;
- }
- }
-#endif
-
- // Work across the source in 8x8 tiles
- while (i >= 8) {
- TransposeWx8(src, src_stride, dst, dst_stride, width);
- src += 8 * src_stride; // Go down 8 rows.
- dst += 8; // Move over 8 columns.
- i -= 8;
- }
-
- TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
-}
-
-LIBYUV_API
-void RotatePlane90(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- // Rotate by 90 is a transpose with the source read
- // from bottom to top. So set the source pointer to the end
- // of the buffer and flip the sign of the source stride.
- src += src_stride * (height - 1);
- src_stride = -src_stride;
- TransposePlane(src, src_stride, dst, dst_stride, width, height);
-}
-
-LIBYUV_API
-void RotatePlane270(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- // Rotate by 270 is a transpose with the destination written
- // from bottom to top. So set the destination pointer to the end
- // of the buffer and flip the sign of the destination stride.
- dst += dst_stride * (width - 1);
- dst_stride = -dst_stride;
- TransposePlane(src, src_stride, dst, dst_stride, width, height);
-}
-
-LIBYUV_API
-void RotatePlane180(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- // Swap first and last row and mirror the content. Uses a temporary row.
- align_buffer_64(row, width);
- const uint8* src_bot = src + src_stride * (height - 1);
- uint8* dst_bot = dst + dst_stride * (height - 1);
- int half_height = (height + 1) >> 1;
- int y;
- void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_MIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_NEON;
- }
-#endif
-#if defined(HAS_MIRRORROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- MirrorRow = MirrorRow_SSE2;
- }
-#endif
-#if defined(HAS_MIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- MirrorRow = MirrorRow_SSSE3;
- }
-#endif
-#if defined(HAS_MIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
- MirrorRow = MirrorRow_AVX2;
- }
-#endif
-#if defined(HAS_MIRRORROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
- MirrorRow = MirrorRow_MIPS_DSPR2;
- }
-#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_X86;
- }
-#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- CopyRow = CopyRow_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
- // Odd height will harmlessly mirror the middle row twice.
- for (y = 0; y < half_height; ++y) {
- MirrorRow(src, row, width); // Mirror first row into a buffer
- src += src_stride;
- MirrorRow(src_bot, dst, width); // Mirror last row into first row
- dst += dst_stride;
- CopyRow(row, dst_bot, width); // Copy first mirrored row into last
- src_bot -= src_stride;
- dst_bot -= dst_stride;
- }
- free_aligned_buffer_64(row);
-}
-
-static void TransposeUVWx8_C(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width) {
- int i;
- for (i = 0; i < width; ++i) {
- dst_a[0] = src[0 * src_stride + 0];
- dst_b[0] = src[0 * src_stride + 1];
- dst_a[1] = src[1 * src_stride + 0];
- dst_b[1] = src[1 * src_stride + 1];
- dst_a[2] = src[2 * src_stride + 0];
- dst_b[2] = src[2 * src_stride + 1];
- dst_a[3] = src[3 * src_stride + 0];
- dst_b[3] = src[3 * src_stride + 1];
- dst_a[4] = src[4 * src_stride + 0];
- dst_b[4] = src[4 * src_stride + 1];
- dst_a[5] = src[5 * src_stride + 0];
- dst_b[5] = src[5 * src_stride + 1];
- dst_a[6] = src[6 * src_stride + 0];
- dst_b[6] = src[6 * src_stride + 1];
- dst_a[7] = src[7 * src_stride + 0];
- dst_b[7] = src[7 * src_stride + 1];
- src += 2;
- dst_a += dst_stride_a;
- dst_b += dst_stride_b;
- }
-}
-
-static void TransposeUVWxH_C(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- int i;
- for (i = 0; i < width * 2; i += 2) {
- int j;
- for (j = 0; j < height; ++j) {
- dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
- dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
- }
- }
-}
-
-LIBYUV_API
-void TransposeUV(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- int i = height;
- void (*TransposeUVWx8)(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width) = TransposeUVWx8_C;
-#if defined(HAS_TRANSPOSE_UVWX8_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- TransposeUVWx8 = TransposeUVWx8_NEON;
- }
-#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(width, 8) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
- TransposeUVWx8 = TransposeUVWx8_SSE2;
- }
-#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
- IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
- TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
- }
-#endif
-
- // Work through the source in 8x8 tiles.
- while (i >= 8) {
- TransposeUVWx8(src, src_stride,
- dst_a, dst_stride_a,
- dst_b, dst_stride_b,
- width);
- src += 8 * src_stride; // Go down 8 rows.
- dst_a += 8; // Move over 8 columns.
- dst_b += 8; // Move over 8 columns.
- i -= 8;
- }
-
- TransposeUVWxH_C(src, src_stride,
- dst_a, dst_stride_a,
- dst_b, dst_stride_b,
- width, i);
-}
-
-LIBYUV_API
-void RotateUV90(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- src += src_stride * (height - 1);
- src_stride = -src_stride;
-
- TransposeUV(src, src_stride,
- dst_a, dst_stride_a,
- dst_b, dst_stride_b,
- width, height);
-}
-
-LIBYUV_API
-void RotateUV270(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- dst_a += dst_stride_a * (width - 1);
- dst_b += dst_stride_b * (width - 1);
- dst_stride_a = -dst_stride_a;
- dst_stride_b = -dst_stride_b;
-
- TransposeUV(src, src_stride,
- dst_a, dst_stride_a,
- dst_b, dst_stride_b,
- width, height);
-}
-
-// Rotate 180 is a horizontal and vertical flip.
-LIBYUV_API
-void RotateUV180(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- int i;
- void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
- MirrorUVRow_C;
-#if defined(HAS_MIRRORUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- MirrorRowUV = MirrorUVRow_NEON;
- }
-#elif defined(HAS_MIRRORROW_UV_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
- MirrorRowUV = MirrorUVRow_SSSE3;
- }
-#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
- MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
- }
-#endif
-
- dst_a += dst_stride_a * (height - 1);
- dst_b += dst_stride_b * (height - 1);
-
- for (i = 0; i < height; ++i) {
- MirrorRowUV(src, dst_a, dst_b, width);
- src += src_stride;
- dst_a -= dst_stride_a;
- dst_b -= dst_stride_b;
- }
-}
-
-LIBYUV_API
-int RotatePlane(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height,
- enum RotationMode mode) {
- if (!src || width <= 0 || height == 0 || !dst) {
- return -1;
- }
-
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src = src + (height - 1) * src_stride;
- src_stride = -src_stride;
- }
-
- switch (mode) {
- case kRotate0:
- // copy frame
- CopyPlane(src, src_stride,
- dst, dst_stride,
- width, height);
- return 0;
- case kRotate90:
- RotatePlane90(src, src_stride,
- dst, dst_stride,
- width, height);
- return 0;
- case kRotate270:
- RotatePlane270(src, src_stride,
- dst, dst_stride,
- width, height);
- return 0;
- case kRotate180:
- RotatePlane180(src, src_stride,
- dst, dst_stride,
- width, height);
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-LIBYUV_API
-int I420Rotate(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height,
- enum RotationMode mode) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
- !dst_y || !dst_u || !dst_v) {
- return -1;
- }
-
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (halfheight - 1) * src_stride_u;
- src_v = src_v + (halfheight - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- switch (mode) {
- case kRotate0:
- // copy frame
- return I420Copy(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height);
- case kRotate90:
- RotatePlane90(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotatePlane90(src_u, src_stride_u,
- dst_u, dst_stride_u,
- halfwidth, halfheight);
- RotatePlane90(src_v, src_stride_v,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- case kRotate270:
- RotatePlane270(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotatePlane270(src_u, src_stride_u,
- dst_u, dst_stride_u,
- halfwidth, halfheight);
- RotatePlane270(src_v, src_stride_v,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- case kRotate180:
- RotatePlane180(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotatePlane180(src_u, src_stride_u,
- dst_u, dst_stride_u,
- halfwidth, halfheight);
- RotatePlane180(src_v, src_stride_v,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-LIBYUV_API
-int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height,
- enum RotationMode mode) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (!src_y || !src_uv || width <= 0 || height == 0 ||
- !dst_y || !dst_u || !dst_v) {
- return -1;
- }
-
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_uv = src_uv + (halfheight - 1) * src_stride_uv;
- src_stride_y = -src_stride_y;
- src_stride_uv = -src_stride_uv;
- }
-
- switch (mode) {
- case kRotate0:
- // copy frame
- return NV12ToI420(src_y, src_stride_y,
- src_uv, src_stride_uv,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height);
- case kRotate90:
- RotatePlane90(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotateUV90(src_uv, src_stride_uv,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- case kRotate270:
- RotatePlane270(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotateUV270(src_uv, src_stride_uv,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- case kRotate180:
- RotatePlane180(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotateUV180(src_uv, src_stride_uv,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc
deleted file mode 100755
index ab0f9ce070..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/convert.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// ARGBScale has a function to copy pixels to a row, striding each source
-// pixel by a constant.
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || \
- (defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
-#define HAS_SCALEARGBROWDOWNEVEN_SSE2
-void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
- int src_stepx,
- uint8* dst_ptr, int dst_width);
-#endif
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
-void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
- int src_stepx,
- uint8* dst_ptr, int dst_width);
-#endif
-
-void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
- int src_stepx,
- uint8* dst_ptr, int dst_width);
-
-static void ARGBTranspose(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- int i;
- int src_pixel_step = src_stride >> 2;
- void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
- int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
-#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest.
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
- }
-#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) && // Width of dest.
- IS_ALIGNED(src, 4)) {
- ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
- }
-#endif
-
- for (i = 0; i < width; ++i) { // column of source to row of dest.
- ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
- dst += dst_stride;
- src += 4;
- }
-}
-
-void ARGBRotate90(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- // Rotate by 90 is a ARGBTranspose with the source read
- // from bottom to top. So set the source pointer to the end
- // of the buffer and flip the sign of the source stride.
- src += src_stride * (height - 1);
- src_stride = -src_stride;
- ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
-}
-
-void ARGBRotate270(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- // Rotate by 270 is a ARGBTranspose with the destination written
- // from bottom to top. So set the destination pointer to the end
- // of the buffer and flip the sign of the destination stride.
- dst += dst_stride * (width - 1);
- dst_stride = -dst_stride;
- ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
-}
-
-void ARGBRotate180(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- // Swap first and last row and mirror the content. Uses a temporary row.
- align_buffer_64(row, width * 4);
- const uint8* src_bot = src + src_stride * (height - 1);
- uint8* dst_bot = dst + dst_stride * (height - 1);
- int half_height = (height + 1) >> 1;
- int y;
- void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
- ARGBMirrorRow_C;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_ARGBMIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- ARGBMirrorRow = ARGBMirrorRow_SSSE3;
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
- ARGBMirrorRow = ARGBMirrorRow_AVX2;
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
- ARGBMirrorRow = ARGBMirrorRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) {
- CopyRow = CopyRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- CopyRow = CopyRow_X86;
- }
-#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- CopyRow = CopyRow_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
- // Odd height will harmlessly mirror the middle row twice.
- for (y = 0; y < half_height; ++y) {
- ARGBMirrorRow(src, row, width); // Mirror first row into a buffer
- ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row
- CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
- src += src_stride;
- dst += dst_stride;
- src_bot -= src_stride;
- dst_bot -= dst_stride;
- }
- free_aligned_buffer_64(row);
-}
-
-LIBYUV_API
-int ARGBRotate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height,
- enum RotationMode mode) {
- if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
- return -1;
- }
-
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-
- switch (mode) {
- case kRotate0:
- // copy frame
- return ARGBCopy(src_argb, src_stride_argb,
- dst_argb, dst_stride_argb,
- width, height);
- case kRotate90:
- ARGBRotate90(src_argb, src_stride_argb,
- dst_argb, dst_stride_argb,
- width, height);
- return 0;
- case kRotate270:
- ARGBRotate270(src_argb, src_stride_argb,
- dst_argb, dst_stride_argb,
- width, height);
- return 0;
- case kRotate180:
- ARGBRotate180(src_argb, src_stride_argb,
- dst_argb, dst_stride_argb,
- width, height);
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc
deleted file mode 100755
index 04d5a663f7..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc
+++ /dev/null
@@ -1,486 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_MIPS) && \
- defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
- "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
- "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
- "addu $t3, $t2, %[src_stride] \n"
- "addu $t5, $t4, %[src_stride] \n"
- "addu $t6, $t2, $t4 \n"
- "andi $t0, %[dst], 0x3 \n"
- "andi $t1, %[dst_stride], 0x3 \n"
- "or $t0, $t0, $t1 \n"
- "bnez $t0, 11f \n"
- " subu $t7, $t9, %[src_stride] \n"
-//dst + dst_stride word aligned
- "1: \n"
- "lbu $t0, 0(%[src]) \n"
- "lbux $t1, %[src_stride](%[src]) \n"
- "lbux $t8, $t2(%[src]) \n"
- "lbux $t9, $t3(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s0, $t8, $t0 \n"
- "lbux $t0, $t4(%[src]) \n"
- "lbux $t1, $t5(%[src]) \n"
- "lbux $t8, $t6(%[src]) \n"
- "lbux $t9, $t7(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s1, $t8, $t0 \n"
- "sw $s0, 0(%[dst]) \n"
- "addiu %[width], -1 \n"
- "addiu %[src], 1 \n"
- "sw $s1, 4(%[dst]) \n"
- "bnez %[width], 1b \n"
- " addu %[dst], %[dst], %[dst_stride] \n"
- "b 2f \n"
-//dst + dst_stride unaligned
- "11: \n"
- "lbu $t0, 0(%[src]) \n"
- "lbux $t1, %[src_stride](%[src]) \n"
- "lbux $t8, $t2(%[src]) \n"
- "lbux $t9, $t3(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s0, $t8, $t0 \n"
- "lbux $t0, $t4(%[src]) \n"
- "lbux $t1, $t5(%[src]) \n"
- "lbux $t8, $t6(%[src]) \n"
- "lbux $t9, $t7(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s1, $t8, $t0 \n"
- "swr $s0, 0(%[dst]) \n"
- "swl $s0, 3(%[dst]) \n"
- "addiu %[width], -1 \n"
- "addiu %[src], 1 \n"
- "swr $s1, 4(%[dst]) \n"
- "swl $s1, 7(%[dst]) \n"
- "bnez %[width], 11b \n"
- "addu %[dst], %[dst], %[dst_stride] \n"
- "2: \n"
- ".set pop \n"
- :[src] "+r" (src),
- [dst] "+r" (dst),
- [width] "+r" (width)
- :[src_stride] "r" (src_stride),
- [dst_stride] "r" (dst_stride)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1"
- );
-}
-
-void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) {
- __asm__ __volatile__ (
- ".set noat \n"
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
- "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
- "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
- "addu $t3, $t2, %[src_stride] \n"
- "addu $t5, $t4, %[src_stride] \n"
- "addu $t6, $t2, $t4 \n"
-
- "srl $AT, %[width], 0x2 \n"
- "andi $t0, %[dst], 0x3 \n"
- "andi $t1, %[dst_stride], 0x3 \n"
- "or $t0, $t0, $t1 \n"
- "bnez $t0, 11f \n"
- " subu $t7, $t9, %[src_stride] \n"
-//dst + dst_stride word aligned
- "1: \n"
- "lw $t0, 0(%[src]) \n"
- "lwx $t1, %[src_stride](%[src]) \n"
- "lwx $t8, $t2(%[src]) \n"
- "lwx $t9, $t3(%[src]) \n"
-
-// t0 = | 30 | 20 | 10 | 00 |
-// t1 = | 31 | 21 | 11 | 01 |
-// t8 = | 32 | 22 | 12 | 02 |
-// t9 = | 33 | 23 | 13 | 03 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 21 | 01 | 20 | 00 |
- // s1 = | 23 | 03 | 22 | 02 |
- // s2 = | 31 | 11 | 30 | 10 |
- // s3 = | 33 | 13 | 32 | 12 |
-
- "precr.qb.ph $s4, $s1, $s0 \n"
- "precrq.qb.ph $s5, $s1, $s0 \n"
- "precr.qb.ph $s6, $s3, $s2 \n"
- "precrq.qb.ph $s7, $s3, $s2 \n"
-
- // s4 = | 03 | 02 | 01 | 00 |
- // s5 = | 23 | 22 | 21 | 20 |
- // s6 = | 13 | 12 | 11 | 10 |
- // s7 = | 33 | 32 | 31 | 30 |
-
- "lwx $t0, $t4(%[src]) \n"
- "lwx $t1, $t5(%[src]) \n"
- "lwx $t8, $t6(%[src]) \n"
- "lwx $t9, $t7(%[src]) \n"
-
-// t0 = | 34 | 24 | 14 | 04 |
-// t1 = | 35 | 25 | 15 | 05 |
-// t8 = | 36 | 26 | 16 | 06 |
-// t9 = | 37 | 27 | 17 | 07 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 25 | 05 | 24 | 04 |
- // s1 = | 27 | 07 | 26 | 06 |
- // s2 = | 35 | 15 | 34 | 14 |
- // s3 = | 37 | 17 | 36 | 16 |
-
- "precr.qb.ph $t0, $s1, $s0 \n"
- "precrq.qb.ph $t1, $s1, $s0 \n"
- "precr.qb.ph $t8, $s3, $s2 \n"
- "precrq.qb.ph $t9, $s3, $s2 \n"
-
- // t0 = | 07 | 06 | 05 | 04 |
- // t1 = | 27 | 26 | 25 | 24 |
- // t8 = | 17 | 16 | 15 | 14 |
- // t9 = | 37 | 36 | 35 | 34 |
-
- "addu $s0, %[dst], %[dst_stride] \n"
- "addu $s1, $s0, %[dst_stride] \n"
- "addu $s2, $s1, %[dst_stride] \n"
-
- "sw $s4, 0(%[dst]) \n"
- "sw $t0, 4(%[dst]) \n"
- "sw $s6, 0($s0) \n"
- "sw $t8, 4($s0) \n"
- "sw $s5, 0($s1) \n"
- "sw $t1, 4($s1) \n"
- "sw $s7, 0($s2) \n"
- "sw $t9, 4($s2) \n"
-
- "addiu $AT, -1 \n"
- "addiu %[src], 4 \n"
-
- "bnez $AT, 1b \n"
- " addu %[dst], $s2, %[dst_stride] \n"
- "b 2f \n"
-//dst + dst_stride unaligned
- "11: \n"
- "lw $t0, 0(%[src]) \n"
- "lwx $t1, %[src_stride](%[src]) \n"
- "lwx $t8, $t2(%[src]) \n"
- "lwx $t9, $t3(%[src]) \n"
-
-// t0 = | 30 | 20 | 10 | 00 |
-// t1 = | 31 | 21 | 11 | 01 |
-// t8 = | 32 | 22 | 12 | 02 |
-// t9 = | 33 | 23 | 13 | 03 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 21 | 01 | 20 | 00 |
- // s1 = | 23 | 03 | 22 | 02 |
- // s2 = | 31 | 11 | 30 | 10 |
- // s3 = | 33 | 13 | 32 | 12 |
-
- "precr.qb.ph $s4, $s1, $s0 \n"
- "precrq.qb.ph $s5, $s1, $s0 \n"
- "precr.qb.ph $s6, $s3, $s2 \n"
- "precrq.qb.ph $s7, $s3, $s2 \n"
-
- // s4 = | 03 | 02 | 01 | 00 |
- // s5 = | 23 | 22 | 21 | 20 |
- // s6 = | 13 | 12 | 11 | 10 |
- // s7 = | 33 | 32 | 31 | 30 |
-
- "lwx $t0, $t4(%[src]) \n"
- "lwx $t1, $t5(%[src]) \n"
- "lwx $t8, $t6(%[src]) \n"
- "lwx $t9, $t7(%[src]) \n"
-
-// t0 = | 34 | 24 | 14 | 04 |
-// t1 = | 35 | 25 | 15 | 05 |
-// t8 = | 36 | 26 | 16 | 06 |
-// t9 = | 37 | 27 | 17 | 07 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 25 | 05 | 24 | 04 |
- // s1 = | 27 | 07 | 26 | 06 |
- // s2 = | 35 | 15 | 34 | 14 |
- // s3 = | 37 | 17 | 36 | 16 |
-
- "precr.qb.ph $t0, $s1, $s0 \n"
- "precrq.qb.ph $t1, $s1, $s0 \n"
- "precr.qb.ph $t8, $s3, $s2 \n"
- "precrq.qb.ph $t9, $s3, $s2 \n"
-
- // t0 = | 07 | 06 | 05 | 04 |
- // t1 = | 27 | 26 | 25 | 24 |
- // t8 = | 17 | 16 | 15 | 14 |
- // t9 = | 37 | 36 | 35 | 34 |
-
- "addu $s0, %[dst], %[dst_stride] \n"
- "addu $s1, $s0, %[dst_stride] \n"
- "addu $s2, $s1, %[dst_stride] \n"
-
- "swr $s4, 0(%[dst]) \n"
- "swl $s4, 3(%[dst]) \n"
- "swr $t0, 4(%[dst]) \n"
- "swl $t0, 7(%[dst]) \n"
- "swr $s6, 0($s0) \n"
- "swl $s6, 3($s0) \n"
- "swr $t8, 4($s0) \n"
- "swl $t8, 7($s0) \n"
- "swr $s5, 0($s1) \n"
- "swl $s5, 3($s1) \n"
- "swr $t1, 4($s1) \n"
- "swl $t1, 7($s1) \n"
- "swr $s7, 0($s2) \n"
- "swl $s7, 3($s2) \n"
- "swr $t9, 4($s2) \n"
- "swl $t9, 7($s2) \n"
-
- "addiu $AT, -1 \n"
- "addiu %[src], 4 \n"
-
- "bnez $AT, 11b \n"
- " addu %[dst], $s2, %[dst_stride] \n"
- "2: \n"
- ".set pop \n"
- ".set at \n"
- :[src] "+r" (src),
- [dst] "+r" (dst),
- [width] "+r" (width)
- :[src_stride] "r" (src_stride),
- [dst_stride] "r" (dst_stride)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3", "s4",
- "s5", "s6", "s7"
- );
-}
-
-void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
- "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
- "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
- "addu $t3, $t2, %[src_stride] \n"
- "addu $t5, $t4, %[src_stride] \n"
- "addu $t6, $t2, $t4 \n"
- "subu $t7, $t9, %[src_stride] \n"
- "srl $t1, %[width], 1 \n"
-
-// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
- "andi $t0, %[dst_a], 0x3 \n"
- "andi $t8, %[dst_b], 0x3 \n"
- "or $t0, $t0, $t8 \n"
- "andi $t8, %[dst_stride_a], 0x3 \n"
- "andi $s5, %[dst_stride_b], 0x3 \n"
- "or $t8, $t8, $s5 \n"
- "or $t0, $t0, $t8 \n"
- "bnez $t0, 11f \n"
- " nop \n"
-// dst + dst_stride word aligned (both, a & b dst addresses)
- "1: \n"
- "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
- "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
- "addu $s5, %[dst_a], %[dst_stride_a] \n"
- "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
- "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
- "addu $s6, %[dst_b], %[dst_stride_b] \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
-
- "sw $s3, 0($s5) \n"
- "sw $s4, 0($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
-
- "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
- "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
- "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
- "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
- "sw $s3, 0(%[dst_a]) \n"
- "sw $s4, 0(%[dst_b]) \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
- "sw $s3, 4($s5) \n"
- "sw $s4, 4($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
-
- "addiu %[src], 4 \n"
- "addiu $t1, -1 \n"
- "sll $t0, %[dst_stride_a], 1 \n"
- "sll $t8, %[dst_stride_b], 1 \n"
- "sw $s3, 4(%[dst_a]) \n"
- "sw $s4, 4(%[dst_b]) \n"
- "addu %[dst_a], %[dst_a], $t0 \n"
- "bnez $t1, 1b \n"
- " addu %[dst_b], %[dst_b], $t8 \n"
- "b 2f \n"
- " nop \n"
-
-// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
- "11: \n"
- "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
- "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
- "addu $s5, %[dst_a], %[dst_stride_a] \n"
- "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
- "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
- "addu $s6, %[dst_b], %[dst_stride_b] \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
-
- "swr $s3, 0($s5) \n"
- "swl $s3, 3($s5) \n"
- "swr $s4, 0($s6) \n"
- "swl $s4, 3($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
-
- "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
- "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
- "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
- "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
- "swr $s3, 0(%[dst_a]) \n"
- "swl $s3, 3(%[dst_a]) \n"
- "swr $s4, 0(%[dst_b]) \n"
- "swl $s4, 3(%[dst_b]) \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
-
- "swr $s3, 4($s5) \n"
- "swl $s3, 7($s5) \n"
- "swr $s4, 4($s6) \n"
- "swl $s4, 7($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
-
- "addiu %[src], 4 \n"
- "addiu $t1, -1 \n"
- "sll $t0, %[dst_stride_a], 1 \n"
- "sll $t8, %[dst_stride_b], 1 \n"
- "swr $s3, 4(%[dst_a]) \n"
- "swl $s3, 7(%[dst_a]) \n"
- "swr $s4, 4(%[dst_b]) \n"
- "swl $s4, 7(%[dst_b]) \n"
- "addu %[dst_a], %[dst_a], $t0 \n"
- "bnez $t1, 11b \n"
- " addu %[dst_b], %[dst_b], $t8 \n"
-
- "2: \n"
- ".set pop \n"
- : [src] "+r" (src),
- [dst_a] "+r" (dst_a),
- [dst_b] "+r" (dst_b),
- [width] "+r" (width),
- [src_stride] "+r" (src_stride)
- : [dst_stride_a] "r" (dst_stride_a),
- [dst_stride_b] "r" (dst_stride_b)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6"
- );
-}
-
-#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc
deleted file mode 100755
index 274c4109cd..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-static uvec8 kVTbl4x4Transpose =
- { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
-
-void TransposeWx8_NEON(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) {
- const uint8* src_temp = NULL;
- asm volatile (
- // loops are on blocks of 8. loop will stop when
- // counter gets to or below 0. starting the counter
- // at w-8 allow for this
-#ifdef _ANDROID
- ".fpu neon\n"
-#endif
- "sub %5, #8 \n"
-
- // handle 8x8 blocks. this should be the majority of the plane
- ".p2align 2 \n"
- "1: \n"
- "mov %0, %1 \n"
-
- "vld1.8 {d0}, [%0], %2 \n"
- "vld1.8 {d1}, [%0], %2 \n"
- "vld1.8 {d2}, [%0], %2 \n"
- "vld1.8 {d3}, [%0], %2 \n"
- "vld1.8 {d4}, [%0], %2 \n"
- "vld1.8 {d5}, [%0], %2 \n"
- "vld1.8 {d6}, [%0], %2 \n"
- "vld1.8 {d7}, [%0] \n"
-
- "vtrn.8 d1, d0 \n"
- "vtrn.8 d3, d2 \n"
- "vtrn.8 d5, d4 \n"
- "vtrn.8 d7, d6 \n"
-
- "vtrn.16 d1, d3 \n"
- "vtrn.16 d0, d2 \n"
- "vtrn.16 d5, d7 \n"
- "vtrn.16 d4, d6 \n"
-
- "vtrn.32 d1, d5 \n"
- "vtrn.32 d0, d4 \n"
- "vtrn.32 d3, d7 \n"
- "vtrn.32 d2, d6 \n"
-
- "vrev16.8 q0, q0 \n"
- "vrev16.8 q1, q1 \n"
- "vrev16.8 q2, q2 \n"
- "vrev16.8 q3, q3 \n"
-
- "mov %0, %3 \n"
-
- "vst1.8 {d1}, [%0], %4 \n"
- "vst1.8 {d0}, [%0], %4 \n"
- "vst1.8 {d3}, [%0], %4 \n"
- "vst1.8 {d2}, [%0], %4 \n"
- "vst1.8 {d5}, [%0], %4 \n"
- "vst1.8 {d4}, [%0], %4 \n"
- "vst1.8 {d7}, [%0], %4 \n"
- "vst1.8 {d6}, [%0] \n"
-
- "add %1, #8 \n" // src += 8
- "add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride
- "subs %5, #8 \n" // w -= 8
- "bge 1b \n"
-
- // add 8 back to counter. if the result is 0 there are
- // no residuals.
- "adds %5, #8 \n"
- "beq 4f \n"
-
- // some residual, so between 1 and 7 lines left to transpose
- "cmp %5, #2 \n"
- "blt 3f \n"
-
- "cmp %5, #4 \n"
- "blt 2f \n"
-
- // 4x8 block
- "mov %0, %1 \n"
- "vld1.32 {d0[0]}, [%0], %2 \n"
- "vld1.32 {d0[1]}, [%0], %2 \n"
- "vld1.32 {d1[0]}, [%0], %2 \n"
- "vld1.32 {d1[1]}, [%0], %2 \n"
- "vld1.32 {d2[0]}, [%0], %2 \n"
- "vld1.32 {d2[1]}, [%0], %2 \n"
- "vld1.32 {d3[0]}, [%0], %2 \n"
- "vld1.32 {d3[1]}, [%0] \n"
-
- "mov %0, %3 \n"
-
- "vld1.8 {q3}, [%6] \n"
-
- "vtbl.8 d4, {d0, d1}, d6 \n"
- "vtbl.8 d5, {d0, d1}, d7 \n"
- "vtbl.8 d0, {d2, d3}, d6 \n"
- "vtbl.8 d1, {d2, d3}, d7 \n"
-
- // TODO(frkoenig): Rework shuffle above to
- // write out with 4 instead of 8 writes.
- "vst1.32 {d4[0]}, [%0], %4 \n"
- "vst1.32 {d4[1]}, [%0], %4 \n"
- "vst1.32 {d5[0]}, [%0], %4 \n"
- "vst1.32 {d5[1]}, [%0] \n"
-
- "add %0, %3, #4 \n"
- "vst1.32 {d0[0]}, [%0], %4 \n"
- "vst1.32 {d0[1]}, [%0], %4 \n"
- "vst1.32 {d1[0]}, [%0], %4 \n"
- "vst1.32 {d1[1]}, [%0] \n"
-
- "add %1, #4 \n" // src += 4
- "add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
- "subs %5, #4 \n" // w -= 4
- "beq 4f \n"
-
- // some residual, check to see if it includes a 2x8 block,
- // or less
- "cmp %5, #2 \n"
- "blt 3f \n"
-
- // 2x8 block
- "2: \n"
- "mov %0, %1 \n"
- "vld1.16 {d0[0]}, [%0], %2 \n"
- "vld1.16 {d1[0]}, [%0], %2 \n"
- "vld1.16 {d0[1]}, [%0], %2 \n"
- "vld1.16 {d1[1]}, [%0], %2 \n"
- "vld1.16 {d0[2]}, [%0], %2 \n"
- "vld1.16 {d1[2]}, [%0], %2 \n"
- "vld1.16 {d0[3]}, [%0], %2 \n"
- "vld1.16 {d1[3]}, [%0] \n"
-
- "vtrn.8 d0, d1 \n"
-
- "mov %0, %3 \n"
-
- "vst1.64 {d0}, [%0], %4 \n"
- "vst1.64 {d1}, [%0] \n"
-
- "add %1, #2 \n" // src += 2
- "add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
- "subs %5, #2 \n" // w -= 2
- "beq 4f \n"
-
- // 1x8 block
- "3: \n"
- "vld1.8 {d0[0]}, [%1], %2 \n"
- "vld1.8 {d0[1]}, [%1], %2 \n"
- "vld1.8 {d0[2]}, [%1], %2 \n"
- "vld1.8 {d0[3]}, [%1], %2 \n"
- "vld1.8 {d0[4]}, [%1], %2 \n"
- "vld1.8 {d0[5]}, [%1], %2 \n"
- "vld1.8 {d0[6]}, [%1], %2 \n"
- "vld1.8 {d0[7]}, [%1] \n"
-
- "vst1.64 {d0}, [%3] \n"
-
- "4: \n"
-
- : "+r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(src_stride), // %2
- "+r"(dst), // %3
- "+r"(dst_stride), // %4
- "+r"(width) // %5
- : "r"(&kVTbl4x4Transpose) // %6
- : "memory", "cc", "q0", "q1", "q2", "q3"
- );
-}
-
-static uvec8 kVTbl4x4TransposeDi =
- { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
-
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width) {
- const uint8* src_temp = NULL;
- asm volatile (
- // loops are on blocks of 8. loop will stop when
- // counter gets to or below 0. starting the counter
- // at w-8 allow for this
- "sub %7, #8 \n"
-
- // handle 8x8 blocks. this should be the majority of the plane
- ".p2align 2 \n"
- "1: \n"
- "mov %0, %1 \n"
-
- "vld2.8 {d0, d1}, [%0], %2 \n"
- "vld2.8 {d2, d3}, [%0], %2 \n"
- "vld2.8 {d4, d5}, [%0], %2 \n"
- "vld2.8 {d6, d7}, [%0], %2 \n"
- "vld2.8 {d16, d17}, [%0], %2 \n"
- "vld2.8 {d18, d19}, [%0], %2 \n"
- "vld2.8 {d20, d21}, [%0], %2 \n"
- "vld2.8 {d22, d23}, [%0] \n"
-
- "vtrn.8 q1, q0 \n"
- "vtrn.8 q3, q2 \n"
- "vtrn.8 q9, q8 \n"
- "vtrn.8 q11, q10 \n"
-
- "vtrn.16 q1, q3 \n"
- "vtrn.16 q0, q2 \n"
- "vtrn.16 q9, q11 \n"
- "vtrn.16 q8, q10 \n"
-
- "vtrn.32 q1, q9 \n"
- "vtrn.32 q0, q8 \n"
- "vtrn.32 q3, q11 \n"
- "vtrn.32 q2, q10 \n"
-
- "vrev16.8 q0, q0 \n"
- "vrev16.8 q1, q1 \n"
- "vrev16.8 q2, q2 \n"
- "vrev16.8 q3, q3 \n"
- "vrev16.8 q8, q8 \n"
- "vrev16.8 q9, q9 \n"
- "vrev16.8 q10, q10 \n"
- "vrev16.8 q11, q11 \n"
-
- "mov %0, %3 \n"
-
- "vst1.8 {d2}, [%0], %4 \n"
- "vst1.8 {d0}, [%0], %4 \n"
- "vst1.8 {d6}, [%0], %4 \n"
- "vst1.8 {d4}, [%0], %4 \n"
- "vst1.8 {d18}, [%0], %4 \n"
- "vst1.8 {d16}, [%0], %4 \n"
- "vst1.8 {d22}, [%0], %4 \n"
- "vst1.8 {d20}, [%0] \n"
-
- "mov %0, %5 \n"
-
- "vst1.8 {d3}, [%0], %6 \n"
- "vst1.8 {d1}, [%0], %6 \n"
- "vst1.8 {d7}, [%0], %6 \n"
- "vst1.8 {d5}, [%0], %6 \n"
- "vst1.8 {d19}, [%0], %6 \n"
- "vst1.8 {d17}, [%0], %6 \n"
- "vst1.8 {d23}, [%0], %6 \n"
- "vst1.8 {d21}, [%0] \n"
-
- "add %1, #8*2 \n" // src += 8*2
- "add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
- "add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
- "subs %7, #8 \n" // w -= 8
- "bge 1b \n"
-
- // add 8 back to counter. if the result is 0 there are
- // no residuals.
- "adds %7, #8 \n"
- "beq 4f \n"
-
- // some residual, so between 1 and 7 lines left to transpose
- "cmp %7, #2 \n"
- "blt 3f \n"
-
- "cmp %7, #4 \n"
- "blt 2f \n"
-
- //TODO(frkoenig): Clean this up
- // 4x8 block
- "mov %0, %1 \n"
- "vld1.64 {d0}, [%0], %2 \n"
- "vld1.64 {d1}, [%0], %2 \n"
- "vld1.64 {d2}, [%0], %2 \n"
- "vld1.64 {d3}, [%0], %2 \n"
- "vld1.64 {d4}, [%0], %2 \n"
- "vld1.64 {d5}, [%0], %2 \n"
- "vld1.64 {d6}, [%0], %2 \n"
- "vld1.64 {d7}, [%0] \n"
-
- "vld1.8 {q15}, [%8] \n"
-
- "vtrn.8 q0, q1 \n"
- "vtrn.8 q2, q3 \n"
-
- "vtbl.8 d16, {d0, d1}, d30 \n"
- "vtbl.8 d17, {d0, d1}, d31 \n"
- "vtbl.8 d18, {d2, d3}, d30 \n"
- "vtbl.8 d19, {d2, d3}, d31 \n"
- "vtbl.8 d20, {d4, d5}, d30 \n"
- "vtbl.8 d21, {d4, d5}, d31 \n"
- "vtbl.8 d22, {d6, d7}, d30 \n"
- "vtbl.8 d23, {d6, d7}, d31 \n"
-
- "mov %0, %3 \n"
-
- "vst1.32 {d16[0]}, [%0], %4 \n"
- "vst1.32 {d16[1]}, [%0], %4 \n"
- "vst1.32 {d17[0]}, [%0], %4 \n"
- "vst1.32 {d17[1]}, [%0], %4 \n"
-
- "add %0, %3, #4 \n"
- "vst1.32 {d20[0]}, [%0], %4 \n"
- "vst1.32 {d20[1]}, [%0], %4 \n"
- "vst1.32 {d21[0]}, [%0], %4 \n"
- "vst1.32 {d21[1]}, [%0] \n"
-
- "mov %0, %5 \n"
-
- "vst1.32 {d18[0]}, [%0], %6 \n"
- "vst1.32 {d18[1]}, [%0], %6 \n"
- "vst1.32 {d19[0]}, [%0], %6 \n"
- "vst1.32 {d19[1]}, [%0], %6 \n"
-
- "add %0, %5, #4 \n"
- "vst1.32 {d22[0]}, [%0], %6 \n"
- "vst1.32 {d22[1]}, [%0], %6 \n"
- "vst1.32 {d23[0]}, [%0], %6 \n"
- "vst1.32 {d23[1]}, [%0] \n"
-
- "add %1, #4*2 \n" // src += 4 * 2
- "add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
- "add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
- "subs %7, #4 \n" // w -= 4
- "beq 4f \n"
-
- // some residual, check to see if it includes a 2x8 block,
- // or less
- "cmp %7, #2 \n"
- "blt 3f \n"
-
- // 2x8 block
- "2: \n"
- "mov %0, %1 \n"
- "vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
- "vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
- "vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
- "vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
- "vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
- "vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
- "vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
- "vld2.16 {d1[3], d3[3]}, [%0] \n"
-
- "vtrn.8 d0, d1 \n"
- "vtrn.8 d2, d3 \n"
-
- "mov %0, %3 \n"
-
- "vst1.64 {d0}, [%0], %4 \n"
- "vst1.64 {d2}, [%0] \n"
-
- "mov %0, %5 \n"
-
- "vst1.64 {d1}, [%0], %6 \n"
- "vst1.64 {d3}, [%0] \n"
-
- "add %1, #2*2 \n" // src += 2 * 2
- "add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
- "add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
- "subs %7, #2 \n" // w -= 2
- "beq 4f \n"
-
- // 1x8 block
- "3: \n"
- "vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
- "vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
- "vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
- "vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
- "vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
- "vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
- "vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
- "vld2.8 {d0[7], d1[7]}, [%1] \n"
-
- "vst1.64 {d0}, [%3] \n"
- "vst1.64 {d1}, [%5] \n"
-
- "4: \n"
-
- : "+r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(src_stride), // %2
- "+r"(dst_a), // %3
- "+r"(dst_stride_a), // %4
- "+r"(dst_b), // %5
- "+r"(dst_stride_b), // %6
- "+r"(width) // %7
- : "r"(&kVTbl4x4TransposeDi) // %8
- : "memory", "cc",
- "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
- );
-}
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_any.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_any.cc
deleted file mode 100755
index 90c6a3ff5f..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_any.cc
+++ /dev/null
@@ -1,542 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels.
-// TODO(fbarchard): Consider 'any' functions handling odd alignment.
-// YUV to RGB does multiple of 8 with SIMD and remainder with C.
-#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \
- void NAMEANY(const uint8* y_buf, \
- const uint8* u_buf, \
- const uint8* v_buf, \
- uint8* rgb_buf, \
- int width) { \
- int n = width & ~MASK; \
- I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n); \
- I420TORGB_C(y_buf + n, \
- u_buf + (n >> UV_SHIFT), \
- v_buf + (n >> UV_SHIFT), \
- rgb_buf + n * BPP, width & MASK); \
- }
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
- 0, 4, 7)
-YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
- 1, 4, 7)
-YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
- 2, 4, 7)
-YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
- 1, 4, 7)
-YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
- 1, 4, 7)
-YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
- 1, 4, 7)
-// I422ToRGB565Row_SSSE3 is unaligned.
-YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C,
- 1, 2, 7)
-YANY(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C,
- 1, 2, 7)
-YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, I422ToRGB565Row_C,
- 1, 2, 7)
-// I422ToRGB24Row_SSSE3 is unaligned.
-YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
-YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
-YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
-YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
-#endif // HAS_I422TOARGBROW_SSSE3
-#ifdef HAS_I422TOARGBROW_AVX2
-YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15)
-#endif // HAS_I422TOARGBROW_AVX2
-#ifdef HAS_I422TOARGBROW_NEON
-YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7)
-YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
-YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7)
-YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
-YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
-YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
-YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7)
-YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7)
-YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C,
- 1, 2, 7)
-YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C,
- 1, 2, 7)
-YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7)
-YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
-YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
-#endif // HAS_I422TOARGBROW_NEON
-#undef YANY
-
-// Wrappers to handle odd width
-#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \
- void NAMEANY(const uint8* y_buf, \
- const uint8* uv_buf, \
- uint8* rgb_buf, \
- int width) { \
- int n = width & ~7; \
- NV12TORGB_SIMD(y_buf, uv_buf, rgb_buf, n); \
- NV12TORGB_C(y_buf + n, \
- uv_buf + (n >> UV_SHIFT), \
- rgb_buf + n * BPP, width & 7); \
- }
-
-#ifdef HAS_NV12TOARGBROW_SSSE3
-NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
- 0, 4)
-NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
- 0, 4)
-#endif // HAS_NV12TOARGBROW_SSSE3
-#ifdef HAS_NV12TOARGBROW_NEON
-NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
-NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
-#endif // HAS_NV12TOARGBROW_NEON
-#ifdef HAS_NV12TORGB565ROW_SSSE3
-NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C,
- 0, 2)
-NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C,
- 0, 2)
-#endif // HAS_NV12TORGB565ROW_SSSE3
-#ifdef HAS_NV12TORGB565ROW_NEON
-NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, 0, 2)
-NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
-#endif // HAS_NV12TORGB565ROW_NEON
-#undef NVANY
-
-#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \
- void NAMEANY(const uint8* src, \
- uint8* dst, \
- int width) { \
- int n = width & ~MASK; \
- ARGBTORGB_SIMD(src, dst, n); \
- ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK); \
- }
-
-#if defined(HAS_ARGBTORGB24ROW_SSSE3)
-RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, ARGBToRGB24Row_C,
- 15, 4, 3)
-RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, ARGBToRAWRow_C,
- 15, 4, 3)
-RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, ARGBToRGB565Row_C,
- 3, 4, 2)
-RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, ARGBToARGB1555Row_C,
- 3, 4, 2)
-RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
- 3, 4, 2)
-#endif
-#if defined(HAS_I400TOARGBROW_SSE2)
-RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C,
- 7, 1, 4)
-#endif
-#if defined(HAS_YTOARGBROW_SSE2)
-RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C,
- 7, 1, 4)
-RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
- 15, 2, 4)
-RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
- 15, 2, 4)
-// These require alignment on ARGB, so C is used for remainder.
-RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C,
- 15, 3, 4)
-RGBANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, RAWToARGBRow_C,
- 15, 3, 4)
-RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C,
- 7, 2, 4)
-RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C,
- 7, 2, 4)
-RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C,
- 7, 2, 4)
-#endif
-#if defined(HAS_ARGBTORGB24ROW_NEON)
-RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
-RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3)
-RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C,
- 7, 4, 2)
-RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C,
- 7, 4, 2)
-RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
- 7, 4, 2)
-RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C,
- 7, 1, 4)
-RGBANY(YToARGBRow_Any_NEON, YToARGBRow_NEON, YToARGBRow_C,
- 7, 1, 4)
-RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C,
- 7, 2, 4)
-RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
- 7, 2, 4)
-#endif
-#undef RGBANY
-
-// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
-#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \
- void NAMEANY(const uint8* src, \
- uint8* dst, uint32 selector, \
- int width) { \
- int n = width & ~MASK; \
- ARGBTORGB_SIMD(src, dst, selector, n); \
- ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK); \
- }
-
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
-BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
- 7, 4, 1)
-#endif
-#if defined(HAS_ARGBTOBAYERROW_NEON)
-BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
- 7, 4, 1)
-#endif
-#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
-BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C,
- 7, 4, 1)
-#endif
-#if defined(HAS_ARGBTOBAYERGGROW_NEON)
-BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
- 7, 4, 1)
-#endif
-
-#undef BAYERANY
-
-// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
-#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \
- void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
- ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \
- ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP, \
- dst_y + (width - NUM) * BPP, NUM); \
- }
-
-#ifdef HAS_ARGBTOYROW_AVX2
-YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 4, 1, 32)
-YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 4, 1, 32)
-YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
-YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
-#endif
-#ifdef HAS_ARGBTOYROW_SSSE3
-YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
-#endif
-#ifdef HAS_BGRATOYROW_SSSE3
-YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
-YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
-YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
-YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
-YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
-#endif
-#ifdef HAS_ARGBTOYJROW_SSSE3
-YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_Unaligned_SSSE3, 4, 1, 16)
-#endif
-#ifdef HAS_ARGBTOYROW_NEON
-YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
-YANY(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 4, 1, 8)
-YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8)
-YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8)
-YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8)
-YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8)
-YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8)
-YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
-YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8)
-YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8)
-YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
-YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
-YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
-YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
-YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
-YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
-YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
-#endif
-#undef YANY
-
-#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
- int n = width & ~MASK; \
- ARGBTOY_SIMD(src_argb, dst_y, n); \
- ARGBTOY_C(src_argb + n * SBPP, \
- dst_y + n * BPP, width & MASK); \
- }
-
-// Attenuate is destructive so last16 method can not be used due to overlap.
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C,
- 4, 4, 3)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C,
- 4, 4, 3)
-#endif
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, ARGBUnattenuateRow_C,
- 4, 4, 3)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_AVX2
-YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C,
- 4, 4, 7)
-#endif
-#ifdef HAS_ARGBUNATTENUATEROW_AVX2
-YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, ARGBUnattenuateRow_C,
- 4, 4, 7)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_NEON
-YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
- 4, 4, 7)
-#endif
-#undef YANY
-
-// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C.
-#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK) \
- void NAMEANY(const uint8* src_argb, int src_stride_argb, \
- uint8* dst_u, uint8* dst_v, int width) { \
- int n = width & ~MASK; \
- ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n); \
- ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \
- dst_u + (n >> 1), \
- dst_v + (n >> 1), \
- width & MASK); \
- }
-
-#ifdef HAS_ARGBTOUVROW_AVX2
-UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31)
-UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
-UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
-#endif
-#ifdef HAS_ARGBTOUVROW_SSSE3
-UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15)
-UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C,
- 4, 15)
-UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15)
-UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15)
-UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15)
-UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2, 15)
-UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15)
-#endif
-#ifdef HAS_ARGBTOUVROW_NEON
-UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
-UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15)
-UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15)
-UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15)
-UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15)
-UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3, 15)
-UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15)
-UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
-UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
-UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
-UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
-UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
-#endif
-#undef UVANY
-
-#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT) \
- void NAMEANY(const uint8* src_uv, \
- uint8* dst_u, uint8* dst_v, int width) { \
- int n = width & ~MASK; \
- ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \
- ANYTOUV_C(src_uv + n * BPP, \
- dst_u + (n >> SHIFT), \
- dst_v + (n >> SHIFT), \
- width & MASK); \
- }
-
-#ifdef HAS_ARGBTOUV444ROW_SSSE3
-UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
- ARGBToUV444Row_C, 4, 15, 0)
-#endif
-#ifdef HAS_YUY2TOUV422ROW_AVX2
-UV422ANY(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2,
- YUY2ToUV422Row_C, 2, 31, 1)
-UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2,
- UYVYToUV422Row_C, 2, 31, 1)
-#endif
-#ifdef HAS_ARGBTOUVROW_SSSE3
-UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3,
- ARGBToUV422Row_C, 4, 15, 1)
-UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
- YUY2ToUV422Row_C, 2, 15, 1)
-UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2,
- UYVYToUV422Row_C, 2, 15, 1)
-#endif
-#ifdef HAS_YUY2TOUV422ROW_NEON
-UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON,
- ARGBToUV444Row_C, 4, 7, 0)
-UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON,
- ARGBToUV422Row_C, 4, 15, 1)
-UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON,
- ARGBToUV411Row_C, 4, 31, 2)
-UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON,
- YUY2ToUV422Row_C, 2, 15, 1)
-UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
- UYVYToUV422Row_C, 2, 15, 1)
-#endif
-#undef UV422ANY
-
-#define SPLITUVROWANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
- void NAMEANY(const uint8* src_uv, \
- uint8* dst_u, uint8* dst_v, int width) { \
- int n = width & ~MASK; \
- ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \
- ANYTOUV_C(src_uv + n * 2, \
- dst_u + n, \
- dst_v + n, \
- width & MASK); \
- }
-
-#ifdef HAS_SPLITUVROW_SSE2
-SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
-#endif
-#ifdef HAS_SPLITUVROW_AVX2
-SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31)
-#endif
-#ifdef HAS_SPLITUVROW_NEON
-SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15)
-#endif
-#ifdef HAS_SPLITUVROW_MIPS_DSPR2
-SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
- SplitUVRow_C, 15)
-#endif
-#undef SPLITUVROWANY
-
-#define MERGEUVROW_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
- void NAMEANY(const uint8* src_u, const uint8* src_v, \
- uint8* dst_uv, int width) { \
- int n = width & ~MASK; \
- ANYTOUV_SIMD(src_u, src_v, dst_uv, n); \
- ANYTOUV_C(src_u + n, \
- src_v + n, \
- dst_uv + n * 2, \
- width & MASK); \
- }
-
-#ifdef HAS_MERGEUVROW_SSE2
-MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
-#endif
-#ifdef HAS_MERGEUVROW_AVX2
-MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31)
-#endif
-#ifdef HAS_MERGEUVROW_NEON
-MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
-#endif
-#undef MERGEUVROW_ANY
-
-#define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK) \
- void NAMEANY(const uint8* src_argb0, const uint8* src_argb1, \
- uint8* dst_argb, int width) { \
- int n = width & ~MASK; \
- ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n); \
- ARGBMATH_C(src_argb0 + n * 4, \
- src_argb1 + n * 4, \
- dst_argb + n * 4, \
- width & MASK); \
- }
-
-#ifdef HAS_ARGBMULTIPLYROW_SSE2
-MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C,
- 3)
-#endif
-#ifdef HAS_ARGBADDROW_SSE2
-MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3)
-#endif
-#ifdef HAS_ARGBSUBTRACTROW_SSE2
-MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C,
- 3)
-#endif
-#ifdef HAS_ARGBMULTIPLYROW_AVX2
-MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C,
- 7)
-#endif
-#ifdef HAS_ARGBADDROW_AVX2
-MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7)
-#endif
-#ifdef HAS_ARGBSUBTRACTROW_AVX2
-MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C,
- 7)
-#endif
-#ifdef HAS_ARGBMULTIPLYROW_NEON
-MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C,
- 7)
-#endif
-#ifdef HAS_ARGBADDROW_NEON
-MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7)
-#endif
-#ifdef HAS_ARGBSUBTRACTROW_NEON
-MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C,
- 7)
-#endif
-#undef MATHROW_ANY
-
-// Shuffle may want to work in place, so last16 method can not be used.
-#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_argb, uint8* dst_argb, \
- const uint8* shuffler, int width) { \
- int n = width & ~MASK; \
- ARGBTOY_SIMD(src_argb, dst_argb, shuffler, n); \
- ARGBTOY_C(src_argb + n * SBPP, \
- dst_argb + n * BPP, shuffler, width & MASK); \
- }
-
-#ifdef HAS_ARGBSHUFFLEROW_SSE2
-YANY(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2,
- ARGBShuffleRow_C, 4, 4, 3)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_SSSE3
-YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_Unaligned_SSSE3,
- ARGBShuffleRow_C, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_AVX2
-YANY(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2,
- ARGBShuffleRow_C, 4, 4, 15)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_NEON
-YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON,
- ARGBShuffleRow_C, 4, 4, 3)
-#endif
-#undef YANY
-
-// Interpolate may want to work in place, so last16 method can not be used.
-#define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK) \
- void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
- ptrdiff_t src_stride_ptr, int width, \
- int source_y_fraction) { \
- int n = width & ~MASK; \
- TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr, \
- n, source_y_fraction); \
- TERP_C(dst_ptr + n * BPP, \
- src_ptr + n * SBPP, src_stride_ptr, \
- width & MASK, source_y_fraction); \
- }
-
-#ifdef HAS_INTERPOLATEROW_AVX2
-NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2,
- InterpolateRow_C, 1, 1, 32)
-#endif
-#ifdef HAS_INTERPOLATEROW_SSSE3
-NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3,
- InterpolateRow_C, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_SSE2
-NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2,
- InterpolateRow_C, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_NEON
-NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON,
- InterpolateRow_C, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2
-NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2,
- InterpolateRow_C, 1, 1, 3)
-#endif
-#undef NANY
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_common.cc
deleted file mode 100755
index 135bdc9084..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_common.cc
+++ /dev/null
@@ -1,2247 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include <string.h> // For memcpy and memset.
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// llvm x86 is poor at ternary operator, so use branchless min/max.
-
-#define USE_BRANCHLESS 1
-#if USE_BRANCHLESS
-static __inline int32 clamp0(int32 v) {
- return ((-(v) >> 31) & (v));
-}
-
-static __inline int32 clamp255(int32 v) {
- return (((255 - (v)) >> 31) | (v)) & 255;
-}
-
-static __inline uint32 Clamp(int32 val) {
- int v = clamp0(val);
- return (uint32)(clamp255(v));
-}
-
-static __inline uint32 Abs(int32 v) {
- int m = v >> 31;
- return (v + m) ^ m;
-}
-#else // USE_BRANCHLESS
-static __inline int32 clamp0(int32 v) {
- return (v < 0) ? 0 : v;
-}
-
-static __inline int32 clamp255(int32 v) {
- return (v > 255) ? 255 : v;
-}
-
-static __inline uint32 Clamp(int32 val) {
- int v = clamp0(val);
- return (uint32)(clamp255(v));
-}
-
-static __inline uint32 Abs(int32 v) {
- return (v < 0) ? -v : v;
-}
-#endif // USE_BRANCHLESS
-
-#ifdef LIBYUV_LITTLE_ENDIAN
-#define WRITEWORD(p, v) *(uint32*)(p) = v
-#else
-static inline void WRITEWORD(uint8* p, uint32 v) {
- p[0] = (uint8)(v & 255);
- p[1] = (uint8)((v >> 8) & 255);
- p[2] = (uint8)((v >> 16) & 255);
- p[3] = (uint8)((v >> 24) & 255);
-}
-#endif
-
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_rgb24[0];
- uint8 g = src_rgb24[1];
- uint8 r = src_rgb24[2];
- dst_argb[0] = b;
- dst_argb[1] = g;
- dst_argb[2] = r;
- dst_argb[3] = 255u;
- dst_argb += 4;
- src_rgb24 += 3;
- }
-}
-
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 r = src_raw[0];
- uint8 g = src_raw[1];
- uint8 b = src_raw[2];
- dst_argb[0] = b;
- dst_argb[1] = g;
- dst_argb[2] = r;
- dst_argb[3] = 255u;
- dst_argb += 4;
- src_raw += 3;
- }
-}
-
-void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_rgb565[0] & 0x1f;
- uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r = src_rgb565[1] >> 3;
- dst_argb[0] = (b << 3) | (b >> 2);
- dst_argb[1] = (g << 2) | (g >> 4);
- dst_argb[2] = (r << 3) | (r >> 2);
- dst_argb[3] = 255u;
- dst_argb += 4;
- src_rgb565 += 2;
- }
-}
-
-void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
- int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb1555[0] & 0x1f;
- uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8 r = (src_argb1555[1] & 0x7c) >> 2;
- uint8 a = src_argb1555[1] >> 7;
- dst_argb[0] = (b << 3) | (b >> 2);
- dst_argb[1] = (g << 3) | (g >> 2);
- dst_argb[2] = (r << 3) | (r >> 2);
- dst_argb[3] = -a;
- dst_argb += 4;
- src_argb1555 += 2;
- }
-}
-
-void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
- int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb4444[0] & 0x0f;
- uint8 g = src_argb4444[0] >> 4;
- uint8 r = src_argb4444[1] & 0x0f;
- uint8 a = src_argb4444[1] >> 4;
- dst_argb[0] = (b << 4) | b;
- dst_argb[1] = (g << 4) | g;
- dst_argb[2] = (r << 4) | r;
- dst_argb[3] = (a << 4) | a;
- dst_argb += 4;
- src_argb4444 += 2;
- }
-}
-
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb[0];
- uint8 g = src_argb[1];
- uint8 r = src_argb[2];
- dst_rgb[0] = b;
- dst_rgb[1] = g;
- dst_rgb[2] = r;
- dst_rgb += 3;
- src_argb += 4;
- }
-}
-
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb[0];
- uint8 g = src_argb[1];
- uint8 r = src_argb[2];
- dst_rgb[0] = r;
- dst_rgb[1] = g;
- dst_rgb[2] = b;
- dst_rgb += 3;
- src_argb += 4;
- }
-}
-
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 2;
- uint8 r0 = src_argb[2] >> 3;
- uint8 b1 = src_argb[4] >> 3;
- uint8 g1 = src_argb[5] >> 2;
- uint8 r1 = src_argb[6] >> 3;
- WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27));
- dst_rgb += 4;
- src_argb += 8;
- }
- if (width & 1) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 2;
- uint8 r0 = src_argb[2] >> 3;
- *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 3;
- uint8 r0 = src_argb[2] >> 3;
- uint8 a0 = src_argb[3] >> 7;
- uint8 b1 = src_argb[4] >> 3;
- uint8 g1 = src_argb[5] >> 3;
- uint8 r1 = src_argb[6] >> 3;
- uint8 a1 = src_argb[7] >> 7;
- *(uint32*)(dst_rgb) =
- b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
- (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
- dst_rgb += 4;
- src_argb += 8;
- }
- if (width & 1) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 3;
- uint8 r0 = src_argb[2] >> 3;
- uint8 a0 = src_argb[3] >> 7;
- *(uint16*)(dst_rgb) =
- b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
- }
-}
-
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb[0] >> 4;
- uint8 g0 = src_argb[1] >> 4;
- uint8 r0 = src_argb[2] >> 4;
- uint8 a0 = src_argb[3] >> 4;
- uint8 b1 = src_argb[4] >> 4;
- uint8 g1 = src_argb[5] >> 4;
- uint8 r1 = src_argb[6] >> 4;
- uint8 a1 = src_argb[7] >> 4;
- *(uint32*)(dst_rgb) =
- b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
- (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
- dst_rgb += 4;
- src_argb += 8;
- }
- if (width & 1) {
- uint8 b0 = src_argb[0] >> 4;
- uint8 g0 = src_argb[1] >> 4;
- uint8 r0 = src_argb[2] >> 4;
- uint8 a0 = src_argb[3] >> 4;
- *(uint16*)(dst_rgb) =
- b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
- }
-}
-
-static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
- return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
-}
-
-static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
- return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
-}
-static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
- return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
-}
-
-#define MAKEROWY(NAME, R, G, B, BPP) \
-void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
-} \
-void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \
- uint8* dst_u, uint8* dst_v, int width) { \
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \
- src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \
- uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \
- src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \
- uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \
- src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \
- dst_u[0] = RGBToU(ar, ag, ab); \
- dst_v[0] = RGBToV(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
- uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
- uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
- dst_u[0] = RGBToU(ar, ag, ab); \
- dst_v[0] = RGBToV(ar, ag, ab); \
- } \
-}
-
-MAKEROWY(ARGB, 2, 1, 0, 4)
-MAKEROWY(BGRA, 1, 2, 3, 4)
-MAKEROWY(ABGR, 0, 1, 2, 4)
-MAKEROWY(RGBA, 3, 2, 1, 4)
-MAKEROWY(RGB24, 2, 1, 0, 3)
-MAKEROWY(RAW, 0, 1, 2, 3)
-#undef MAKEROWY
-
-// JPeg uses a variation on BT.601-1 full range
-// y = 0.29900 * r + 0.58700 * g + 0.11400 * b
-// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
-// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
-// BT.601 Mpeg range uses:
-// b 0.1016 * 255 = 25.908 = 25
-// g 0.5078 * 255 = 129.489 = 129
-// r 0.2578 * 255 = 65.739 = 66
-// JPeg 8 bit Y (not used):
-// b 0.11400 * 256 = 29.184 = 29
-// g 0.58700 * 256 = 150.272 = 150
-// r 0.29900 * 256 = 76.544 = 77
-// JPeg 7 bit Y:
-// b 0.11400 * 128 = 14.592 = 15
-// g 0.58700 * 128 = 75.136 = 75
-// r 0.29900 * 128 = 38.272 = 38
-// JPeg 8 bit U:
-// b 0.50000 * 255 = 127.5 = 127
-// g -0.33126 * 255 = -84.4713 = -84
-// r -0.16874 * 255 = -43.0287 = -43
-// JPeg 8 bit V:
-// b -0.08131 * 255 = -20.73405 = -20
-// g -0.41869 * 255 = -106.76595 = -107
-// r 0.50000 * 255 = 127.5 = 127
-
-static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
- return (38 * r + 75 * g + 15 * b + 64) >> 7;
-}
-
-static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
- return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
-}
-static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
- return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
-}
-
-#define AVGB(a, b) (((a) + (b) + 1) >> 1)
-
-#define MAKEROWYJ(NAME, R, G, B, BPP) \
-void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
-} \
-void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
- uint8* dst_u, uint8* dst_v, int width) { \
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
- AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
- uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
- AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
- uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
- AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
- dst_u[0] = RGBToUJ(ar, ag, ab); \
- dst_v[0] = RGBToVJ(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \
- uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \
- uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \
- dst_u[0] = RGBToUJ(ar, ag, ab); \
- dst_v[0] = RGBToVJ(ar, ag, ab); \
- } \
-}
-
-MAKEROWYJ(ARGB, 2, 1, 0, 4)
-#undef MAKEROWYJ
-
-void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_rgb565[0] & 0x1f;
- uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r = src_rgb565[1] >> 3;
- b = (b << 3) | (b >> 2);
- g = (g << 2) | (g >> 4);
- r = (r << 3) | (r >> 2);
- dst_y[0] = RGBToY(r, g, b);
- src_rgb565 += 2;
- dst_y += 1;
- }
-}
-
-void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb1555[0] & 0x1f;
- uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8 r = (src_argb1555[1] & 0x7c) >> 2;
- b = (b << 3) | (b >> 2);
- g = (g << 3) | (g >> 2);
- r = (r << 3) | (r >> 2);
- dst_y[0] = RGBToY(r, g, b);
- src_argb1555 += 2;
- dst_y += 1;
- }
-}
-
-void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb4444[0] & 0x0f;
- uint8 g = src_argb4444[0] >> 4;
- uint8 r = src_argb4444[1] & 0x0f;
- b = (b << 4) | b;
- g = (g << 4) | g;
- r = (r << 4) | r;
- dst_y[0] = RGBToY(r, g, b);
- src_argb4444 += 2;
- dst_y += 1;
- }
-}
-
-void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int width) {
- const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_rgb565[0] & 0x1f;
- uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r0 = src_rgb565[1] >> 3;
- uint8 b1 = src_rgb565[2] & 0x1f;
- uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
- uint8 r1 = src_rgb565[3] >> 3;
- uint8 b2 = next_rgb565[0] & 0x1f;
- uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8 r2 = next_rgb565[1] >> 3;
- uint8 b3 = next_rgb565[2] & 0x1f;
- uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
- uint8 r3 = next_rgb565[3] >> 3;
- uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
- uint8 g = (g0 + g1 + g2 + g3);
- uint8 r = (r0 + r1 + r2 + r3);
- b = (b << 1) | (b >> 6); // 787 -> 888.
- r = (r << 1) | (r >> 6);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- src_rgb565 += 4;
- next_rgb565 += 4;
- dst_u += 1;
- dst_v += 1;
- }
- if (width & 1) {
- uint8 b0 = src_rgb565[0] & 0x1f;
- uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r0 = src_rgb565[1] >> 3;
- uint8 b2 = next_rgb565[0] & 0x1f;
- uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8 r2 = next_rgb565[1] >> 3;
- uint8 b = (b0 + b2); // 565 * 2 = 676.
- uint8 g = (g0 + g2);
- uint8 r = (r0 + r2);
- b = (b << 2) | (b >> 4); // 676 -> 888
- g = (g << 1) | (g >> 6);
- r = (r << 2) | (r >> 4);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- }
-}
-
-void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int width) {
- const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb1555[0] & 0x1f;
- uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
- uint8 b1 = src_argb1555[2] & 0x1f;
- uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
- uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
- uint8 b2 = next_argb1555[0] & 0x1f;
- uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
- uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
- uint8 b3 = next_argb1555[2] & 0x1f;
- uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
- uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
- uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.
- uint8 g = (g0 + g1 + g2 + g3);
- uint8 r = (r0 + r1 + r2 + r3);
- b = (b << 1) | (b >> 6); // 777 -> 888.
- g = (g << 1) | (g >> 6);
- r = (r << 1) | (r >> 6);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- src_argb1555 += 4;
- next_argb1555 += 4;
- dst_u += 1;
- dst_v += 1;
- }
- if (width & 1) {
- uint8 b0 = src_argb1555[0] & 0x1f;
- uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
- uint8 b2 = next_argb1555[0] & 0x1f;
- uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
- uint8 r2 = next_argb1555[1] >> 3;
- uint8 b = (b0 + b2); // 555 * 2 = 666.
- uint8 g = (g0 + g2);
- uint8 r = (r0 + r2);
- b = (b << 2) | (b >> 4); // 666 -> 888.
- g = (g << 2) | (g >> 4);
- r = (r << 2) | (r >> 4);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- }
-}
-
-void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int width) {
- const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb4444[0] & 0x0f;
- uint8 g0 = src_argb4444[0] >> 4;
- uint8 r0 = src_argb4444[1] & 0x0f;
- uint8 b1 = src_argb4444[2] & 0x0f;
- uint8 g1 = src_argb4444[2] >> 4;
- uint8 r1 = src_argb4444[3] & 0x0f;
- uint8 b2 = next_argb4444[0] & 0x0f;
- uint8 g2 = next_argb4444[0] >> 4;
- uint8 r2 = next_argb4444[1] & 0x0f;
- uint8 b3 = next_argb4444[2] & 0x0f;
- uint8 g3 = next_argb4444[2] >> 4;
- uint8 r3 = next_argb4444[3] & 0x0f;
- uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
- uint8 g = (g0 + g1 + g2 + g3);
- uint8 r = (r0 + r1 + r2 + r3);
- b = (b << 2) | (b >> 4); // 666 -> 888.
- g = (g << 2) | (g >> 4);
- r = (r << 2) | (r >> 4);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- src_argb4444 += 4;
- next_argb4444 += 4;
- dst_u += 1;
- dst_v += 1;
- }
- if (width & 1) {
- uint8 b0 = src_argb4444[0] & 0x0f;
- uint8 g0 = src_argb4444[0] >> 4;
- uint8 r0 = src_argb4444[1] & 0x0f;
- uint8 b2 = next_argb4444[0] & 0x0f;
- uint8 g2 = next_argb4444[0] >> 4;
- uint8 r2 = next_argb4444[1] & 0x0f;
- uint8 b = (b0 + b2); // 444 * 2 = 555.
- uint8 g = (g0 + g2);
- uint8 r = (r0 + r2);
- b = (b << 3) | (b >> 2); // 555 -> 888.
- g = (g << 3) | (g >> 2);
- r = (r << 3) | (r >> 2);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- }
-}
-
-void ARGBToUV444Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 ab = src_argb[0];
- uint8 ag = src_argb[1];
- uint8 ar = src_argb[2];
- dst_u[0] = RGBToU(ar, ag, ab);
- dst_v[0] = RGBToV(ar, ag, ab);
- src_argb += 4;
- dst_u += 1;
- dst_v += 1;
- }
-}
-
-void ARGBToUV422Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
- uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
- uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
- dst_u[0] = RGBToU(ar, ag, ab);
- dst_v[0] = RGBToV(ar, ag, ab);
- src_argb += 8;
- dst_u += 1;
- dst_v += 1;
- }
- if (width & 1) {
- uint8 ab = src_argb[0];
- uint8 ag = src_argb[1];
- uint8 ar = src_argb[2];
- dst_u[0] = RGBToU(ar, ag, ab);
- dst_v[0] = RGBToV(ar, ag, ab);
- }
-}
-
-void ARGBToUV411Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- int x;
- for (x = 0; x < width - 3; x += 4) {
- uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
- uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
- uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
- dst_u[0] = RGBToU(ar, ag, ab);
- dst_v[0] = RGBToV(ar, ag, ab);
- src_argb += 16;
- dst_u += 1;
- dst_v += 1;
- }
- if ((width & 3) == 3) {
- uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
- uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
- uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
- dst_u[0] = RGBToU(ar, ag, ab);
- dst_v[0] = RGBToV(ar, ag, ab);
- } else if ((width & 3) == 2) {
- uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
- uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
- uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
- dst_u[0] = RGBToU(ar, ag, ab);
- dst_v[0] = RGBToV(ar, ag, ab);
- } else if ((width & 3) == 1) {
- uint8 ab = src_argb[0];
- uint8 ag = src_argb[1];
- uint8 ar = src_argb[2];
- dst_u[0] = RGBToU(ar, ag, ab);
- dst_v[0] = RGBToV(ar, ag, ab);
- }
-}
-
-void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
- dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
- dst_argb[3] = src_argb[3];
- dst_argb += 4;
- src_argb += 4;
- }
-}
-
-// Convert a row of image to Sepia tone.
-void ARGBSepiaRow_C(uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- int b = dst_argb[0];
- int g = dst_argb[1];
- int r = dst_argb[2];
- int sb = (b * 17 + g * 68 + r * 35) >> 7;
- int sg = (b * 22 + g * 88 + r * 45) >> 7;
- int sr = (b * 24 + g * 98 + r * 50) >> 7;
- // b does not over flow. a is preserved from original.
- dst_argb[0] = sb;
- dst_argb[1] = clamp255(sg);
- dst_argb[2] = clamp255(sr);
- dst_argb += 4;
- }
-}
-
-// Apply color matrix to a row of image. Matrix is signed.
-// TODO(fbarchard): Consider adding rounding (+32).
-void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- int b = src_argb[0];
- int g = src_argb[1];
- int r = src_argb[2];
- int a = src_argb[3];
- int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
- r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
- int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
- r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
- int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
- r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
- int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
- r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
- dst_argb[0] = Clamp(sb);
- dst_argb[1] = Clamp(sg);
- dst_argb[2] = Clamp(sr);
- dst_argb[3] = Clamp(sa);
- src_argb += 4;
- dst_argb += 4;
- }
-}
-
-// Apply color table to a row of image.
-void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- int b = dst_argb[0];
- int g = dst_argb[1];
- int r = dst_argb[2];
- int a = dst_argb[3];
- dst_argb[0] = table_argb[b * 4 + 0];
- dst_argb[1] = table_argb[g * 4 + 1];
- dst_argb[2] = table_argb[r * 4 + 2];
- dst_argb[3] = table_argb[a * 4 + 3];
- dst_argb += 4;
- }
-}
-
-// Apply color table to a row of image.
-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- int b = dst_argb[0];
- int g = dst_argb[1];
- int r = dst_argb[2];
- dst_argb[0] = table_argb[b * 4 + 0];
- dst_argb[1] = table_argb[g * 4 + 1];
- dst_argb[2] = table_argb[r * 4 + 2];
- dst_argb += 4;
- }
-}
-
-void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- int b = dst_argb[0];
- int g = dst_argb[1];
- int r = dst_argb[2];
- dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
- dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
- dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
- dst_argb += 4;
- }
-}
-
-#define REPEAT8(v) (v) | ((v) << 8)
-#define SHADE(f, v) v * f >> 24
-
-void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) {
- const uint32 b_scale = REPEAT8(value & 0xff);
- const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
- const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
- const uint32 a_scale = REPEAT8(value >> 24);
-
- int i;
- for (i = 0; i < width; ++i) {
- const uint32 b = REPEAT8(src_argb[0]);
- const uint32 g = REPEAT8(src_argb[1]);
- const uint32 r = REPEAT8(src_argb[2]);
- const uint32 a = REPEAT8(src_argb[3]);
- dst_argb[0] = SHADE(b, b_scale);
- dst_argb[1] = SHADE(g, g_scale);
- dst_argb[2] = SHADE(r, r_scale);
- dst_argb[3] = SHADE(a, a_scale);
- src_argb += 4;
- dst_argb += 4;
- }
-}
-#undef REPEAT8
-#undef SHADE
-
-#define REPEAT8(v) (v) | ((v) << 8)
-#define SHADE(f, v) v * f >> 16
-
-void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- const uint32 b = REPEAT8(src_argb0[0]);
- const uint32 g = REPEAT8(src_argb0[1]);
- const uint32 r = REPEAT8(src_argb0[2]);
- const uint32 a = REPEAT8(src_argb0[3]);
- const uint32 b_scale = src_argb1[0];
- const uint32 g_scale = src_argb1[1];
- const uint32 r_scale = src_argb1[2];
- const uint32 a_scale = src_argb1[3];
- dst_argb[0] = SHADE(b, b_scale);
- dst_argb[1] = SHADE(g, g_scale);
- dst_argb[2] = SHADE(r, r_scale);
- dst_argb[3] = SHADE(a, a_scale);
- src_argb0 += 4;
- src_argb1 += 4;
- dst_argb += 4;
- }
-}
-#undef REPEAT8
-#undef SHADE
-
-#define SHADE(f, v) clamp255(v + f)
-
-void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- const int b = src_argb0[0];
- const int g = src_argb0[1];
- const int r = src_argb0[2];
- const int a = src_argb0[3];
- const int b_add = src_argb1[0];
- const int g_add = src_argb1[1];
- const int r_add = src_argb1[2];
- const int a_add = src_argb1[3];
- dst_argb[0] = SHADE(b, b_add);
- dst_argb[1] = SHADE(g, g_add);
- dst_argb[2] = SHADE(r, r_add);
- dst_argb[3] = SHADE(a, a_add);
- src_argb0 += 4;
- src_argb1 += 4;
- dst_argb += 4;
- }
-}
-#undef SHADE
-
-#define SHADE(f, v) clamp0(f - v)
-
-void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- const int b = src_argb0[0];
- const int g = src_argb0[1];
- const int r = src_argb0[2];
- const int a = src_argb0[3];
- const int b_sub = src_argb1[0];
- const int g_sub = src_argb1[1];
- const int r_sub = src_argb1[2];
- const int a_sub = src_argb1[3];
- dst_argb[0] = SHADE(b, b_sub);
- dst_argb[1] = SHADE(g, g_sub);
- dst_argb[2] = SHADE(r, r_sub);
- dst_argb[3] = SHADE(a, a_sub);
- src_argb0 += 4;
- src_argb1 += 4;
- dst_argb += 4;
- }
-}
-#undef SHADE
-
-// Sobel functions which mimics SSSE3.
-void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
- uint8* dst_sobelx, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- int a = src_y0[i];
- int b = src_y1[i];
- int c = src_y2[i];
- int a_sub = src_y0[i + 2];
- int b_sub = src_y1[i + 2];
- int c_sub = src_y2[i + 2];
- int a_diff = a - a_sub;
- int b_diff = b - b_sub;
- int c_diff = c - c_sub;
- int sobel = Abs(a_diff + b_diff * 2 + c_diff);
- dst_sobelx[i] = (uint8)(clamp255(sobel));
- }
-}
-
-void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- int a = src_y0[i + 0];
- int b = src_y0[i + 1];
- int c = src_y0[i + 2];
- int a_sub = src_y1[i + 0];
- int b_sub = src_y1[i + 1];
- int c_sub = src_y1[i + 2];
- int a_diff = a - a_sub;
- int b_diff = b - b_sub;
- int c_diff = c - c_sub;
- int sobel = Abs(a_diff + b_diff * 2 + c_diff);
- dst_sobely[i] = (uint8)(clamp255(sobel));
- }
-}
-
-void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- int r = src_sobelx[i];
- int b = src_sobely[i];
- int s = clamp255(r + b);
- dst_argb[0] = (uint8)(s);
- dst_argb[1] = (uint8)(s);
- dst_argb[2] = (uint8)(s);
- dst_argb[3] = (uint8)(255u);
- dst_argb += 4;
- }
-}
-
-void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- int r = src_sobelx[i];
- int b = src_sobely[i];
- int s = clamp255(r + b);
- dst_y[i] = (uint8)(s);
- }
-}
-
-void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- int r = src_sobelx[i];
- int b = src_sobely[i];
- int g = clamp255(r + b);
- dst_argb[0] = (uint8)(b);
- dst_argb[1] = (uint8)(g);
- dst_argb[2] = (uint8)(r);
- dst_argb[3] = (uint8)(255u);
- dst_argb += 4;
- }
-}
-
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
- // Copy a Y to RGB.
- int x;
- for (x = 0; x < width; ++x) {
- uint8 y = src_y[0];
- dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
- dst_argb[3] = 255u;
- dst_argb += 4;
- ++src_y;
- }
-}
-
-// C reference code that mimics the YUV assembly.
-
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-
-#define UB 127 /* min(63,(int8)(2.018 * 64)) */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
-static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
- uint8* b, uint8* g, uint8* r) {
- int32 y1 = ((int32)(y) - 16) * YG;
- *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
- *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
- *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
-}
-
-#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-// C mimic assembly.
-// TODO(fbarchard): Remove subsampling from Neon.
-void I444ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
- uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
- YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 2;
- src_v += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- }
-}
-#else
-void I444ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width; ++x) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- src_y += 1;
- src_u += 1;
- src_v += 1;
- rgb_buf += 4; // Advance 1 pixel.
- }
-}
-#endif
-// Also used for 420
-void I422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void I422ToRGB24Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 6; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- }
-}
-
-void I422ToRAWRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 6; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- }
-}
-
-void I422ToARGB4444Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
- b0 = b0 >> 4;
- g0 = g0 >> 4;
- r0 = r0 >> 4;
- b1 = b1 >> 4;
- g1 = g1 >> 4;
- r1 = r1 >> 4;
- *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
- (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- dst_argb4444 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- b0 = b0 >> 4;
- g0 = g0 >> 4;
- r0 = r0 >> 4;
- *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
- 0xf000;
- }
-}
-
-void I422ToARGB1555Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
- b0 = b0 >> 3;
- g0 = g0 >> 3;
- r0 = r0 >> 3;
- b1 = b1 >> 3;
- g1 = g1 >> 3;
- r1 = r1 >> 3;
- *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
- (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- dst_argb1555 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- b0 = b0 >> 3;
- g0 = g0 >> 3;
- r0 = r0 >> 3;
- *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
- 0x8000;
- }
-}
-
-void I422ToRGB565Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- b1 = b1 >> 3;
- g1 = g1 >> 2;
- r1 = r1 >> 3;
- *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27);
- src_y += 2;
- src_u += 1;
- src_v += 1;
- dst_rgb565 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-void I411ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 3; x += 4) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- YuvPixel(src_y[2], src_u[0], src_v[0],
- rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
- rgb_buf[11] = 255;
- YuvPixel(src_y[3], src_u[0], src_v[0],
- rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
- rgb_buf[15] = 255;
- src_y += 4;
- src_u += 1;
- src_v += 1;
- rgb_buf += 16; // Advance 4 pixels.
- }
- if (width & 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_y += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void NV12ToARGBRow_C(const uint8* src_y,
- const uint8* usrc_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_y += 2;
- usrc_v += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void NV21ToARGBRow_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_vu[1], src_vu[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
-
- YuvPixel(src_y[1], src_vu[1], src_vu[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
-
- src_y += 2;
- src_vu += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_vu[1], src_vu[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void NV12ToRGB565Row_C(const uint8* src_y,
- const uint8* usrc_v,
- uint8* dst_rgb565,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
- YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- b1 = b1 >> 3;
- g1 = g1 >> 2;
- r1 = r1 >> 3;
- *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27);
- src_y += 2;
- usrc_v += 2;
- dst_rgb565 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-void NV21ToRGB565Row_C(const uint8* src_y,
- const uint8* vsrc_u,
- uint8* dst_rgb565,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- b1 = b1 >> 3;
- g1 = g1 >> 2;
- r1 = r1 >> 3;
- *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27);
- src_y += 2;
- vsrc_u += 2;
- dst_rgb565 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-void YUY2ToARGBRow_C(const uint8* src_yuy2,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_yuy2 += 4;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void UYVYToARGBRow_C(const uint8* src_uyvy,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_uyvy += 4;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void I422ToBGRARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
- rgb_buf[0] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
- rgb_buf[4] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
- rgb_buf[0] = 255;
- }
-}
-
-void I422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- rgb_buf[3] = 255;
- }
-}
-
-void I422ToRGBARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
- rgb_buf[0] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
- rgb_buf[4] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
- rgb_buf[0] = 255;
- }
-}
-
-void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], 128, 128,
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], 128, 128,
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_y += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], 128, 128,
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void MirrorRow_C(const uint8* src, uint8* dst, int width) {
- int x;
- src += width - 1;
- for (x = 0; x < width - 1; x += 2) {
- dst[x] = src[0];
- dst[x + 1] = src[-1];
- src -= 2;
- }
- if (width & 1) {
- dst[width - 1] = src[0];
- }
-}
-
-void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
- int x;
- src_uv += (width - 1) << 1;
- for (x = 0; x < width - 1; x += 2) {
- dst_u[x] = src_uv[0];
- dst_u[x + 1] = src_uv[-2];
- dst_v[x] = src_uv[1];
- dst_v[x + 1] = src_uv[-2 + 1];
- src_uv -= 4;
- }
- if (width & 1) {
- dst_u[width - 1] = src_uv[0];
- dst_v[width - 1] = src_uv[1];
- }
-}
-
-void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
- int x;
- const uint32* src32 = (const uint32*)(src);
- uint32* dst32 = (uint32*)(dst);
- src32 += width - 1;
- for (x = 0; x < width - 1; x += 2) {
- dst32[x] = src32[0];
- dst32[x + 1] = src32[-1];
- src32 -= 2;
- }
- if (width & 1) {
- dst32[width - 1] = src32[0];
- }
-}
-
-void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_u[x] = src_uv[0];
- dst_u[x + 1] = src_uv[2];
- dst_v[x] = src_uv[1];
- dst_v[x + 1] = src_uv[3];
- src_uv += 4;
- }
- if (width & 1) {
- dst_u[width - 1] = src_uv[0];
- dst_v[width - 1] = src_uv[1];
- }
-}
-
-void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_uv[0] = src_u[x];
- dst_uv[1] = src_v[x];
- dst_uv[2] = src_u[x + 1];
- dst_uv[3] = src_v[x + 1];
- dst_uv += 4;
- }
- if (width & 1) {
- dst_uv[0] = src_u[width - 1];
- dst_uv[1] = src_v[width - 1];
- }
-}
-
-void CopyRow_C(const uint8* src, uint8* dst, int count) {
- memcpy(dst, src, count);
-}
-
-void SetRow_C(uint8* dst, uint32 v8, int count) {
-#ifdef _MSC_VER
- // VC will generate rep stosb.
- int x;
- for (x = 0; x < count; ++x) {
- dst[x] = v8;
- }
-#else
- memset(dst, v8, count);
-#endif
-}
-
-void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
- int dst_stride, int height) {
- int y;
- for (y = 0; y < height; ++y) {
- uint32* d = (uint32*)(dst);
- int x;
- for (x = 0; x < width; ++x) {
- d[x] = v32;
- }
- dst += dst_stride;
- }
-}
-
-// Filter 2 rows of YUY2 UV's (422) into U and V (420).
-void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_u, uint8* dst_v, int width) {
- // Output a row of UV values, filtering 2 rows of YUY2.
- int x;
- for (x = 0; x < width; x += 2) {
- dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
- dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
- src_yuy2 += 4;
- dst_u += 1;
- dst_v += 1;
- }
-}
-
-// Copy row of YUY2 UV's (422) into U and V (422).
-void YUY2ToUV422Row_C(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int width) {
- // Output a row of UV values.
- int x;
- for (x = 0; x < width; x += 2) {
- dst_u[0] = src_yuy2[1];
- dst_v[0] = src_yuy2[3];
- src_yuy2 += 4;
- dst_u += 1;
- dst_v += 1;
- }
-}
-
-// Copy row of YUY2 Y's (422) into Y (420/422).
-void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
- // Output a row of Y values.
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_y[x] = src_yuy2[0];
- dst_y[x + 1] = src_yuy2[2];
- src_yuy2 += 4;
- }
- if (width & 1) {
- dst_y[width - 1] = src_yuy2[0];
- }
-}
-
-// Filter 2 rows of UYVY UV's (422) into U and V (420).
-void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_u, uint8* dst_v, int width) {
- // Output a row of UV values.
- int x;
- for (x = 0; x < width; x += 2) {
- dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
- dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
- src_uyvy += 4;
- dst_u += 1;
- dst_v += 1;
- }
-}
-
-// Copy row of UYVY UV's (422) into U and V (422).
-void UYVYToUV422Row_C(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int width) {
- // Output a row of UV values.
- int x;
- for (x = 0; x < width; x += 2) {
- dst_u[0] = src_uyvy[0];
- dst_v[0] = src_uyvy[2];
- src_uyvy += 4;
- dst_u += 1;
- dst_v += 1;
- }
-}
-
-// Copy row of UYVY Y's (422) into Y (420/422).
-void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
- // Output a row of Y values.
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_y[x] = src_uyvy[1];
- dst_y[x + 1] = src_uyvy[3];
- src_uyvy += 4;
- }
- if (width & 1) {
- dst_y[width - 1] = src_uyvy[1];
- }
-}
-
-#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
-
-// Blend src_argb0 over src_argb1 and store to dst_argb.
-// dst_argb may be src_argb0 or src_argb1.
-// This code mimics the SSSE3 version for better testability.
-void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint32 fb = src_argb0[0];
- uint32 fg = src_argb0[1];
- uint32 fr = src_argb0[2];
- uint32 a = src_argb0[3];
- uint32 bb = src_argb1[0];
- uint32 bg = src_argb1[1];
- uint32 br = src_argb1[2];
- dst_argb[0] = BLEND(fb, bb, a);
- dst_argb[1] = BLEND(fg, bg, a);
- dst_argb[2] = BLEND(fr, br, a);
- dst_argb[3] = 255u;
-
- fb = src_argb0[4 + 0];
- fg = src_argb0[4 + 1];
- fr = src_argb0[4 + 2];
- a = src_argb0[4 + 3];
- bb = src_argb1[4 + 0];
- bg = src_argb1[4 + 1];
- br = src_argb1[4 + 2];
- dst_argb[4 + 0] = BLEND(fb, bb, a);
- dst_argb[4 + 1] = BLEND(fg, bg, a);
- dst_argb[4 + 2] = BLEND(fr, br, a);
- dst_argb[4 + 3] = 255u;
- src_argb0 += 8;
- src_argb1 += 8;
- dst_argb += 8;
- }
-
- if (width & 1) {
- uint32 fb = src_argb0[0];
- uint32 fg = src_argb0[1];
- uint32 fr = src_argb0[2];
- uint32 a = src_argb0[3];
- uint32 bb = src_argb1[0];
- uint32 bg = src_argb1[1];
- uint32 br = src_argb1[2];
- dst_argb[0] = BLEND(fb, bb, a);
- dst_argb[1] = BLEND(fg, bg, a);
- dst_argb[2] = BLEND(fr, br, a);
- dst_argb[3] = 255u;
- }
-}
-#undef BLEND
-#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
-
-// Multiply source RGB by alpha and store to destination.
-// This code mimics the SSSE3 version for better testability.
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
- int i;
- for (i = 0; i < width - 1; i += 2) {
- uint32 b = src_argb[0];
- uint32 g = src_argb[1];
- uint32 r = src_argb[2];
- uint32 a = src_argb[3];
- dst_argb[0] = ATTENUATE(b, a);
- dst_argb[1] = ATTENUATE(g, a);
- dst_argb[2] = ATTENUATE(r, a);
- dst_argb[3] = a;
- b = src_argb[4];
- g = src_argb[5];
- r = src_argb[6];
- a = src_argb[7];
- dst_argb[4] = ATTENUATE(b, a);
- dst_argb[5] = ATTENUATE(g, a);
- dst_argb[6] = ATTENUATE(r, a);
- dst_argb[7] = a;
- src_argb += 8;
- dst_argb += 8;
- }
-
- if (width & 1) {
- const uint32 b = src_argb[0];
- const uint32 g = src_argb[1];
- const uint32 r = src_argb[2];
- const uint32 a = src_argb[3];
- dst_argb[0] = ATTENUATE(b, a);
- dst_argb[1] = ATTENUATE(g, a);
- dst_argb[2] = ATTENUATE(r, a);
- dst_argb[3] = a;
- }
-}
-#undef ATTENUATE
-
-// Divide source RGB by alpha and store to destination.
-// b = (b * 255 + (a / 2)) / a;
-// g = (g * 255 + (a / 2)) / a;
-// r = (r * 255 + (a / 2)) / a;
-// Reciprocal method is off by 1 on some values. ie 125
-// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
-#define T(a) 0x01000000 + (0x10000 / a)
-const uint32 fixed_invtbl8[256] = {
- 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
- T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
- T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
- T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
- T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
- T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
- T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
- T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
- T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
- T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
- T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
- T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
- T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
- T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
- T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
- T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
- T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
- T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
- T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
- T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
- T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
- T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
- T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
- T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
- T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
- T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
- T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
- T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
- T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
- T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
- T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
- T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
-#undef T
-
-void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- uint32 b = src_argb[0];
- uint32 g = src_argb[1];
- uint32 r = src_argb[2];
- const uint32 a = src_argb[3];
- const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
- b = (b * ia) >> 8;
- g = (g * ia) >> 8;
- r = (r * ia) >> 8;
- // Clamping should not be necessary but is free in assembly.
- dst_argb[0] = clamp255(b);
- dst_argb[1] = clamp255(g);
- dst_argb[2] = clamp255(r);
- dst_argb[3] = a;
- src_argb += 4;
- dst_argb += 4;
- }
-}
-
-void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) {
- int32 row_sum[4] = {0, 0, 0, 0};
- int x;
- for (x = 0; x < width; ++x) {
- row_sum[0] += row[x * 4 + 0];
- row_sum[1] += row[x * 4 + 1];
- row_sum[2] += row[x * 4 + 2];
- row_sum[3] += row[x * 4 + 3];
- cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
- cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
- cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
- cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
- }
-}
-
-void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
- int w, int area, uint8* dst, int count) {
- float ooa = 1.0f / area;
- int i;
- for (i = 0; i < count; ++i) {
- dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
- dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
- dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
- dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
- dst += 4;
- tl += 4;
- bl += 4;
- }
-}
-
-// Copy pixels from rotated source to destination row with a slope.
-LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width) {
- int i;
- // Render a row of pixels from source into a buffer.
- float uv[2];
- uv[0] = uv_dudv[0];
- uv[1] = uv_dudv[1];
- for (i = 0; i < width; ++i) {
- int x = (int)(uv[0]);
- int y = (int)(uv[1]);
- *(uint32*)(dst_argb) =
- *(const uint32*)(src_argb + y * src_argb_stride +
- x * 4);
- dst_argb += 4;
- uv[0] += uv_dudv[2];
- uv[1] += uv_dudv[3];
- }
-}
-
-// Blend 2 rows into 1 for conversions such as I422ToI420.
-void HalfRow_C(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- int x;
- for (x = 0; x < pix; ++x) {
- dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
- }
-}
-
-// C version 2x2 -> 2x1.
-void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride,
- int width, int source_y_fraction) {
- int y1_fraction = source_y_fraction;
- int y0_fraction = 256 - y1_fraction;
- const uint8* src_ptr1 = src_ptr + src_stride;
- int x;
- if (source_y_fraction == 0) {
- memcpy(dst_ptr, src_ptr, width);
- return;
- }
- if (source_y_fraction == 128) {
- HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
- return;
- }
- for (x = 0; x < width - 1; x += 2) {
- dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
- dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
- src_ptr += 2;
- src_ptr1 += 2;
- dst_ptr += 2;
- }
- if (width & 1) {
- dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
- }
-}
-
-// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
-void ARGBToBayerRow_C(const uint8* src_argb,
- uint8* dst_bayer, uint32 selector, int pix) {
- int index0 = selector & 0xff;
- int index1 = (selector >> 8) & 0xff;
- // Copy a row of Bayer.
- int x;
- for (x = 0; x < pix - 1; x += 2) {
- dst_bayer[0] = src_argb[index0];
- dst_bayer[1] = src_argb[index1];
- src_argb += 8;
- dst_bayer += 2;
- }
- if (pix & 1) {
- dst_bayer[0] = src_argb[index0];
- }
-}
-
-// Select G channel from ARGB. e.g. GGGGGGGG
-void ARGBToBayerGGRow_C(const uint8* src_argb,
- uint8* dst_bayer, uint32 selector, int pix) {
- // Copy a row of G.
- int x;
- for (x = 0; x < pix - 1; x += 2) {
- dst_bayer[0] = src_argb[1];
- dst_bayer[1] = src_argb[5];
- src_argb += 8;
- dst_bayer += 2;
- }
- if (pix & 1) {
- dst_bayer[0] = src_argb[1];
- }
-}
-
-// Use first 4 shuffler values to reorder ARGB channels.
-void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- int index0 = shuffler[0];
- int index1 = shuffler[1];
- int index2 = shuffler[2];
- int index3 = shuffler[3];
- // Shuffle a row of ARGB.
- int x;
- for (x = 0; x < pix; ++x) {
- // To support in-place conversion.
- uint8 b = src_argb[index0];
- uint8 g = src_argb[index1];
- uint8 r = src_argb[index2];
- uint8 a = src_argb[index3];
- dst_argb[0] = b;
- dst_argb[1] = g;
- dst_argb[2] = r;
- dst_argb[3] = a;
- src_argb += 4;
- dst_argb += 4;
- }
-}
-
-void I422ToYUY2Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_frame[0] = src_y[0];
- dst_frame[1] = src_u[0];
- dst_frame[2] = src_y[1];
- dst_frame[3] = src_v[0];
- dst_frame += 4;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- }
- if (width & 1) {
- dst_frame[0] = src_y[0];
- dst_frame[1] = src_u[0];
- dst_frame[2] = src_y[0]; // duplicate last y
- dst_frame[3] = src_v[0];
- }
-}
-
-void I422ToUYVYRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_frame[0] = src_u[0];
- dst_frame[1] = src_y[0];
- dst_frame[2] = src_v[0];
- dst_frame[3] = src_y[1];
- dst_frame += 4;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- }
- if (width & 1) {
- dst_frame[0] = src_u[0];
- dst_frame[1] = src_y[0];
- dst_frame[2] = src_v[0];
- dst_frame[3] = src_y[0]; // duplicate last y
- }
-}
-
-#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
-// row_win.cc has asm version, but GCC uses 2 step wrapper.
-#if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
- ARGBToRGB565Row_SSE2(row, rgb_buf, width);
- free_aligned_buffer_64(row);
-}
-#endif // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
-
-#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
- ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
- free_aligned_buffer_64(row);
-}
-
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
- ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
- free_aligned_buffer_64(row);
-}
-
-void NV12ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
- free_aligned_buffer_64(row);
-}
-
-void NV21ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_rgb565,
- int width) {
- // Allocate a row of ARGB.
- align_buffer_64(row, width * 4);
- NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
- free_aligned_buffer_64(row);
-}
-
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width) {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
- YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
- YUY2ToYRow_SSE2(src_yuy2, row_y, width);
- I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
- free_aligned_buffer_64(row_y);
-}
-
-void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width) {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
- YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
- YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
- I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
- free_aligned_buffer_64(row_y);
-}
-
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width) {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
- UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
- UYVYToYRow_SSE2(src_uyvy, row_y, width);
- I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
- free_aligned_buffer_64(row_y);
-}
-
-void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width) {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
- UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
- UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
- I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
- free_aligned_buffer_64(row_y);
-}
-
-#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-#endif // !defined(LIBYUV_DISABLE_X86)
-
-void ARGBPolynomialRow_C(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- int i;
- for (i = 0; i < width; ++i) {
- float b = (float)(src_argb[0]);
- float g = (float)(src_argb[1]);
- float r = (float)(src_argb[2]);
- float a = (float)(src_argb[3]);
- float b2 = b * b;
- float g2 = g * g;
- float r2 = r * r;
- float a2 = a * a;
- float db = poly[0] + poly[4] * b;
- float dg = poly[1] + poly[5] * g;
- float dr = poly[2] + poly[6] * r;
- float da = poly[3] + poly[7] * a;
- float b3 = b2 * b;
- float g3 = g2 * g;
- float r3 = r2 * r;
- float a3 = a2 * a;
- db += poly[8] * b2;
- dg += poly[9] * g2;
- dr += poly[10] * r2;
- da += poly[11] * a2;
- db += poly[12] * b3;
- dg += poly[13] * g3;
- dr += poly[14] * r3;
- da += poly[15] * a3;
-
- dst_argb[0] = Clamp((int32)(db));
- dst_argb[1] = Clamp((int32)(dg));
- dst_argb[2] = Clamp((int32)(dr));
- dst_argb[3] = Clamp((int32)(da));
- src_argb += 4;
- dst_argb += 4;
- }
-}
-
-void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
- const uint8* luma, uint32 lumacoeff) {
- uint32 bc = lumacoeff & 0xff;
- uint32 gc = (lumacoeff >> 8) & 0xff;
- uint32 rc = (lumacoeff >> 16) & 0xff;
-
- int i;
- for (i = 0; i < width - 1; i += 2) {
- // Luminance in rows, color values in columns.
- const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
- src_argb[2] * rc) & 0x7F00u) + luma;
- const uint8* luma1;
- dst_argb[0] = luma0[src_argb[0]];
- dst_argb[1] = luma0[src_argb[1]];
- dst_argb[2] = luma0[src_argb[2]];
- dst_argb[3] = src_argb[3];
- luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
- src_argb[6] * rc) & 0x7F00u) + luma;
- dst_argb[4] = luma1[src_argb[4]];
- dst_argb[5] = luma1[src_argb[5]];
- dst_argb[6] = luma1[src_argb[6]];
- dst_argb[7] = src_argb[7];
- src_argb += 8;
- dst_argb += 8;
- }
- if (width & 1) {
- // Luminance in rows, color values in columns.
- const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
- src_argb[2] * rc) & 0x7F00u) + luma;
- dst_argb[0] = luma0[src_argb[0]];
- dst_argb[1] = luma0[src_argb[1]];
- dst_argb[2] = luma0[src_argb[2]];
- dst_argb[3] = src_argb[3];
- }
-}
-
-void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
- int i;
- for (i = 0; i < width - 1; i += 2) {
- dst[3] = src[3];
- dst[7] = src[7];
- dst += 8;
- src += 8;
- }
- if (width & 1) {
- dst[3] = src[3];
- }
-}
-
-void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
- int i;
- for (i = 0; i < width - 1; i += 2) {
- dst[3] = src[0];
- dst[7] = src[1];
- dst += 8;
- src += 2;
- }
- if (width & 1) {
- dst[3] = src[0];
- }
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc
deleted file mode 100755
index 4435c55c5c..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc
+++ /dev/null
@@ -1,991 +0,0 @@
-/*
- * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
-
-#ifdef HAS_COPYROW_MIPS
-void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
- __asm__ __volatile__ (
- ".set noreorder \n"
- ".set noat \n"
- "slti $at, %[count], 8 \n"
- "bne $at ,$zero, $last8 \n"
- "xor $t8, %[src], %[dst] \n"
- "andi $t8, $t8, 0x3 \n"
-
- "bne $t8, $zero, unaligned \n"
- "negu $a3, %[dst] \n"
- // make dst/src aligned
- "andi $a3, $a3, 0x3 \n"
- "beq $a3, $zero, $chk16w \n"
- // word-aligned now count is the remining bytes count
- "subu %[count], %[count], $a3 \n"
-
- "lwr $t8, 0(%[src]) \n"
- "addu %[src], %[src], $a3 \n"
- "swr $t8, 0(%[dst]) \n"
- "addu %[dst], %[dst], $a3 \n"
-
- // Now the dst/src are mutually word-aligned with word-aligned addresses
- "$chk16w: \n"
- "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
- // t8 is the byte count after 64-byte chunks
- "beq %[count], $t8, chk8w \n"
- // There will be at most 1 32-byte chunk after it
- "subu $a3, %[count], $t8 \n" // the reminder
- // Here a3 counts bytes in 16w chunks
- "addu $a3, %[dst], $a3 \n"
- // Now a3 is the final dst after 64-byte chunks
- "addu $t0, %[dst], %[count] \n"
- // t0 is the "past the end" address
-
- // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past
- // the "t0-32" address
- // This means: for x=128 the last "safe" a1 address is "t0-160"
- // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
- // we will use "pref 30,128(a1)", so "t0-160" is the limit
- "subu $t9, $t0, 160 \n"
- // t9 is the "last safe pref 30,128(a1)" address
- "pref 0, 0(%[src]) \n" // first line of src
- "pref 0, 32(%[src]) \n" // second line of src
- "pref 0, 64(%[src]) \n"
- "pref 30, 32(%[dst]) \n"
- // In case the a1 > t9 don't use "pref 30" at all
- "sgtu $v1, %[dst], $t9 \n"
- "bgtz $v1, $loop16w \n"
- "nop \n"
- // otherwise, start with using pref30
- "pref 30, 64(%[dst]) \n"
- "$loop16w: \n"
- "pref 0, 96(%[src]) \n"
- "lw $t0, 0(%[src]) \n"
- "bgtz $v1, $skip_pref30_96 \n" // skip
- "lw $t1, 4(%[src]) \n"
- "pref 30, 96(%[dst]) \n" // continue
- "$skip_pref30_96: \n"
- "lw $t2, 8(%[src]) \n"
- "lw $t3, 12(%[src]) \n"
- "lw $t4, 16(%[src]) \n"
- "lw $t5, 20(%[src]) \n"
- "lw $t6, 24(%[src]) \n"
- "lw $t7, 28(%[src]) \n"
- "pref 0, 128(%[src]) \n"
- // bring the next lines of src, addr 128
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "lw $t0, 32(%[src]) \n"
- "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1)
- "lw $t1, 36(%[src]) \n"
- "pref 30, 128(%[dst]) \n" // set dest, addr 128
- "$skip_pref30_128: \n"
- "lw $t2, 40(%[src]) \n"
- "lw $t3, 44(%[src]) \n"
- "lw $t4, 48(%[src]) \n"
- "lw $t5, 52(%[src]) \n"
- "lw $t6, 56(%[src]) \n"
- "lw $t7, 60(%[src]) \n"
- "pref 0, 160(%[src]) \n"
- // bring the next lines of src, addr 160
- "sw $t0, 32(%[dst]) \n"
- "sw $t1, 36(%[dst]) \n"
- "sw $t2, 40(%[dst]) \n"
- "sw $t3, 44(%[dst]) \n"
- "sw $t4, 48(%[dst]) \n"
- "sw $t5, 52(%[dst]) \n"
- "sw $t6, 56(%[dst]) \n"
- "sw $t7, 60(%[dst]) \n"
-
- "addiu %[dst], %[dst], 64 \n" // adding 64 to dest
- "sgtu $v1, %[dst], $t9 \n"
- "bne %[dst], $a3, $loop16w \n"
- " addiu %[src], %[src], 64 \n" // adding 64 to src
- "move %[count], $t8 \n"
-
- // Here we have src and dest word-aligned but less than 64-bytes to go
-
- "chk8w: \n"
- "pref 0, 0x0(%[src]) \n"
- "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
- // the t8 is the reminder count past 32-bytes
- "beq %[count], $t8, chk1w \n"
- // count=t8,no 32-byte chunk
- " nop \n"
-
- "lw $t0, 0(%[src]) \n"
- "lw $t1, 4(%[src]) \n"
- "lw $t2, 8(%[src]) \n"
- "lw $t3, 12(%[src]) \n"
- "lw $t4, 16(%[src]) \n"
- "lw $t5, 20(%[src]) \n"
- "lw $t6, 24(%[src]) \n"
- "lw $t7, 28(%[src]) \n"
- "addiu %[src], %[src], 32 \n"
-
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "addiu %[dst], %[dst], 32 \n"
-
- "chk1w: \n"
- "andi %[count], $t8, 0x3 \n"
- // now count is the reminder past 1w chunks
- "beq %[count], $t8, $last8 \n"
- " subu $a3, $t8, %[count] \n"
- // a3 is count of bytes in 1w chunks
- "addu $a3, %[dst], $a3 \n"
- // now a3 is the dst address past the 1w chunks
- // copying in words (4-byte chunks)
- "$wordCopy_loop: \n"
- "lw $t3, 0(%[src]) \n"
- // the first t3 may be equal t0 ... optimize?
- "addiu %[src], %[src],4 \n"
- "addiu %[dst], %[dst],4 \n"
- "bne %[dst], $a3,$wordCopy_loop \n"
- " sw $t3, -4(%[dst]) \n"
-
- // For the last (<8) bytes
- "$last8: \n"
- "blez %[count], leave \n"
- " addu $a3, %[dst], %[count] \n" // a3 -last dst address
- "$last8loop: \n"
- "lb $v1, 0(%[src]) \n"
- "addiu %[src], %[src], 1 \n"
- "addiu %[dst], %[dst], 1 \n"
- "bne %[dst], $a3, $last8loop \n"
- " sb $v1, -1(%[dst]) \n"
-
- "leave: \n"
- " j $ra \n"
- " nop \n"
-
- //
- // UNALIGNED case
- //
-
- "unaligned: \n"
- // got here with a3="negu a1"
- "andi $a3, $a3, 0x3 \n" // a1 is word aligned?
- "beqz $a3, $ua_chk16w \n"
- " subu %[count], %[count], $a3 \n"
- // bytes left after initial a3 bytes
- "lwr $v1, 0(%[src]) \n"
- "lwl $v1, 3(%[src]) \n"
- "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3
- "swr $v1, 0(%[dst]) \n"
- "addu %[dst], %[dst], $a3 \n"
- // below the dst will be word aligned (NOTE1)
- "$ua_chk16w: \n"
- "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
- // t8 is the byte count after 64-byte chunks
- "beq %[count], $t8, ua_chk8w \n"
- // if a2==t8, no 64-byte chunks
- // There will be at most 1 32-byte chunk after it
- "subu $a3, %[count], $t8 \n" // the reminder
- // Here a3 counts bytes in 16w chunks
- "addu $a3, %[dst], $a3 \n"
- // Now a3 is the final dst after 64-byte chunks
- "addu $t0, %[dst], %[count] \n" // t0 "past the end"
- "subu $t9, $t0, 160 \n"
- // t9 is the "last safe pref 30,128(a1)" address
- "pref 0, 0(%[src]) \n" // first line of src
- "pref 0, 32(%[src]) \n" // second line addr 32
- "pref 0, 64(%[src]) \n"
- "pref 30, 32(%[dst]) \n"
- // safe, as we have at least 64 bytes ahead
- // In case the a1 > t9 don't use "pref 30" at all
- "sgtu $v1, %[dst], $t9 \n"
- "bgtz $v1, $ua_loop16w \n"
- // skip "pref 30,64(a1)" for too short arrays
- " nop \n"
- // otherwise, start with using pref30
- "pref 30, 64(%[dst]) \n"
- "$ua_loop16w: \n"
- "pref 0, 96(%[src]) \n"
- "lwr $t0, 0(%[src]) \n"
- "lwl $t0, 3(%[src]) \n"
- "lwr $t1, 4(%[src]) \n"
- "bgtz $v1, $ua_skip_pref30_96 \n"
- " lwl $t1, 7(%[src]) \n"
- "pref 30, 96(%[dst]) \n"
- // continue setting up the dest, addr 96
- "$ua_skip_pref30_96: \n"
- "lwr $t2, 8(%[src]) \n"
- "lwl $t2, 11(%[src]) \n"
- "lwr $t3, 12(%[src]) \n"
- "lwl $t3, 15(%[src]) \n"
- "lwr $t4, 16(%[src]) \n"
- "lwl $t4, 19(%[src]) \n"
- "lwr $t5, 20(%[src]) \n"
- "lwl $t5, 23(%[src]) \n"
- "lwr $t6, 24(%[src]) \n"
- "lwl $t6, 27(%[src]) \n"
- "lwr $t7, 28(%[src]) \n"
- "lwl $t7, 31(%[src]) \n"
- "pref 0, 128(%[src]) \n"
- // bring the next lines of src, addr 128
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "lwr $t0, 32(%[src]) \n"
- "lwl $t0, 35(%[src]) \n"
- "lwr $t1, 36(%[src]) \n"
- "bgtz $v1, ua_skip_pref30_128 \n"
- " lwl $t1, 39(%[src]) \n"
- "pref 30, 128(%[dst]) \n"
- // continue setting up the dest, addr 128
- "ua_skip_pref30_128: \n"
-
- "lwr $t2, 40(%[src]) \n"
- "lwl $t2, 43(%[src]) \n"
- "lwr $t3, 44(%[src]) \n"
- "lwl $t3, 47(%[src]) \n"
- "lwr $t4, 48(%[src]) \n"
- "lwl $t4, 51(%[src]) \n"
- "lwr $t5, 52(%[src]) \n"
- "lwl $t5, 55(%[src]) \n"
- "lwr $t6, 56(%[src]) \n"
- "lwl $t6, 59(%[src]) \n"
- "lwr $t7, 60(%[src]) \n"
- "lwl $t7, 63(%[src]) \n"
- "pref 0, 160(%[src]) \n"
- // bring the next lines of src, addr 160
- "sw $t0, 32(%[dst]) \n"
- "sw $t1, 36(%[dst]) \n"
- "sw $t2, 40(%[dst]) \n"
- "sw $t3, 44(%[dst]) \n"
- "sw $t4, 48(%[dst]) \n"
- "sw $t5, 52(%[dst]) \n"
- "sw $t6, 56(%[dst]) \n"
- "sw $t7, 60(%[dst]) \n"
-
- "addiu %[dst],%[dst],64 \n" // adding 64 to dest
- "sgtu $v1,%[dst],$t9 \n"
- "bne %[dst],$a3,$ua_loop16w \n"
- " addiu %[src],%[src],64 \n" // adding 64 to src
- "move %[count],$t8 \n"
-
- // Here we have src and dest word-aligned but less than 64-bytes to go
-
- "ua_chk8w: \n"
- "pref 0, 0x0(%[src]) \n"
- "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
- // the t8 is the reminder count
- "beq %[count], $t8, $ua_chk1w \n"
- // when count==t8, no 32-byte chunk
-
- "lwr $t0, 0(%[src]) \n"
- "lwl $t0, 3(%[src]) \n"
- "lwr $t1, 4(%[src]) \n"
- "lwl $t1, 7(%[src]) \n"
- "lwr $t2, 8(%[src]) \n"
- "lwl $t2, 11(%[src]) \n"
- "lwr $t3, 12(%[src]) \n"
- "lwl $t3, 15(%[src]) \n"
- "lwr $t4, 16(%[src]) \n"
- "lwl $t4, 19(%[src]) \n"
- "lwr $t5, 20(%[src]) \n"
- "lwl $t5, 23(%[src]) \n"
- "lwr $t6, 24(%[src]) \n"
- "lwl $t6, 27(%[src]) \n"
- "lwr $t7, 28(%[src]) \n"
- "lwl $t7, 31(%[src]) \n"
- "addiu %[src], %[src], 32 \n"
-
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "addiu %[dst], %[dst], 32 \n"
-
- "$ua_chk1w: \n"
- "andi %[count], $t8, 0x3 \n"
- // now count is the reminder past 1w chunks
- "beq %[count], $t8, ua_smallCopy \n"
- "subu $a3, $t8, %[count] \n"
- // a3 is count of bytes in 1w chunks
- "addu $a3, %[dst], $a3 \n"
- // now a3 is the dst address past the 1w chunks
-
- // copying in words (4-byte chunks)
- "$ua_wordCopy_loop: \n"
- "lwr $v1, 0(%[src]) \n"
- "lwl $v1, 3(%[src]) \n"
- "addiu %[src], %[src], 4 \n"
- "addiu %[dst], %[dst], 4 \n"
- // note: dst=a1 is word aligned here, see NOTE1
- "bne %[dst], $a3, $ua_wordCopy_loop \n"
- " sw $v1,-4(%[dst]) \n"
-
- // Now less than 4 bytes (value in count) left to copy
- "ua_smallCopy: \n"
- "beqz %[count], leave \n"
- " addu $a3, %[dst], %[count] \n" // a3 = last dst address
- "$ua_smallCopy_loop: \n"
- "lb $v1, 0(%[src]) \n"
- "addiu %[src], %[src], 1 \n"
- "addiu %[dst], %[dst], 1 \n"
- "bne %[dst],$a3,$ua_smallCopy_loop \n"
- " sb $v1, -1(%[dst]) \n"
-
- "j $ra \n"
- " nop \n"
- ".set at \n"
- ".set reorder \n"
- : [dst] "+r" (dst), [src] "+r" (src)
- : [count] "r" (count)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
- "t8", "t9", "a3", "v1", "at"
- );
-}
-#endif // HAS_COPYROW_MIPS
-
-// MIPS DSPR2 functions
-#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
- (__mips_dsp_rev >= 2)
-void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "srl $t4, %[width], 4 \n" // multiplies of 16
- "blez $t4, 2f \n"
- " andi %[width], %[width], 0xf \n" // residual
-
- ".p2align 2 \n"
- "1: \n"
- "addiu $t4, $t4, -1 \n"
- "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
- "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2
- "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4
- "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6
- "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8
- "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10
- "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12
- "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14
- "addiu %[src_uv], %[src_uv], 32 \n"
- "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
- "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
- "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
- "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
- "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
- "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
- "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
- "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
- "sw $t9, 0(%[dst_v]) \n"
- "sw $t0, 0(%[dst_u]) \n"
- "sw $t1, 4(%[dst_v]) \n"
- "sw $t2, 4(%[dst_u]) \n"
- "sw $t3, 8(%[dst_v]) \n"
- "sw $t5, 8(%[dst_u]) \n"
- "sw $t6, 12(%[dst_v]) \n"
- "sw $t7, 12(%[dst_u]) \n"
- "addiu %[dst_v], %[dst_v], 16 \n"
- "bgtz $t4, 1b \n"
- " addiu %[dst_u], %[dst_u], 16 \n"
-
- "beqz %[width], 3f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, 0(%[src_uv]) \n"
- "lbu $t1, 1(%[src_uv]) \n"
- "addiu %[src_uv], %[src_uv], 2 \n"
- "addiu %[width], %[width], -1 \n"
- "sb $t0, 0(%[dst_u]) \n"
- "sb $t1, 0(%[dst_v]) \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "bgtz %[width], 2b \n"
- " addiu %[dst_v], %[dst_v], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_uv] "+r" (src_uv),
- [width] "+r" (width),
- [dst_u] "+r" (dst_u),
- [dst_v] "+r" (dst_v)
- :
- : "t0", "t1", "t2", "t3",
- "t4", "t5", "t6", "t7", "t8", "t9"
- );
-}
-
-void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
- uint8* dst_v, int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "srl $t4, %[width], 4 \n" // multiplies of 16
- "blez $t4, 2f \n"
- " andi %[width], %[width], 0xf \n" // residual
-
- ".p2align 2 \n"
- "1: \n"
- "addiu $t4, $t4, -1 \n"
- "lwr $t0, 0(%[src_uv]) \n"
- "lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
- "lwr $t1, 4(%[src_uv]) \n"
- "lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
- "lwr $t2, 8(%[src_uv]) \n"
- "lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
- "lwr $t3, 12(%[src_uv]) \n"
- "lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
- "lwr $t5, 16(%[src_uv]) \n"
- "lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
- "lwr $t6, 20(%[src_uv]) \n"
- "lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
- "lwr $t7, 24(%[src_uv]) \n"
- "lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
- "lwr $t8, 28(%[src_uv]) \n"
- "lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
- "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
- "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
- "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
- "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
- "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
- "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
- "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
- "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
- "addiu %[src_uv], %[src_uv], 32 \n"
- "swr $t9, 0(%[dst_v]) \n"
- "swl $t9, 3(%[dst_v]) \n"
- "swr $t0, 0(%[dst_u]) \n"
- "swl $t0, 3(%[dst_u]) \n"
- "swr $t1, 4(%[dst_v]) \n"
- "swl $t1, 7(%[dst_v]) \n"
- "swr $t2, 4(%[dst_u]) \n"
- "swl $t2, 7(%[dst_u]) \n"
- "swr $t3, 8(%[dst_v]) \n"
- "swl $t3, 11(%[dst_v]) \n"
- "swr $t5, 8(%[dst_u]) \n"
- "swl $t5, 11(%[dst_u]) \n"
- "swr $t6, 12(%[dst_v]) \n"
- "swl $t6, 15(%[dst_v]) \n"
- "swr $t7, 12(%[dst_u]) \n"
- "swl $t7, 15(%[dst_u]) \n"
- "addiu %[dst_u], %[dst_u], 16 \n"
- "bgtz $t4, 1b \n"
- " addiu %[dst_v], %[dst_v], 16 \n"
-
- "beqz %[width], 3f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, 0(%[src_uv]) \n"
- "lbu $t1, 1(%[src_uv]) \n"
- "addiu %[src_uv], %[src_uv], 2 \n"
- "addiu %[width], %[width], -1 \n"
- "sb $t0, 0(%[dst_u]) \n"
- "sb $t1, 0(%[dst_v]) \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "bgtz %[width], 2b \n"
- " addiu %[dst_v], %[dst_v], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_uv] "+r" (src_uv),
- [width] "+r" (width),
- [dst_u] "+r" (dst_u),
- [dst_v] "+r" (dst_v)
- :
- : "t0", "t1", "t2", "t3",
- "t4", "t5", "t6", "t7", "t8", "t9"
- );
-}
-
-void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t4, %[width], 4 \n" // multiplies of 16
- "andi $t5, %[width], 0xf \n"
- "blez $t4, 2f \n"
- " addu %[src], %[src], %[width] \n" // src += width
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, -16(%[src]) \n" // |3|2|1|0|
- "lw $t1, -12(%[src]) \n" // |7|6|5|4|
- "lw $t2, -8(%[src]) \n" // |11|10|9|8|
- "lw $t3, -4(%[src]) \n" // |15|14|13|12|
- "wsbh $t0, $t0 \n" // |2|3|0|1|
- "wsbh $t1, $t1 \n" // |6|7|4|5|
- "wsbh $t2, $t2 \n" // |10|11|8|9|
- "wsbh $t3, $t3 \n" // |14|15|12|13|
- "rotr $t0, $t0, 16 \n" // |0|1|2|3|
- "rotr $t1, $t1, 16 \n" // |4|5|6|7|
- "rotr $t2, $t2, 16 \n" // |8|9|10|11|
- "rotr $t3, $t3, 16 \n" // |12|13|14|15|
- "addiu %[src], %[src], -16 \n"
- "addiu $t4, $t4, -1 \n"
- "sw $t3, 0(%[dst]) \n" // |15|14|13|12|
- "sw $t2, 4(%[dst]) \n" // |11|10|9|8|
- "sw $t1, 8(%[dst]) \n" // |7|6|5|4|
- "sw $t0, 12(%[dst]) \n" // |3|2|1|0|
- "bgtz $t4, 1b \n"
- " addiu %[dst], %[dst], 16 \n"
- "beqz $t5, 3f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, -1(%[src]) \n"
- "addiu $t5, $t5, -1 \n"
- "addiu %[src], %[src], -1 \n"
- "sb $t0, 0(%[dst]) \n"
- "bgez $t5, 2b \n"
- " addiu %[dst], %[dst], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src] "+r" (src), [dst] "+r" (dst)
- : [width] "r" (width)
- : "t0", "t1", "t2", "t3", "t4", "t5"
- );
-}
-
-void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- int x = 0;
- int y = 0;
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "addu $t4, %[width], %[width] \n"
- "srl %[x], %[width], 4 \n"
- "andi %[y], %[width], 0xf \n"
- "blez %[x], 2f \n"
- " addu %[src_uv], %[src_uv], $t4 \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
- "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
- "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8|
- "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12|
- "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16|
- "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20|
- "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24|
- "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28|
-
- "rotr $t0, $t0, 16 \n" // |1|0|3|2|
- "rotr $t1, $t1, 16 \n" // |5|4|7|6|
- "rotr $t2, $t2, 16 \n" // |9|8|11|10|
- "rotr $t3, $t3, 16 \n" // |13|12|15|14|
- "rotr $t4, $t4, 16 \n" // |17|16|19|18|
- "rotr $t6, $t6, 16 \n" // |21|20|23|22|
- "rotr $t7, $t7, 16 \n" // |25|24|27|26|
- "rotr $t8, $t8, 16 \n" // |29|28|31|30|
- "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6|
- "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7|
- "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14|
- "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15|
- "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22|
- "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23|
- "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30|
- "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31|
- "addiu %[src_uv], %[src_uv], -32 \n"
- "addiu %[x], %[x], -1 \n"
- "swr $t4, 0(%[dst_u]) \n"
- "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24|
- "swr $t6, 0(%[dst_v]) \n"
- "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25|
- "swr $t2, 4(%[dst_u]) \n"
- "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16|
- "swr $t3, 4(%[dst_v]) \n"
- "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17|
- "swr $t0, 8(%[dst_u]) \n"
- "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8|
- "swr $t1, 8(%[dst_v]) \n"
- "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9|
- "swr $t9, 12(%[dst_u]) \n"
- "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0|
- "swr $t5, 12(%[dst_v]) \n"
- "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1|
- "addiu %[dst_v], %[dst_v], 16 \n"
- "bgtz %[x], 1b \n"
- " addiu %[dst_u], %[dst_u], 16 \n"
- "beqz %[y], 3f \n"
- " nop \n"
- "b 2f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, -2(%[src_uv]) \n"
- "lbu $t1, -1(%[src_uv]) \n"
- "addiu %[src_uv], %[src_uv], -2 \n"
- "addiu %[y], %[y], -1 \n"
- "sb $t0, 0(%[dst_u]) \n"
- "sb $t1, 0(%[dst_v]) \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "bgtz %[y], 2b \n"
- " addiu %[dst_v], %[dst_v], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_uv] "+r" (src_uv),
- [dst_u] "+r" (dst_u),
- [dst_v] "+r" (dst_v),
- [x] "=&r" (x),
- [y] "+r" (y)
- : [width] "r" (width)
- : "t0", "t1", "t2", "t3", "t4",
- "t5", "t7", "t8", "t9"
- );
-}
-
-// Convert (4 Y and 2 VU) I422 and arrange RGB values into
-// t5 = | 0 | B0 | 0 | b0 |
-// t4 = | 0 | B1 | 0 | b1 |
-// t9 = | 0 | G0 | 0 | g0 |
-// t8 = | 0 | G1 | 0 | g1 |
-// t2 = | 0 | R0 | 0 | r0 |
-// t1 = | 0 | R1 | 0 | r1 |
-#define I422ToTransientMipsRGB \
- "lw $t0, 0(%[y_buf]) \n" \
- "lhu $t1, 0(%[u_buf]) \n" \
- "lhu $t2, 0(%[v_buf]) \n" \
- "preceu.ph.qbr $t1, $t1 \n" \
- "preceu.ph.qbr $t2, $t2 \n" \
- "preceu.ph.qbra $t3, $t0 \n" \
- "preceu.ph.qbla $t0, $t0 \n" \
- "subu.ph $t1, $t1, $s5 \n" \
- "subu.ph $t2, $t2, $s5 \n" \
- "subu.ph $t3, $t3, $s4 \n" \
- "subu.ph $t0, $t0, $s4 \n" \
- "mul.ph $t3, $t3, $s0 \n" \
- "mul.ph $t0, $t0, $s0 \n" \
- "shll.ph $t4, $t1, 0x7 \n" \
- "subu.ph $t4, $t4, $t1 \n" \
- "mul.ph $t6, $t1, $s1 \n" \
- "mul.ph $t1, $t2, $s2 \n" \
- "addq_s.ph $t5, $t4, $t3 \n" \
- "addq_s.ph $t4, $t4, $t0 \n" \
- "shra.ph $t5, $t5, 6 \n" \
- "shra.ph $t4, $t4, 6 \n" \
- "addiu %[u_buf], 2 \n" \
- "addiu %[v_buf], 2 \n" \
- "addu.ph $t6, $t6, $t1 \n" \
- "mul.ph $t1, $t2, $s3 \n" \
- "addu.ph $t9, $t6, $t3 \n" \
- "addu.ph $t8, $t6, $t0 \n" \
- "shra.ph $t9, $t9, 6 \n" \
- "shra.ph $t8, $t8, 6 \n" \
- "addu.ph $t2, $t1, $t3 \n" \
- "addu.ph $t1, $t1, $t0 \n" \
- "shra.ph $t2, $t2, 6 \n" \
- "shra.ph $t1, $t1, 6 \n" \
- "subu.ph $t5, $t5, $s5 \n" \
- "subu.ph $t4, $t4, $s5 \n" \
- "subu.ph $t9, $t9, $s5 \n" \
- "subu.ph $t8, $t8, $s5 \n" \
- "subu.ph $t2, $t2, $s5 \n" \
- "subu.ph $t1, $t1, $s5 \n" \
- "shll_s.ph $t5, $t5, 8 \n" \
- "shll_s.ph $t4, $t4, 8 \n" \
- "shll_s.ph $t9, $t9, 8 \n" \
- "shll_s.ph $t8, $t8, 8 \n" \
- "shll_s.ph $t2, $t2, 8 \n" \
- "shll_s.ph $t1, $t1, 8 \n" \
- "shra.ph $t5, $t5, 8 \n" \
- "shra.ph $t4, $t4, 8 \n" \
- "shra.ph $t9, $t9, 8 \n" \
- "shra.ph $t8, $t8, 8 \n" \
- "shra.ph $t2, $t2, 8 \n" \
- "shra.ph $t1, $t1, 8 \n" \
- "addu.ph $t5, $t5, $s5 \n" \
- "addu.ph $t4, $t4, $s5 \n" \
- "addu.ph $t9, $t9, $s5 \n" \
- "addu.ph $t8, $t8, $s5 \n" \
- "addu.ph $t2, $t2, $s5 \n" \
- "addu.ph $t1, $t1, $s5 \n"
-
-void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
- "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
- "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
- "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
- "repl.ph $s4, 16 \n" // |0|16|0|16|
- "repl.ph $s5, 128 \n" // |128|128| // clipping
- "lui $s6, 0xff00 \n"
- "ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
-
- ".p2align 2 \n"
- "1: \n"
- I422ToTransientMipsRGB
-// Arranging into argb format
- "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
- "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
- "addiu %[width], -4 \n"
- "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
- "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
- "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
-
- "addiu %[y_buf], 4 \n"
- "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
- "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
- "or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
- "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
- "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
- "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
- "sll $t9, $t9, 16 \n"
- "sll $t8, $t8, 16 \n"
- "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
- "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
-// Store results.
- "sw $t2, 0(%[rgb_buf]) \n"
- "sw $t0, 4(%[rgb_buf]) \n"
- "sw $t1, 8(%[rgb_buf]) \n"
- "sw $t3, 12(%[rgb_buf]) \n"
- "bnez %[width], 1b \n"
- " addiu %[rgb_buf], 16 \n"
- "2: \n"
- ".set pop \n"
- :[y_buf] "+r" (y_buf),
- [u_buf] "+r" (u_buf),
- [v_buf] "+r" (v_buf),
- [width] "+r" (width),
- [rgb_buf] "+r" (rgb_buf)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6"
- );
-}
-
-void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
- "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
- "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
- "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
- "repl.ph $s4, 16 \n" // |0|16|0|16|
- "repl.ph $s5, 128 \n" // |128|128|
- "lui $s6, 0xff00 \n"
- "ori $s6, 0xff00 \n" // |ff|00|ff|00|
-
- ".p2align 2 \n"
- "1: \n"
- I422ToTransientMipsRGB
-// Arranging into abgr format
- "precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
- "precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
- "precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
- "precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|
-
- "precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
- "addiu %[width], -4 \n"
- "addiu %[y_buf], 4 \n"
- "preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
- "preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
- "or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
- "or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
- "precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
- "precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
- "sll $t9, $t9, 16 \n"
- "sll $t8, $t8, 16 \n"
- "packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
- "packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
-// Store results.
- "sw $t2, 0(%[rgb_buf]) \n"
- "sw $t0, 4(%[rgb_buf]) \n"
- "sw $t1, 8(%[rgb_buf]) \n"
- "sw $t3, 12(%[rgb_buf]) \n"
- "bnez %[width], 1b \n"
- " addiu %[rgb_buf], 16 \n"
- "2: \n"
- ".set pop \n"
- :[y_buf] "+r" (y_buf),
- [u_buf] "+r" (u_buf),
- [v_buf] "+r" (v_buf),
- [width] "+r" (width),
- [rgb_buf] "+r" (rgb_buf)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6"
- );
-}
-
-void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
- "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
- "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
- "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
- "repl.ph $s4, 16 \n" // |0|16|0|16|
- "repl.ph $s5, 128 \n" // |128|128|
- "lui $s6, 0xff \n"
- "ori $s6, 0xff \n" // |00|ff|00|ff|
-
- ".p2align 2 \n"
- "1: \n"
- I422ToTransientMipsRGB
- // Arranging into bgra format
- "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
- "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
- "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
- "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
-
- "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
- "addiu %[width], -4 \n"
- "addiu %[y_buf], 4 \n"
- "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
- "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
- "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
- "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
- "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
- "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
- "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
- "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
- "sll $t1, $t1, 16 \n"
- "sll $t2, $t2, 16 \n"
- "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
- "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
-// Store results.
- "sw $t2, 0(%[rgb_buf]) \n"
- "sw $t0, 4(%[rgb_buf]) \n"
- "sw $t1, 8(%[rgb_buf]) \n"
- "sw $t3, 12(%[rgb_buf]) \n"
- "bnez %[width], 1b \n"
- " addiu %[rgb_buf], 16 \n"
- "2: \n"
- ".set pop \n"
- :[y_buf] "+r" (y_buf),
- [u_buf] "+r" (u_buf),
- [v_buf] "+r" (v_buf),
- [width] "+r" (width),
- [rgb_buf] "+r" (rgb_buf)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6"
- );
-}
-
-// Bilinear filter 8x2 -> 8x1
-void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- int y0_fraction = 256 - source_y_fraction;
- const uint8* src_ptr1 = src_ptr + src_stride;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "replv.ph $t0, %[y0_fraction] \n"
- "replv.ph $t1, %[source_y_fraction] \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t2, 0(%[src_ptr]) \n"
- "lw $t3, 0(%[src_ptr1]) \n"
- "lw $t4, 4(%[src_ptr]) \n"
- "lw $t5, 4(%[src_ptr1]) \n"
- "muleu_s.ph.qbl $t6, $t2, $t0 \n"
- "muleu_s.ph.qbr $t7, $t2, $t0 \n"
- "muleu_s.ph.qbl $t8, $t3, $t1 \n"
- "muleu_s.ph.qbr $t9, $t3, $t1 \n"
- "muleu_s.ph.qbl $t2, $t4, $t0 \n"
- "muleu_s.ph.qbr $t3, $t4, $t0 \n"
- "muleu_s.ph.qbl $t4, $t5, $t1 \n"
- "muleu_s.ph.qbr $t5, $t5, $t1 \n"
- "addq.ph $t6, $t6, $t8 \n"
- "addq.ph $t7, $t7, $t9 \n"
- "addq.ph $t2, $t2, $t4 \n"
- "addq.ph $t3, $t3, $t5 \n"
- "shra.ph $t6, $t6, 8 \n"
- "shra.ph $t7, $t7, 8 \n"
- "shra.ph $t2, $t2, 8 \n"
- "shra.ph $t3, $t3, 8 \n"
- "precr.qb.ph $t6, $t6, $t7 \n"
- "precr.qb.ph $t2, $t2, $t3 \n"
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[src_ptr1], %[src_ptr1], 8 \n"
- "addiu %[dst_width], %[dst_width], -8 \n"
- "sw $t6, 0(%[dst_ptr]) \n"
- "sw $t2, 4(%[dst_ptr]) \n"
- "bgtz %[dst_width], 1b \n"
- " addiu %[dst_ptr], %[dst_ptr], 8 \n"
-
- ".set pop \n"
- : [dst_ptr] "+r" (dst_ptr),
- [src_ptr1] "+r" (src_ptr1),
- [src_ptr] "+r" (src_ptr),
- [dst_width] "+r" (dst_width)
- : [source_y_fraction] "r" (source_y_fraction),
- [y0_fraction] "r" (y0_fraction),
- [src_stride] "r" (src_stride)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9"
- );
-}
-#endif // __mips_dsp_rev >= 2
-
-#endif // defined(__mips__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc
deleted file mode 100755
index 68e380051b..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc
+++ /dev/null
@@ -1,2847 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-
-// Read 8 Y, 4 U and 4 V from 422
-#define READYUV422 \
- "vld1.8 {d0}, [%0]! \n" \
- "vld1.32 {d2[0]}, [%1]! \n" \
- "vld1.32 {d2[1]}, [%2]! \n"
-
-// Read 8 Y, 2 U and 2 V from 422
-#define READYUV411 \
- "vld1.8 {d0}, [%0]! \n" \
- "vld1.16 {d2[0]}, [%1]! \n" \
- "vld1.16 {d2[1]}, [%2]! \n" \
- "vmov.u8 d3, d2 \n" \
- "vzip.u8 d2, d3 \n"
-
-// Read 8 Y, 8 U and 8 V from 444
-#define READYUV444 \
- "vld1.8 {d0}, [%0]! \n" \
- "vld1.8 {d2}, [%1]! \n" \
- "vld1.8 {d3}, [%2]! \n" \
- "vpaddl.u8 q1, q1 \n" \
- "vrshrn.u16 d2, q1, #1 \n"
-
-// Read 8 Y, and set 4 U and 4 V to 128
-#define READYUV400 \
- "vld1.8 {d0}, [%0]! \n" \
- "vmov.u8 d2, #128 \n"
-
-// Read 8 Y and 4 UV from NV12
-#define READNV12 \
- "vld1.8 {d0}, [%0]! \n" \
- "vld1.8 {d2}, [%1]! \n" \
- "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
- "vuzp.u8 d2, d3 \n" \
- "vtrn.u32 d2, d3 \n"
-
-// Read 8 Y and 4 VU from NV21
-#define READNV21 \
- "vld1.8 {d0}, [%0]! \n" \
- "vld1.8 {d2}, [%1]! \n" \
- "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
- "vuzp.u8 d3, d2 \n" \
- "vtrn.u32 d2, d3 \n"
-
-// Read 8 YUY2
-#define READYUY2 \
- "vld2.8 {d0, d2}, [%0]! \n" \
- "vmov.u8 d3, d2 \n" \
- "vuzp.u8 d2, d3 \n" \
- "vtrn.u32 d2, d3 \n"
-
-// Read 8 UYVY
-#define READUYVY \
- "vld2.8 {d2, d3}, [%0]! \n" \
- "vmov.u8 d0, d3 \n" \
- "vmov.u8 d3, d2 \n" \
- "vuzp.u8 d2, d3 \n" \
- "vtrn.u32 d2, d3 \n"
-
-#define YUV422TORGB \
- "veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\
- "vmull.s8 q8, d2, d24 \n"/* u/v B/R component */\
- "vmull.s8 q9, d2, d25 \n"/* u/v G component */\
- "vmov.u8 d1, #0 \n"/* split odd/even y apart */\
- "vtrn.u8 d0, d1 \n" \
- "vsub.s16 q0, q0, q15 \n"/* offset y */\
- "vmul.s16 q0, q0, q14 \n" \
- "vadd.s16 d18, d19 \n" \
- "vqadd.s16 d20, d0, d16 \n" /* B */ \
- "vqadd.s16 d21, d1, d16 \n" \
- "vqadd.s16 d22, d0, d17 \n" /* R */ \
- "vqadd.s16 d23, d1, d17 \n" \
- "vqadd.s16 d16, d0, d18 \n" /* G */ \
- "vqadd.s16 d17, d1, d18 \n" \
- "vqshrun.s16 d0, q10, #6 \n" /* B */ \
- "vqshrun.s16 d1, q11, #6 \n" /* G */ \
- "vqshrun.s16 d2, q8, #6 \n" /* R */ \
- "vmovl.u8 q10, d0 \n"/* set up for reinterleave*/\
- "vmovl.u8 q11, d1 \n" \
- "vmovl.u8 q8, d2 \n" \
- "vtrn.u8 d20, d21 \n" \
- "vtrn.u8 d22, d23 \n" \
- "vtrn.u8 d16, d17 \n" \
- "vmov.u8 d21, d16 \n"
-
-static vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102,
- 0, 0, 0, 0, 0, 0, 0, 0 };
-static vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
- 0, 0, 0, 0, 0, 0, 0, 0 };
-
-void I444ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width) {
- asm volatile (
-#ifdef _ANDROID
- ".fpu neon\n"
-#endif
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV444
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I411ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV411
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToBGRARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- "vmov.u8 d19, #255 \n"
- "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_bgra), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToABGRRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- "vmov.u8 d23, #255 \n"
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_abgr), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToRGBARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d19, #255 \n"
- "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgba), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToRGB24Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vst3.8 {d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgb24), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToRAWRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- "vst3.8 {d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_raw), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-#define ARGBTORGB565 \
- "vshr.u8 d20, d20, #3 \n" /* B */ \
- "vshr.u8 d21, d21, #2 \n" /* G */ \
- "vshr.u8 d22, d22, #3 \n" /* R */ \
- "vmovl.u8 q8, d20 \n" /* B */ \
- "vmovl.u8 q9, d21 \n" /* G */ \
- "vmovl.u8 q10, d22 \n" /* R */ \
- "vshl.u16 q9, q9, #5 \n" /* G */ \
- "vshl.u16 q10, q10, #11 \n" /* R */ \
- "vorr q0, q8, q9 \n" /* BG */ \
- "vorr q0, q0, q10 \n" /* BGR */
-
-void I422ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- ARGBTORGB565
- "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgb565), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-#define ARGBTOARGB1555 \
- "vshr.u8 q10, q10, #3 \n" /* B */ \
- "vshr.u8 d22, d22, #3 \n" /* R */ \
- "vshr.u8 d23, d23, #7 \n" /* A */ \
- "vmovl.u8 q8, d20 \n" /* B */ \
- "vmovl.u8 q9, d21 \n" /* G */ \
- "vmovl.u8 q10, d22 \n" /* R */ \
- "vmovl.u8 q11, d23 \n" /* A */ \
- "vshl.u16 q9, q9, #5 \n" /* G */ \
- "vshl.u16 q10, q10, #10 \n" /* R */ \
- "vshl.u16 q11, q11, #15 \n" /* A */ \
- "vorr q0, q8, q9 \n" /* BG */ \
- "vorr q1, q10, q11 \n" /* RA */ \
- "vorr q0, q0, q1 \n" /* BGRA */
-
-void I422ToARGB1555Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- ARGBTOARGB1555
- "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB1555.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb1555), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-#define ARGBTOARGB4444 \
- "vshr.u8 d20, d20, #4 \n" /* B */ \
- "vbic.32 d21, d21, d4 \n" /* G */ \
- "vshr.u8 d22, d22, #4 \n" /* R */ \
- "vbic.32 d23, d23, d4 \n" /* A */ \
- "vorr d0, d20, d21 \n" /* BG */ \
- "vorr d1, d22, d23 \n" /* RA */ \
- "vzip.u8 d0, d1 \n" /* BGRA */
-
-void I422ToARGB4444Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%5] \n"
- "vld1.8 {d25}, [%6] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- "vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- ARGBTOARGB4444
- "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB4444.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb4444), // %3
- "+r"(width) // %4
- : "r"(&kUVToRB), // %5
- "r"(&kUVToG) // %6
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void YToARGBRow_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%3] \n"
- "vld1.8 {d25}, [%4] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUV400
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(&kUVToRB), // %3
- "r"(&kUVToG) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I400ToARGBRow_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width) {
- asm volatile (
- ".p2align 2 \n"
- "vmov.u8 d23, #255 \n"
- "1: \n"
- "vld1.8 {d20}, [%0]! \n"
- "vmov d21, d20 \n"
- "vmov d22, d20 \n"
- "subs %2, %2, #8 \n"
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "d20", "d21", "d22", "d23"
- );
-}
-
-void NV12ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%4] \n"
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READNV12
- YUV422TORGB
- "subs %3, %3, #8 \n"
- "vmov.u8 d23, #255 \n"
- "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%4] \n"
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READNV21
- YUV422TORGB
- "subs %3, %3, #8 \n"
- "vmov.u8 d23, #255 \n"
- "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void NV12ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%4] \n"
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READNV12
- YUV422TORGB
- "subs %3, %3, #8 \n"
- ARGBTORGB565
- "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%4] \n"
- "vld1.8 {d25}, [%5] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READNV21
- YUV422TORGB
- "subs %3, %3, #8 \n"
- ARGBTORGB565
- "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : "r"(&kUVToRB), // %4
- "r"(&kUVToG) // %5
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%3] \n"
- "vld1.8 {d25}, [%4] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READYUY2
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(&kUVToRB), // %3
- "r"(&kUVToG) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void UYVYToARGBRow_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "vld1.8 {d24}, [%3] \n"
- "vld1.8 {d25}, [%4] \n"
- "vmov.u8 d26, #128 \n"
- "vmov.u16 q14, #74 \n"
- "vmov.u16 q15, #16 \n"
- ".p2align 2 \n"
- "1: \n"
- READUYVY
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(&kUVToRB), // %3
- "r"(&kUVToG) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
-void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
- "subs %3, %3, #16 \n" // 16 processed per loop
- "vst1.8 {q0}, [%1]! \n" // store U
- "vst1.8 {q1}, [%2]! \n" // store V
- "bgt 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3 // Output registers
- : // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// Reads 16 U's and V's and writes out 16 pairs of UV.
-void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load U
- "vld1.8 {q1}, [%1]! \n" // load V
- "subs %3, %3, #16 \n" // 16 processed per loop
- "vst2.u8 {q0, q1}, [%2]! \n" // store 16 pairs of UV
- "bgt 1b \n"
- :
- "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3 // Output registers
- : // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15.
-void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32
- "subs %2, %2, #32 \n" // 32 processed per loop
- "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32
- "bgt 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(count) // %2 // Output registers
- : // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// SetRow8 writes 'count' bytes using a 32 bit value repeated.
-void SetRow_NEON(uint8* dst, uint32 v32, int count) {
- asm volatile (
- "vdup.u32 q0, %2 \n" // duplicate 4 ints
- "1: \n"
- "subs %1, %1, #16 \n" // 16 bytes per loop
- "vst1.8 {q0}, [%0]! \n" // store
- "bgt 1b \n"
- : "+r"(dst), // %0
- "+r"(count) // %1
- : "r"(v32) // %2
- : "cc", "memory", "q0"
- );
-}
-
-// TODO(fbarchard): Make fully assembler
-// SetRow32 writes 'count' words using a 32 bit value repeated.
-void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
- int dst_stride, int height) {
- for (int y = 0; y < height; ++y) {
- SetRow_NEON(dst, v32, width << 2);
- dst += dst_stride;
- }
-}
-
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
- asm volatile (
- // Start at end of source row.
- "mov r3, #-16 \n"
- "add %0, %0, %2 \n"
- "sub %0, #16 \n"
-
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0], r3 \n" // src -= 16
- "subs %2, #16 \n" // 16 pixels per loop.
- "vrev64.8 q0, q0 \n"
- "vst1.8 {d1}, [%1]! \n" // dst += 16
- "vst1.8 {d0}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "r3", "q0"
- );
-}
-
-void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- asm volatile (
- // Start at end of source row.
- "mov r12, #-16 \n"
- "add %0, %0, %3, lsl #1 \n"
- "sub %0, #16 \n"
-
- ".p2align 2 \n"
- "1: \n"
- "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16
- "subs %3, #8 \n" // 8 pixels per loop.
- "vrev64.8 q0, q0 \n"
- "vst1.8 {d0}, [%1]! \n" // dst += 8
- "vst1.8 {d1}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "r12", "q0"
- );
-}
-
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
- asm volatile (
- // Start at end of source row.
- "mov r3, #-16 \n"
- "add %0, %0, %2, lsl #2 \n"
- "sub %0, #16 \n"
-
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0], r3 \n" // src -= 16
- "subs %2, #4 \n" // 4 pixels per loop.
- "vrev64.32 q0, q0 \n"
- "vst1.8 {d1}, [%1]! \n" // dst += 16
- "vst1.8 {d0}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "r3", "q0"
- );
-}
-
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
- asm volatile (
- "vmov.u8 d4, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_rgb24), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
-}
-
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
- asm volatile (
- "vmov.u8 d4, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vswp.u8 d1, d3 \n" // swap R, B
- "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_raw), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
-}
-
-#define RGB565TOARGB \
- "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \
- "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \
- "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \
- "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \
- "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
- "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
- "vorr.u8 d0, d0, d4 \n" /* B */ \
- "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \
- "vorr.u8 d2, d1, d5 \n" /* R */ \
- "vorr.u8 d1, d4, d6 \n" /* G */
-
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- RGB565TOARGB
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_rgb565), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-#define ARGB1555TOARGB \
- "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \
- "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \
- "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \
- "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \
- "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \
- "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \
- "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \
- "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \
- "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \
- "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \
- "vorr.u8 q1, q1, q3 \n" /* R,A */ \
- "vorr.u8 q0, q0, q2 \n" /* B,G */ \
-
-// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
-#define RGB555TOARGB \
- "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \
- "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \
- "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \
- "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \
- "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
- "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
- "vorr.u8 d0, d0, d4 \n" /* B */ \
- "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \
- "vorr.u8 d2, d1, d5 \n" /* R */ \
- "vorr.u8 d1, d4, d6 \n" /* G */
-
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGB1555TOARGB
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_argb1555), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-#define ARGB4444TOARGB \
- "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \
- "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \
- "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \
- "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \
- "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \
- "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \
- "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \
- "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */
-
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGB4444TOARGB
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_argb4444), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2" // Clobber List
- );
-}
-
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_rgb24), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
-}
-
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vswp.u8 d1, d3 \n" // swap R, B
- "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_raw), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
-}
-
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
- "subs %2, %2, #16 \n" // 16 processed per loop.
- "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y.
- "bgt 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY.
- "subs %2, %2, #16 \n" // 16 processed per loop.
- "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y.
- "bgt 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
- int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
- "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
- "vst1.8 {d1}, [%1]! \n" // store 8 U.
- "vst1.8 {d3}, [%2]! \n" // store 8 V.
- "bgt 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
- );
-}
-
-void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
- int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
- "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
- "vst1.8 {d0}, [%1]! \n" // store 8 U.
- "vst1.8 {d2}, [%2]! \n" // store 8 V.
- "bgt 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
- );
-}
-
-void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // stride + src_yuy2
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
- "subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2.
- "vrhadd.u8 d1, d1, d5 \n" // average rows of U
- "vrhadd.u8 d3, d3, d7 \n" // average rows of V
- "vst1.8 {d1}, [%2]! \n" // store 8 U.
- "vst1.8 {d3}, [%3]! \n" // store 8 V.
- "bgt 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(stride_yuy2), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
- );
-}
-
-void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // stride + src_uyvy
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
- "subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY.
- "vrhadd.u8 d0, d0, d4 \n" // average rows of U
- "vrhadd.u8 d2, d2, d6 \n" // average rows of V
- "vst1.8 {d0}, [%2]! \n" // store 8 U.
- "vst1.8 {d2}, [%3]! \n" // store 8 V.
- "bgt 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(stride_uyvy), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
- );
-}
-
-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- asm volatile (
- // change the stride to row 2 pointer
- "add %1, %0 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels.
- "subs %3, %3, #16 \n" // 16 processed per loop
- "vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels.
- "vrhadd.u8 q0, q1 \n" // average row 1 and 2
- "vst1.8 {q0}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_uv), // %0
- "+r"(src_uv_stride), // %1
- "+r"(dst_uv), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
-void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- asm volatile (
- "vmov.u32 d6[0], %3 \n" // selector
- "1: \n"
- "vld1.8 {q0, q1}, [%0]! \n" // load row 8 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop
- "vtbl.8 d4, {d0, d1}, d6 \n" // look up 4 pixels
- "vtbl.8 d5, {d2, d3}, d6 \n" // look up 4 pixels
- "vtrn.u32 d4, d5 \n" // combine 8 pixels
- "vst1.8 {d4}, [%1]! \n" // store 8.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- : "r"(selector) // %3
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-// Select G channels from ARGB. e.g. GGGGGGGG
-void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
- uint32 /*selector*/, int pix) {
- asm volatile (
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop
- "vst1.8 {d1}, [%1]! \n" // store 8 G's.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- asm volatile (
- "vld1.8 {q2}, [%3] \n" // shuffler
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 4 pixels.
- "subs %2, %2, #4 \n" // 4 processed per loop
- "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels
- "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels
- "vst1.8 {q1}, [%1]! \n" // store 4.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "r"(shuffler) // %3
- : "cc", "memory", "q0", "q1", "q2" // Clobber List
- );
-}
-
-void I422ToYUY2Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys
- "vld1.8 {d1}, [%1]! \n" // load 8 Us
- "vld1.8 {d3}, [%2]! \n" // load 8 Vs
- "subs %4, %4, #16 \n" // 16 pixels
- "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_yuy2), // %3
- "+r"(width) // %4
- :
- : "cc", "memory", "d0", "d1", "d2", "d3"
- );
-}
-
-void I422ToUYVYRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys
- "vld1.8 {d0}, [%1]! \n" // load 8 Us
- "vld1.8 {d2}, [%2]! \n" // load 8 Vs
- "subs %4, %4, #16 \n" // 16 pixels
- "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_uyvy), // %3
- "+r"(width) // %4
- :
- : "cc", "memory", "d0", "d1", "d2", "d3"
- );
-}
-
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGBTORGB565
- "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_rgb565), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
- );
-}
-
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
- int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGBTOARGB1555
- "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB1555.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb1555), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
- );
-}
-
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
- int pix) {
- asm volatile (
- "vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGBTOARGB4444
- "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb4444), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
- );
-}
-
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
- );
-}
-
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
- "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
- "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
- );
-}
-
-// 8x1 pixels.
-void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {
- asm volatile (
- "vmov.u8 d24, #112 \n" // UB / VR 0.875 coefficient
- "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient
- "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient
- "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient
- "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlsl.u8 q2, d1, d25 \n" // G
- "vmlsl.u8 q2, d2, d26 \n" // R
- "vadd.u16 q2, q2, q15 \n" // +128 -> unsigned
-
- "vmull.u8 q3, d2, d24 \n" // R
- "vmlsl.u8 q3, d1, d28 \n" // G
- "vmlsl.u8 q3, d0, d27 \n" // B
- "vadd.u16 q3, q3, q15 \n" // +128 -> unsigned
-
- "vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V
-
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"
- );
-}
-
-// 16x1 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
-void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {
- asm volatile (
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
-
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
-
- "subs %3, %3, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q0, q10 \n" // B
- "vmls.s16 q8, q1, q11 \n" // G
- "vmls.s16 q8, q2, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
-
- "vmul.s16 q9, q2, q10 \n" // R
- "vmls.s16 q9, q1, q14 \n" // G
- "vmls.s16 q9, q0, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
-
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
-
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 32x1 pixels -> 8x1. pix is number of argb pixels. e.g. 32.
-void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {
- asm volatile (
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- "vld4.8 {d8, d10, d12, d14}, [%0]! \n" // load 8 more ARGB pixels.
- "vld4.8 {d9, d11, d13, d15}, [%0]! \n" // load last 8 ARGB pixels.
- "vpaddl.u8 q4, q4 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q5, q5 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q6, q6 \n" // R 16 bytes -> 8 shorts.
-
- "vpadd.u16 d0, d0, d1 \n" // B 16 shorts -> 8 shorts.
- "vpadd.u16 d1, d8, d9 \n" // B
- "vpadd.u16 d2, d2, d3 \n" // G 16 shorts -> 8 shorts.
- "vpadd.u16 d3, d10, d11 \n" // G
- "vpadd.u16 d4, d4, d5 \n" // R 16 shorts -> 8 shorts.
- "vpadd.u16 d5, d12, d13 \n" // R
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %3, %3, #32 \n" // 32 processed per loop.
- "vmul.s16 q8, q0, q10 \n" // B
- "vmls.s16 q8, q1, q11 \n" // G
- "vmls.s16 q8, q2, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q2, q10 \n" // R
- "vmls.s16 q9, q1, q14 \n" // G
- "vmls.s16 q9, q0, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
-#define RGBTOUV(QB, QG, QR) \
- "vmul.s16 q8, " #QB ", q10 \n" /* B */ \
- "vmls.s16 q8, " #QG ", q11 \n" /* G */ \
- "vmls.s16 q8, " #QR ", q12 \n" /* R */ \
- "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
- "vmul.s16 q9, " #QR ", q10 \n" /* R */ \
- "vmls.s16 q9, " #QG ", q14 \n" /* G */ \
- "vmls.s16 q9, " #QB ", q13 \n" /* B */ \
- "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
- "vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
- "vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */
-
-// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
-void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
- "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q0, q1, q2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stride_argb), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// TODO(fbarchard): Subsample match C code.
-void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
- "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
- "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
- "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
- "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
- "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q0, q1, q2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stride_argb), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_bgra
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels.
- "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts.
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels.
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels.
- "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts.
-
- "vrshr.u16 q1, q1, #1 \n" // 2x average
- "vrshr.u16 q2, q2, #1 \n"
- "vrshr.u16 q3, q3, #1 \n"
-
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q3, q2, q1)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_bgra), // %0
- "+r"(src_stride_bgra), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_abgr
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
- "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
- "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q2, q1, q0)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_abgr), // %0
- "+r"(src_stride_abgr), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_rgba
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels.
- "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts.
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels.
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels.
- "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q0, q1, q2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_rgba), // %0
- "+r"(src_stride_rgba), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_rgb24
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
- "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels.
- "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels.
- "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q0, q1, q2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_rgb24), // %0
- "+r"(src_stride_rgb24), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_raw
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
- "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
- "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
- "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels.
- "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels.
- "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 32 processed per loop.
- RGBTOUV(q2, q1, q0)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_raw), // %0
- "+r"(src_stride_raw), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
-void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
- RGB565TOARGB
- "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
- "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels.
- RGB565TOARGB
- "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
-
- "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels.
- RGB565TOARGB
- "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
- "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels.
- RGB565TOARGB
- "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
-
- "vrshr.u16 q4, q4, #1 \n" // 2x average
- "vrshr.u16 q5, q5, #1 \n"
- "vrshr.u16 q6, q6, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q4, q10 \n" // B
- "vmls.s16 q8, q5, q11 \n" // G
- "vmls.s16 q8, q6, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q6, q10 \n" // R
- "vmls.s16 q9, q5, q14 \n" // G
- "vmls.s16 q9, q4, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_rgb565), // %0
- "+r"(src_stride_rgb565), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
- RGB555TOARGB
- "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
- "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels.
- RGB555TOARGB
- "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
-
- "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels.
- RGB555TOARGB
- "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
- "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels.
- RGB555TOARGB
- "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
-
- "vrshr.u16 q4, q4, #1 \n" // 2x average
- "vrshr.u16 q5, q5, #1 \n"
- "vrshr.u16 q6, q6, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q4, q10 \n" // B
- "vmls.s16 q8, q5, q11 \n" // G
- "vmls.s16 q8, q6, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q6, q10 \n" // R
- "vmls.s16 q9, q5, q14 \n" // G
- "vmls.s16 q9, q4, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_argb1555), // %0
- "+r"(src_stride_argb1555), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
- ARGB4444TOARGB
- "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
- "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels.
- ARGB4444TOARGB
- "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
-
- "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels.
- ARGB4444TOARGB
- "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
- "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels.
- ARGB4444TOARGB
- "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
-
- "vrshr.u16 q4, q4, #1 \n" // 2x average
- "vrshr.u16 q5, q5, #1 \n"
- "vrshr.u16 q6, q6, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q4, q10 \n" // B
- "vmls.s16 q8, q5, q11 \n" // G
- "vmls.s16 q8, q6, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q6, q10 \n" // R
- "vmls.s16 q9, q5, q14 \n" // G
- "vmls.s16 q9, q4, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"
- : "+r"(src_argb4444), // %0
- "+r"(src_stride_argb4444), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- RGB565TOARGB
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_rgb565), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
- );
-}
-
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGB1555TOARGB
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_argb1555), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
- );
-}
-
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGB4444TOARGB
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_argb4444), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
- );
-}
-
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d1, d4 \n" // R
- "vmlal.u8 q8, d2, d5 \n" // G
- "vmlal.u8 q8, d3, d6 \n" // B
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_bgra), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d0, d4 \n" // R
- "vmlal.u8 q8, d1, d5 \n" // G
- "vmlal.u8 q8, d2, d6 \n" // B
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_abgr), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d1, d4 \n" // B
- "vmlal.u8 q8, d2, d5 \n" // G
- "vmlal.u8 q8, d3, d6 \n" // R
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_rgba), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d0, d4 \n" // B
- "vmlal.u8 q8, d1, d5 \n" // G
- "vmlal.u8 q8, d2, d6 \n" // R
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_rgb24), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d0, d4 \n" // B
- "vmlal.u8 q8, d1, d5 \n" // G
- "vmlal.u8 q8, d2, d6 \n" // R
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_raw), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_NEON(uint8* dst_ptr,
- const uint8* src_ptr, ptrdiff_t src_stride,
- int dst_width, int source_y_fraction) {
- asm volatile (
- "cmp %4, #0 \n"
- "beq 100f \n"
- "add %2, %1 \n"
- "cmp %4, #64 \n"
- "beq 75f \n"
- "cmp %4, #128 \n"
- "beq 50f \n"
- "cmp %4, #192 \n"
- "beq 25f \n"
-
- "vdup.8 d5, %4 \n"
- "rsb %4, #256 \n"
- "vdup.8 d4, %4 \n"
- // General purpose row blend.
- "1: \n"
- "vld1.8 {q0}, [%1]! \n"
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vmull.u8 q13, d0, d4 \n"
- "vmull.u8 q14, d1, d4 \n"
- "vmlal.u8 q13, d2, d5 \n"
- "vmlal.u8 q14, d3, d5 \n"
- "vrshrn.u16 d0, q13, #8 \n"
- "vrshrn.u16 d1, q14, #8 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 1b \n"
- "b 99f \n"
-
- // Blend 25 / 75.
- "25: \n"
- "vld1.8 {q0}, [%1]! \n"
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 25b \n"
- "b 99f \n"
-
- // Blend 50 / 50.
- "50: \n"
- "vld1.8 {q0}, [%1]! \n"
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 50b \n"
- "b 99f \n"
-
- // Blend 75 / 25.
- "75: \n"
- "vld1.8 {q1}, [%1]! \n"
- "vld1.8 {q0}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 75b \n"
- "b 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- "100: \n"
- "vld1.8 {q0}, [%1]! \n"
- "subs %3, %3, #16 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(src_stride), // %2
- "+r"(dst_width), // %3
- "+r"(source_y_fraction) // %4
- :
- : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"
- );
-}
-
-// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
-void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "subs %3, #8 \n"
- "blt 89f \n"
- // Blend 8 pixels.
- "8: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB0.
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vmull.u8 q10, d4, d3 \n" // db * a
- "vmull.u8 q11, d5, d3 \n" // dg * a
- "vmull.u8 q12, d6, d3 \n" // dr * a
- "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8
- "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8
- "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8
- "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256
- "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256
- "vqadd.u8 q0, q0, q2 \n" // + sbg
- "vqadd.u8 d2, d2, d6 \n" // + sr
- "vmov.u8 d3, #255 \n" // a = 255
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB.
- "bge 8b \n"
-
- "89: \n"
- "adds %3, #8-1 \n"
- "blt 99f \n"
-
- // Blend 1 pixels.
- "1: \n"
- "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0.
- "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1.
- "subs %3, %3, #1 \n" // 1 processed per loop.
- "vmull.u8 q10, d4, d3 \n" // db * a
- "vmull.u8 q11, d5, d3 \n" // dg * a
- "vmull.u8 q12, d6, d3 \n" // dr * a
- "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8
- "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8
- "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8
- "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256
- "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256
- "vqadd.u8 q0, q0, q2 \n" // + sbg
- "vqadd.u8 d2, d2, d6 \n" // + sr
- "vmov.u8 d3, #255 \n" // a = 255
- "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel.
- "bge 1b \n"
-
- "99: \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12"
- );
-}
-
-// Attenuate 8 pixels at a time.
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- // Attenuate 8 pixels.
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q10, d0, d3 \n" // b * a
- "vmull.u8 q11, d1, d3 \n" // g * a
- "vmull.u8 q12, d2, d3 \n" // r * a
- "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8
- "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8
- "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "q0", "q1", "q10", "q11", "q12"
- );
-}
-
-// Quantize 8 ARGB pixels (32 bytes).
-// dst = (dst * scale >> 16) * interval_size + interval_offset;
-void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) {
- asm volatile (
- "vdup.u16 q8, %2 \n"
- "vshr.u16 q8, q8, #1 \n" // scale >>= 1
- "vdup.u16 q9, %3 \n" // interval multiply.
- "vdup.u16 q10, %4 \n" // interval add
-
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB.
- "subs %1, %1, #8 \n" // 8 processed per loop.
- "vmovl.u8 q0, d0 \n" // b (0 .. 255)
- "vmovl.u8 q1, d2 \n"
- "vmovl.u8 q2, d4 \n"
- "vqdmulh.s16 q0, q0, q8 \n" // b * scale
- "vqdmulh.s16 q1, q1, q8 \n" // g
- "vqdmulh.s16 q2, q2, q8 \n" // r
- "vmul.u16 q0, q0, q9 \n" // b * interval_size
- "vmul.u16 q1, q1, q9 \n" // g
- "vmul.u16 q2, q2, q9 \n" // r
- "vadd.u16 q0, q0, q10 \n" // b + interval_offset
- "vadd.u16 q1, q1, q10 \n" // g
- "vadd.u16 q2, q2, q10 \n" // r
- "vqmovn.u16 d0, q0 \n"
- "vqmovn.u16 d2, q1 \n"
- "vqmovn.u16 d4, q2 \n"
- "vst4.8 {d0, d2, d4, d6}, [%0]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- : "r"(scale), // %2
- "r"(interval_size), // %3
- "r"(interval_offset) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10"
- );
-}
-
-// Shade 8 pixels at a time by specified value.
-// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8.
-// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
-void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) {
- asm volatile (
- "vdup.u32 q0, %3 \n" // duplicate scale value.
- "vzip.u8 d0, d1 \n" // d0 aarrggbb.
- "vshr.u16 q0, q0, #1 \n" // scale / 2.
-
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmovl.u8 q10, d20 \n" // b (0 .. 255)
- "vmovl.u8 q11, d22 \n"
- "vmovl.u8 q12, d24 \n"
- "vmovl.u8 q13, d26 \n"
- "vqrdmulh.s16 q10, q10, d0[0] \n" // b * scale * 2
- "vqrdmulh.s16 q11, q11, d0[1] \n" // g
- "vqrdmulh.s16 q12, q12, d0[2] \n" // r
- "vqrdmulh.s16 q13, q13, d0[3] \n" // a
- "vqmovn.u16 d20, q10 \n"
- "vqmovn.u16 d22, q11 \n"
- "vqmovn.u16 d24, q12 \n"
- "vqmovn.u16 d26, q13 \n"
- "vst4.8 {d20, d22, d24, d26}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(value) // %3
- : "cc", "memory", "q0", "q10", "q11", "q12", "q13"
- );
-}
-
-// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
-// Similar to ARGBToYJ but stores ARGB.
-// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
- "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
- "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B
- "vmov d1, d0 \n" // G
- "vmov d2, d0 \n" // R
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
- );
-}
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-// b = (r * 35 + g * 68 + b * 17) >> 7
-// g = (r * 45 + g * 88 + b * 22) >> 7
-// r = (r * 50 + g * 98 + b * 24) >> 7
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
- asm volatile (
- "vmov.u8 d20, #17 \n" // BB coefficient
- "vmov.u8 d21, #68 \n" // BG coefficient
- "vmov.u8 d22, #35 \n" // BR coefficient
- "vmov.u8 d24, #22 \n" // GB coefficient
- "vmov.u8 d25, #88 \n" // GG coefficient
- "vmov.u8 d26, #45 \n" // GR coefficient
- "vmov.u8 d28, #24 \n" // BB coefficient
- "vmov.u8 d29, #98 \n" // BG coefficient
- "vmov.u8 d30, #50 \n" // BR coefficient
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels.
- "subs %1, %1, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d20 \n" // B to Sepia B
- "vmlal.u8 q2, d1, d21 \n" // G
- "vmlal.u8 q2, d2, d22 \n" // R
- "vmull.u8 q3, d0, d24 \n" // B to Sepia G
- "vmlal.u8 q3, d1, d25 \n" // G
- "vmlal.u8 q3, d2, d26 \n" // R
- "vmull.u8 q8, d0, d28 \n" // B to Sepia R
- "vmlal.u8 q8, d1, d29 \n" // G
- "vmlal.u8 q8, d2, d30 \n" // R
- "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B
- "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G
- "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R
- "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- :
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// Tranform 8 ARGB pixels (32 bytes) with color matrix.
-// TODO(fbarchard): Was same as Sepia except matrix is provided. This function
-// needs to saturate. Consider doing a non-saturating version.
-void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) {
- asm volatile (
- "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors.
- "vmovl.s8 q0, d4 \n" // B,G coefficients s16.
- "vmovl.s8 q1, d5 \n" // R,A coefficients s16.
-
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit
- "vmovl.u8 q9, d18 \n" // g
- "vmovl.u8 q10, d20 \n" // r
- "vmovl.u8 q15, d22 \n" // a
- "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B
- "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G
- "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R
- "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A
- "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B
- "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G
- "vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R
- "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A
- "vqadd.s16 q12, q12, q4 \n" // Accumulate B
- "vqadd.s16 q13, q13, q5 \n" // Accumulate G
- "vqadd.s16 q14, q14, q6 \n" // Accumulate R
- "vqadd.s16 q15, q15, q7 \n" // Accumulate A
- "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B
- "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G
- "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R
- "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A
- "vqadd.s16 q12, q12, q4 \n" // Accumulate B
- "vqadd.s16 q13, q13, q5 \n" // Accumulate G
- "vqadd.s16 q14, q14, q6 \n" // Accumulate R
- "vqadd.s16 q15, q15, q7 \n" // Accumulate A
- "vmul.s16 q4, q15, d0[3] \n" // B += A * Matrix B
- "vmul.s16 q5, q15, d1[3] \n" // G += A * Matrix G
- "vmul.s16 q6, q15, d2[3] \n" // R += A * Matrix R
- "vmul.s16 q7, q15, d3[3] \n" // A += A * Matrix A
- "vqadd.s16 q12, q12, q4 \n" // Accumulate B
- "vqadd.s16 q13, q13, q5 \n" // Accumulate G
- "vqadd.s16 q14, q14, q6 \n" // Accumulate R
- "vqadd.s16 q15, q15, q7 \n" // Accumulate A
- "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B
- "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G
- "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R
- "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A
- "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(matrix_argb) // %3
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
- "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
-#ifdef HAS_ARGBMULTIPLYROW_NEON
-// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vmull.u8 q0, d0, d1 \n" // multiply B
- "vmull.u8 q1, d2, d3 \n" // multiply G
- "vmull.u8 q2, d4, d5 \n" // multiply R
- "vmull.u8 q3, d6, d7 \n" // multiply A
- "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B
- "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G
- "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R
- "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3"
- );
-}
-#endif // HAS_ARGBMULTIPLYROW_NEON
-
-// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 q0, q0, q2 \n" // add B, G
- "vqadd.u8 q1, q1, q3 \n" // add R, A
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3"
- );
-}
-
-// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
-void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vqsub.u8 q0, q0, q2 \n" // subtract B, G
- "vqsub.u8 q1, q1, q3 \n" // subtract R, A
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3"
- );
-}
-
-// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // alpha
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {d0}, [%0]! \n" // load 8 sobelx.
- "vld1.8 {d1}, [%1]! \n" // load 8 sobely.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 d0, d0, d1 \n" // add
- "vmov.u8 d1, d0 \n"
- "vmov.u8 d2, d0 \n"
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1"
- );
-}
-
-// Adds Sobel X and Sobel Y and stores Sobel into plane.
-void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width) {
- asm volatile (
- // 16 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load 16 sobelx.
- "vld1.8 {q1}, [%1]! \n" // load 16 sobely.
- "subs %3, %3, #16 \n" // 16 processed per loop.
- "vqadd.u8 q0, q0, q1 \n" // add
- "vst1.8 {q0}, [%2]! \n" // store 16 pixels.
- "bgt 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_y), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1"
- );
-}
-
-// Mixes Sobel X, Sobel Y and Sobel into ARGB.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // alpha
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {d2}, [%0]! \n" // load 8 sobelx.
- "vld1.8 {d0}, [%1]! \n" // load 8 sobely.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 d1, d0, d2 \n" // add
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1"
- );
-}
-
-// SobelX as a matrix is
-// -1 0 1
-// -2 0 2
-// -1 0 1
-void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {d0}, [%0],%5 \n" // top
- "vld1.8 {d1}, [%0],%6 \n"
- "vsubl.u8 q0, d0, d1 \n"
- "vld1.8 {d2}, [%1],%5 \n" // center * 2
- "vld1.8 {d3}, [%1],%6 \n"
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vld1.8 {d2}, [%2],%5 \n" // bottom
- "vld1.8 {d3}, [%2],%6 \n"
- "subs %4, %4, #8 \n" // 8 pixels
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vabs.s16 q0, q0 \n"
- "vqmovn.u16 d0, q0 \n"
- "vst1.8 {d0}, [%3]! \n" // store 8 sobelx
- "bgt 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(src_y2), // %2
- "+r"(dst_sobelx), // %3
- "+r"(width) // %4
- : "r"(2), // %5
- "r"(6) // %6
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// SobelY as a matrix is
-// -1 -2 -1
-// 0 0 0
-// 1 2 1
-void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {d0}, [%0],%4 \n" // left
- "vld1.8 {d1}, [%1],%4 \n"
- "vsubl.u8 q0, d0, d1 \n"
- "vld1.8 {d2}, [%0],%4 \n" // center * 2
- "vld1.8 {d3}, [%1],%4 \n"
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vld1.8 {d2}, [%0],%5 \n" // right
- "vld1.8 {d3}, [%1],%5 \n"
- "subs %3, %3, #8 \n" // 8 pixels
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vabs.s16 q0, q0 \n"
- "vqmovn.u16 d0, q0 \n"
- "vst1.8 {d0}, [%2]! \n" // store 8 sobely
- "bgt 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(dst_sobely), // %2
- "+r"(width) // %3
- : "r"(1), // %4
- "r"(6) // %5
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-#endif // __ARM_NEON__
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc
deleted file mode 100755
index 106fda5689..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc
+++ /dev/null
@@ -1,6443 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
-
-// Constants for ARGB: per-byte weights for pmaddubsw; ARGBToYRow_SSSE3 shifts the summed result right by 7.
-static vec8 kARGBToY = {
- 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
-};
-
-// JPeg full range: used by ARGBToYJRow_SSSE3, which adds kAddYJ64 before the shift by 7 and adds no offset afterwards.
-static vec8 kARGBToYJ = {
- 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
-};
-#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
-
-#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
-
-static vec8 kARGBToU = { // U weights; ARGBToUVRow_SSSE3 shifts the signed sum right by 8
- 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
-};
-
-static vec8 kARGBToUJ = { // U weights used together with kAddUVJ128 in ARGBToUVJRow_SSSE3
- 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
-};
-
-static vec8 kARGBToV = { // V weights, loaded alongside kARGBToU
- -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
-};
-
-static vec8 kARGBToVJ = { // V weights, loaded alongside kARGBToUJ
- -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
-};
-
-// Constants for BGRA: the ARGB weights with the four per-pixel entries in reverse order.
-static vec8 kBGRAToY = {
- 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
-};
-
-static vec8 kBGRAToU = {
- 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
-};
-
-static vec8 kBGRAToV = {
- 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
-};
-
-// Constants for ABGR: the ARGB weights with the first and third entry of each pixel swapped.
-static vec8 kABGRToY = {
- 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
-};
-
-static vec8 kABGRToU = {
- -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
-};
-
-static vec8 kABGRToV = {
- 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
-};
-
-// Constants for RGBA: the ARGB weights shifted by one entry, with the zero weight first.
-static vec8 kRGBAToY = {
- 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33
-};
-
-static vec8 kRGBAToU = {
- 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38
-};
-
-static vec8 kRGBAToV = {
- 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112
-};
-
-static uvec8 kAddY16 = { // byte offset added after packing in ARGBToYRow_SSSE3
- 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
-};
-
-static vec16 kAddYJ64 = { // word bias added before the shift by 7 in ARGBToYJRow_SSSE3 (64 is half of 1 << 7)
- 64, 64, 64, 64, 64, 64, 64, 64
-};
-
-static uvec8 kAddUV128 = { // byte offset added after packing in ARGBToUVRow_SSSE3
- 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
- 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
-};
-
-static uvec16 kAddUVJ128 = { // word bias added before the shift by 8 in ARGBToUVJRow_SSSE3
- 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
-};
-#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
-
-#ifdef HAS_RGB24TOARGBROW_SSSE3
-
-// Shuffle table for converting RGB24 to ARGB. Every fourth index (12..15) is a filler; RGB24ToARGBRow_SSSE3 ORs 0xff over that byte.
-static uvec8 kShuffleMaskRGB24ToARGB = {
- 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
-};
-
-// Shuffle table for converting RAW to ARGB. Same layout as above with the three bytes of each pixel reversed.
-static uvec8 kShuffleMaskRAWToARGB = {
- 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
-};
-
-// Shuffle table for converting ARGB to RGB24. Index 128u has the high bit set, so pshufb writes zero to that byte.
-static uvec8 kShuffleMaskARGBToRGB24 = {
- 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGB to RAW. Same layout as above with the three bytes of each pixel reversed.
-static uvec8 kShuffleMaskARGBToRAW = {
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4
-static uvec8 kShuffleMaskARGBToRGB24_0 = {
- 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
-};
-
-// Shuffle table for converting ARGBToRAW, split First 8 + next 4 like kShuffleMaskARGBToRGB24_0 (presumably for I422ToRAW; confirm against its user).
-static uvec8 kShuffleMaskARGBToRAW_0 = {
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
-};
-#endif // HAS_RGB24TOARGBROW_SSSE3
-
-#if defined(TESTING) && defined(__x86_64__)
-void TestRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
- asm volatile ( // NOTE(review): everything before label 1 writes general registers (incl. esp/ebp) that are not in the clobber list; looks like an instruction-encoding test, not code meant to run - confirm
- ".p2align 5 \n"
- "mov %%eax,%%eax \n" // 32-bit register-to-itself moves, legacy registers
- "mov %%ebx,%%ebx \n"
- "mov %%ecx,%%ecx \n"
- "mov %%edx,%%edx \n"
- "mov %%esi,%%esi \n"
- "mov %%edi,%%edi \n"
- "mov %%ebp,%%ebp \n"
- "mov %%esp,%%esp \n"
- ".p2align 5 \n"
- "mov %%r8d,%%r8d \n" // same for r8d..r15d
- "mov %%r9d,%%r9d \n"
- "mov %%r10d,%%r10d \n"
- "mov %%r11d,%%r11d \n"
- "mov %%r12d,%%r12d \n"
- "mov %%r13d,%%r13d \n"
- "mov %%r14d,%%r14d \n"
- "mov %%r15d,%%r15d \n"
- ".p2align 5 \n"
- "lea (%%rax),%%eax \n" // lea with 64-bit base, 32-bit destination, no displacement
- "lea (%%rbx),%%ebx \n"
- "lea (%%rcx),%%ecx \n"
- "lea (%%rdx),%%edx \n"
- "lea (%%rsi),%%esi \n"
- "lea (%%rdi),%%edi \n"
- "lea (%%rbp),%%ebp \n"
- "lea (%%rsp),%%esp \n"
- ".p2align 5 \n"
- "lea (%%r8),%%r8d \n"
- "lea (%%r9),%%r9d \n"
- "lea (%%r10),%%r10d \n"
- "lea (%%r11),%%r11d \n"
- "lea (%%r12),%%r12d \n"
- "lea (%%r13),%%r13d \n"
- "lea (%%r14),%%r14d \n"
- "lea (%%r15),%%r15d \n"
-
- ".p2align 5 \n"
- "lea 0x10(%%rax),%%eax \n" // lea with a 0x10 displacement
- "lea 0x10(%%rbx),%%ebx \n"
- "lea 0x10(%%rcx),%%ecx \n"
- "lea 0x10(%%rdx),%%edx \n"
- "lea 0x10(%%rsi),%%esi \n"
- "lea 0x10(%%rdi),%%edi \n"
- "lea 0x10(%%rbp),%%ebp \n"
- "lea 0x10(%%rsp),%%esp \n"
- ".p2align 5 \n"
- "lea 0x10(%%r8),%%r8d \n"
- "lea 0x10(%%r9),%%r9d \n"
- "lea 0x10(%%r10),%%r10d \n"
- "lea 0x10(%%r11),%%r11d \n"
- "lea 0x10(%%r12),%%r12d \n"
- "lea 0x10(%%r13),%%r13d \n"
- "lea 0x10(%%r14),%%r14d \n"
- "lea 0x10(%%r15),%%r15d \n"
-
- ".p2align 5 \n"
- "add 0x10,%%eax \n" // NOTE(review): no '$' prefix, so AT&T syntax treats 0x10 as a memory operand, not an immediate - confirm intent
- "add 0x10,%%ebx \n"
- "add 0x10,%%ecx \n"
- "add 0x10,%%edx \n"
- "add 0x10,%%esi \n"
- "add 0x10,%%edi \n"
- "add 0x10,%%ebp \n"
- "add 0x10,%%esp \n"
- ".p2align 5 \n"
- "add 0x10,%%r8d \n"
- "add 0x10,%%r9d \n"
- "add 0x10,%%r10d \n"
- "add 0x10,%%r11d \n"
- "add 0x10,%%r12d \n"
- "add 0x10,%%r13d \n"
- "add 0x10,%%r14d \n"
- "add 0x10,%%r15d \n"
-
- ".p2align 2 \n"
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n" // load 8 bytes from src_y
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n" // aligned 16-byte store to dst_argb
- "lea " MEMLEA(0x20,1) ",%1 \n" // destination advances 0x20 per pass although only 16 bytes are stored
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // TESTING
-
-#ifdef HAS_I400TOARGBROW_SSE2
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
- asm volatile ( // per pass: 8 source bytes become 8 four-byte pixels; stores use movdqa (aligned)
- "pcmpeqb %%xmm5,%%xmm5 \n" // all ones
- "pslld $0x18,%%xmm5 \n" // 0xff000000 in every dword
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n" // load 8 bytes
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm0,%%xmm0 \n" // every byte doubled
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm0 \n" // every byte now fills a dword: first 4 pixels
- "punpckhwd %%xmm1,%%xmm1 \n" // same for the next 4 pixels
- "por %%xmm5,%%xmm0 \n" // force the top byte of each pixel to 0xff
- "por %%xmm5,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n" // 8 pixels
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
- int pix) {
- asm volatile ( // same steps as I400ToARGBRow_SSE2, but the two stores use movdqu (no alignment requirement)
- "pcmpeqb %%xmm5,%%xmm5 \n" // all ones
- "pslld $0x18,%%xmm5 \n" // 0xff000000 in every dword
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n" // load 8 bytes
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm0,%%xmm0 \n" // every byte doubled
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm0 \n" // every byte now fills a dword: first 4 pixels
- "punpckhwd %%xmm1,%%xmm1 \n" // same for the next 4 pixels
- "por %%xmm5,%%xmm0 \n" // force the top byte of each pixel to 0xff
- "por %%xmm5,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n" // 8 pixels
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // HAS_I400TOARGBROW_SSE2
-
-#ifdef HAS_RGB24TOARGBROW_SSSE3
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
- asm volatile ( // per pass: 48 source bytes (16 three-byte pixels) become 64 bytes; loads are movdqu, stores are movdqa (aligned)
- "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
- "pslld $0x18,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n" // shuffle table
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x30,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm2 \n"
- "palignr $0x8,%%xmm1,%%xmm2 \n" // source bytes 24..39: pixels 8-11
- "pshufb %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm2 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n" // source bytes 12..27: pixels 4-7
- "pshufb %%xmm4,%%xmm0 \n" // pixels 0-3
- "movdqa %%xmm2," MEMACCESS2(0x20,1) " \n"
- "por %%xmm5,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "palignr $0x4,%%xmm3,%%xmm3 \n" // rotate so source bytes 36..47 come first: pixels 12-15
- "pshufb %%xmm4,%%xmm3 \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "por %%xmm5,%%xmm3 \n"
- "sub $0x10,%2 \n" // 16 pixels; the moves and lea below leave the flags for jg intact
- "movdqa %%xmm3," MEMACCESS2(0x30,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_rgb24), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "m"(kShuffleMaskRGB24ToARGB) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
- asm volatile ( // same instruction sequence as RGB24ToARGBRow_SSSE3; only the shuffle table (%3) differs
- "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
- "pslld $0x18,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n" // shuffle table
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x30,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm2 \n"
- "palignr $0x8,%%xmm1,%%xmm2 \n" // source bytes 24..39: pixels 8-11
- "pshufb %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm2 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n" // source bytes 12..27: pixels 4-7
- "pshufb %%xmm4,%%xmm0 \n" // pixels 0-3
- "movdqa %%xmm2," MEMACCESS2(0x20,1) " \n"
- "por %%xmm5,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "palignr $0x4,%%xmm3,%%xmm3 \n" // rotate so source bytes 36..47 come first: pixels 12-15
- "pshufb %%xmm4,%%xmm3 \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "por %%xmm5,%%xmm3 \n"
- "sub $0x10,%2 \n" // 16 pixels; the moves and lea below leave the flags for jg intact
- "movdqa %%xmm3," MEMACCESS2(0x30,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_raw), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "m"(kShuffleMaskRAWToARGB) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
- asm volatile ( // per pass: 16 source bytes (8 two-byte pixels) become 32 bytes; stores are movdqa (aligned)
- "mov $0x1080108,%%eax \n" // pmulhuw factor that widens a 5-bit field held in the top of a word
- "movd %%eax,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "mov $0x20802080,%%eax \n" // pmulhuw factor for the 6-bit middle field
- "movd %%eax,%%xmm6 \n"
- "pshufd $0x0,%%xmm6,%%xmm6 \n"
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psllw $0xb,%%xmm3 \n" // 0xf800 per word: top 5 bits
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psllw $0xa,%%xmm4 \n"
- "psrlw $0x5,%%xmm4 \n" // 0x07e0 per word: middle 6 bits
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psllw $0x8,%%xmm7 \n" // 0xff00 per word
- "sub %0,%1 \n" // dst -= 2 * src, so (%1,%0,2) addresses the destination
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pand %%xmm3,%%xmm1 \n" // isolate top 5 bits
- "psllw $0xb,%%xmm2 \n" // move low 5 bits to the top of the word
- "pmulhuw %%xmm5,%%xmm1 \n"
- "pmulhuw %%xmm5,%%xmm2 \n"
- "psllw $0x8,%%xmm1 \n"
- "por %%xmm2,%%xmm1 \n" // words holding the two 5-bit fields widened to bytes
- "pand %%xmm4,%%xmm0 \n" // isolate middle 6 bits
- "pmulhuw %%xmm6,%%xmm0 \n"
- "por %%xmm7,%%xmm0 \n" // high byte of each word set to 0xff
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm0,%%xmm1 \n" // interleave into 4-byte pixels
- "punpckhbw %%xmm0,%%xmm2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm1,0x00,1,0,2) // movdqa %%xmm1,(%1,%0,2)
- MEMOPMEM(movdqa,xmm2,0x10,1,0,2) // movdqa %%xmm2,0x10(%1,%0,2)
- "lea " MEMLEA(0x10,0) ",%0 \n" // advancing src also advances the indexed destination by 0x20
- "sub $0x8,%2 \n" // 8 pixels
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc", "eax"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
- asm volatile ( // per pass: 16 source bytes (8 two-byte pixels) become 32 bytes; stores are movdqa (aligned)
- "mov $0x1080108,%%eax \n" // pmulhuw factor that widens a 5-bit field held in the top of a word
- "movd %%eax,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "mov $0x42004200,%%eax \n" // pmulhuw factor for the 5-bit field at bits 5..9
- "movd %%eax,%%xmm6 \n"
- "pshufd $0x0,%%xmm6,%%xmm6 \n"
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psllw $0xb,%%xmm3 \n" // 0xf800 per word
- "movdqa %%xmm3,%%xmm4 \n"
- "psrlw $0x6,%%xmm4 \n" // 0x03e0 per word: bits 5..9
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psllw $0x8,%%xmm7 \n" // 0xff00 per word
- "sub %0,%1 \n" // dst -= 2 * src, so (%1,%0,2) addresses the destination
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psllw $0x1,%%xmm1 \n" // drop the top bit so bits 10..14 sit in the top 5 bits
- "psllw $0xb,%%xmm2 \n" // move low 5 bits to the top of the word
- "pand %%xmm3,%%xmm1 \n"
- "pmulhuw %%xmm5,%%xmm2 \n"
- "pmulhuw %%xmm5,%%xmm1 \n"
- "psllw $0x8,%%xmm1 \n"
- "por %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pand %%xmm4,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n" // arithmetic shift copies bit 15 across the high byte
- "pmulhuw %%xmm6,%%xmm0 \n"
- "pand %%xmm7,%%xmm2 \n" // high byte is 0x00 or 0xff depending on bit 15
- "por %%xmm2,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm0,%%xmm1 \n" // interleave into 4-byte pixels
- "punpckhbw %%xmm0,%%xmm2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm1,0x00,1,0,2) // movdqa %%xmm1,(%1,%0,2)
- MEMOPMEM(movdqa,xmm2,0x10,1,0,2) // movdqa %%xmm2,0x10(%1,%0,2)
- "lea " MEMLEA(0x10,0) ",%0 \n" // advancing src also advances the indexed destination by 0x20
- "sub $0x8,%2 \n" // 8 pixels
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc", "eax"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
- asm volatile ( // per pass: 16 source bytes (8 two-byte pixels) become 32 bytes; every nibble is duplicated into a full byte
- "mov $0xf0f0f0f,%%eax \n"
- "movd %%eax,%%xmm4 \n"
- "pshufd $0x0,%%xmm4,%%xmm4 \n" // 0x0f in every byte: low-nibble mask
- "movdqa %%xmm4,%%xmm5 \n"
- "pslld $0x4,%%xmm5 \n" // 0xf0 in every byte: high-nibble mask
- "sub %0,%1 \n" // dst -= 2 * src, so (%1,%0,2) addresses the destination
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pand %%xmm4,%%xmm0 \n" // low nibbles
- "pand %%xmm5,%%xmm2 \n" // high nibbles
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "psllw $0x4,%%xmm1 \n"
- "psrlw $0x4,%%xmm3 \n"
- "por %%xmm1,%%xmm0 \n" // low nibble repeated in both halves of the byte
- "por %%xmm3,%%xmm2 \n" // high nibble repeated in both halves of the byte
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n" // interleave into 4-byte pixels
- "punpckhbw %%xmm2,%%xmm1 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,2) // movdqa %%xmm0,(%1,%0,2)
- MEMOPMEM(movdqa,xmm1,0x10,1,0,2) // movdqa %%xmm1,0x10(%1,%0,2)
- "lea " MEMLEA(0x10,0) ",%0 \n" // advancing src also advances the indexed destination by 0x20
- "sub $0x8,%2 \n" // 8 pixels
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc", "eax"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
- asm volatile ( // per pass: 64 source bytes (16 four-byte pixels) are packed into 48 bytes; all loads and stores are movdqu
- "movdqa %3,%%xmm6 \n" // shuffle table
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "pshufb %%xmm6,%%xmm0 \n" // each register now holds 12 packed bytes followed by 4 zero bytes
- "pshufb %%xmm6,%%xmm1 \n"
- "pshufb %%xmm6,%%xmm2 \n"
- "pshufb %%xmm6,%%xmm3 \n"
- "movdqa %%xmm1,%%xmm4 \n"
- "psrldq $0x4,%%xmm1 \n"
- "pslldq $0xc,%%xmm4 \n" // first 4 bytes of xmm1 fill the top of xmm0
- "movdqa %%xmm2,%%xmm5 \n"
- "por %%xmm4,%%xmm0 \n"
- "pslldq $0x8,%%xmm5 \n" // first 8 bytes of xmm2 fill the top of xmm1
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "psrldq $0x8,%%xmm2 \n"
- "pslldq $0x4,%%xmm3 \n" // all 12 bytes of xmm3 fill the top of xmm2
- "por %%xmm3,%%xmm2 \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x30,1) ",%1 \n"
- "sub $0x10,%2 \n" // 16 pixels
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- : "m"(kShuffleMaskARGBToRGB24) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-
-void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
- asm volatile ( // same instruction sequence as ARGBToRGB24Row_SSSE3; only the shuffle table (%3) differs
- "movdqa %3,%%xmm6 \n" // shuffle table
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "pshufb %%xmm6,%%xmm0 \n" // each register now holds 12 packed bytes followed by 4 zero bytes
- "pshufb %%xmm6,%%xmm1 \n"
- "pshufb %%xmm6,%%xmm2 \n"
- "pshufb %%xmm6,%%xmm3 \n"
- "movdqa %%xmm1,%%xmm4 \n"
- "psrldq $0x4,%%xmm1 \n"
- "pslldq $0xc,%%xmm4 \n" // first 4 bytes of xmm1 fill the top of xmm0
- "movdqa %%xmm2,%%xmm5 \n"
- "por %%xmm4,%%xmm0 \n"
- "pslldq $0x8,%%xmm5 \n" // first 8 bytes of xmm2 fill the top of xmm1
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "psrldq $0x8,%%xmm2 \n"
- "pslldq $0x4,%%xmm3 \n" // all 12 bytes of xmm3 fill the top of xmm2
- "por %%xmm3,%%xmm2 \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x30,1) ",%1 \n"
- "sub $0x10,%2 \n" // 16 pixels
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- : "m"(kShuffleMaskARGBToRAW) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-
-void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
- asm volatile ( // per pass: 16 source bytes (4 four-byte pixels, movdqa aligned load) become 8 bytes
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psrld $0x1b,%%xmm3 \n" // 0x0000001f per dword: bits 0..4
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psrld $0x1a,%%xmm4 \n"
- "pslld $0x5,%%xmm4 \n" // 0x000007e0 per dword: bits 5..10
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0xb,%%xmm5 \n" // 0xfffff800 per dword: bits 11 and up
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pslld $0x8,%%xmm0 \n" // discard the top source byte
- "psrld $0x3,%%xmm1 \n" // top 5 bits of byte 0 land in bits 0..4
- "psrld $0x5,%%xmm2 \n" // top 6 bits of byte 1 land in bits 5..10
- "psrad $0x10,%%xmm0 \n" // byte 2 lands in bits 8..15, sign-extended so packssdw does not saturate
- "pand %%xmm3,%%xmm1 \n"
- "pand %%xmm4,%%xmm2 \n"
- "pand %%xmm5,%%xmm0 \n"
- "por %%xmm2,%%xmm1 \n"
- "por %%xmm1,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n" // 4 dwords to 4 words
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n" // store 8 bytes
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x4,%2 \n" // 4 pixels
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
- asm volatile ( // per pass: 16 source bytes (4 four-byte pixels, movdqa aligned load) become 8 bytes
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psrld $0x1b,%%xmm4 \n" // 0x0000001f per dword: bits 0..4
- "movdqa %%xmm4,%%xmm5 \n"
- "pslld $0x5,%%xmm5 \n" // 0x000003e0 per dword: bits 5..9
- "movdqa %%xmm4,%%xmm6 \n"
- "pslld $0xa,%%xmm6 \n" // 0x00007c00 per dword: bits 10..14
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "pslld $0xf,%%xmm7 \n" // 0xffff8000 per dword: bit 15 and up
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "psrad $0x10,%%xmm0 \n" // bit 31 lands in bit 15, sign-extended so packssdw does not saturate
- "psrld $0x3,%%xmm1 \n" // top 5 bits of byte 0 land in bits 0..4
- "psrld $0x6,%%xmm2 \n" // top 5 bits of byte 1 land in bits 5..9
- "psrld $0x9,%%xmm3 \n" // top 5 bits of byte 2 land in bits 10..14
- "pand %%xmm7,%%xmm0 \n"
- "pand %%xmm4,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm6,%%xmm3 \n"
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm3,%%xmm2 \n"
- "por %%xmm2,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n" // 4 dwords to 4 words
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n" // store 8 bytes
- "lea " MEMLEA(0x8,1) ",%1 \n" // pointer advance uses the address-computation macro, like every other lea in this file
- "sub $0x4,%2 \n" // 4 pixels
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
- asm volatile ( // per pass: 16 source bytes (4 four-byte pixels, movdqa aligned load) become 8 bytes holding the top nibble of every source byte
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psllw $0xc,%%xmm4 \n" // 0xf000 per word: top nibble of the odd bytes
- "movdqa %%xmm4,%%xmm3 \n"
- "psrlw $0x8,%%xmm3 \n" // 0x00f0 per word: top nibble of the even bytes
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm3,%%xmm0 \n"
- "pand %%xmm4,%%xmm1 \n"
- "psrlq $0x4,%%xmm0 \n" // even-byte nibble moves to bits 0..3 of the word
- "psrlq $0x8,%%xmm1 \n" // odd-byte nibble moves to bits 4..7 of the word
- "por %%xmm1,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n" // 8 words to 8 bytes
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n" // store 8 bytes
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x4,%2 \n" // 4 pixels
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
- );
-}
-#endif // HAS_RGB24TOARGBROW_SSSE3
-
-#ifdef HAS_ARGBTOYROW_SSSE3
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- asm volatile ( // per pass: 64 source bytes (16 pixels) become 16 output bytes; loads and the store are movdqa (aligned)
- "movdqa %4,%%xmm5 \n" // kAddY16
- "movdqa %3,%%xmm4 \n" // kARGBToY
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // weight each byte and add adjacent pairs
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n" // add the two pair sums of every pixel
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n" // add 16 to every byte
- "sub $0x10,%2 \n" // 16 pixels; the store and lea below leave the flags for jg intact
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- asm volatile ( // same steps as ARGBToYRow_SSSE3, but the pixel loads and the store use movdqu
- "movdqa %4,%%xmm5 \n" // kAddY16 (the constant itself is still loaded with movdqa)
- "movdqa %3,%%xmm4 \n" // kARGBToY
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // weight each byte and add adjacent pairs
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n" // add the two pair sums of every pixel
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n" // add 16 to every byte
- "sub $0x10,%2 \n" // 16 pixels; the store and lea below leave the flags for jg intact
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBTOYROW_SSSE3
-
-#ifdef HAS_ARGBTOYJROW_SSSE3
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- asm volatile ( // per pass: 64 source bytes (16 pixels) become 16 output bytes; loads and the store are movdqa (aligned)
- "movdqa %3,%%xmm4 \n" // kARGBToYJ
- "movdqa %4,%%xmm5 \n" // kAddYJ64
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // weight each byte and add adjacent pairs
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n" // add the two pair sums of every pixel
- "phaddw %%xmm3,%%xmm2 \n"
- "paddw %%xmm5,%%xmm0 \n" // add 64 before the shift by 7
- "paddw %%xmm5,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "sub $0x10,%2 \n" // 16 pixels; the store and lea below leave the flags for jg intact
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- asm volatile ( // same steps as ARGBToYJRow_SSSE3, but the pixel loads and the store use movdqu
- "movdqa %3,%%xmm4 \n" // kARGBToYJ (the constants themselves are still loaded with movdqa)
- "movdqa %4,%%xmm5 \n" // kAddYJ64
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // weight each byte and add adjacent pairs
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n" // add the two pair sums of every pixel
- "phaddw %%xmm3,%%xmm2 \n"
- "paddw %%xmm5,%%xmm0 \n" // add 64 before the shift by 7
- "paddw %%xmm5,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "sub $0x10,%2 \n" // 16 pixels; the store and lea below leave the flags for jg intact
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBTOYJROW_SSSE3
-
-#ifdef HAS_ARGBTOUVROW_SSSE3
-// TODO(fbarchard): pass xmm constants to single block of assembly.
-// fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes
-// 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers,
-// or 4 if stack frame is disabled. Doing 2 assembly blocks is a work around
-// and considered unsafe.
-void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile ( // first block only loads constants; NOTE(review): xmm3-5 are written here without being listed as clobbers in either block - confirm this is safe for the target compilers
- "movdqa %0,%%xmm4 \n" // U weights
- "movdqa %1,%%xmm3 \n" // V weights
- "movdqa %2,%%xmm5 \n" // 128 offset
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile ( // per pass: 16 pixels from two rows (src and src + stride) become 8 U and 8 V bytes
- "sub %1,%2 \n" // dst_v -= dst_u, so (%1,%2,1) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even-numbered pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd-numbered pixels
- "pavgb %%xmm7,%%xmm0 \n" // average horizontal neighbours
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // U weights
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n" // V weights
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes U, high 8 bytes V
- "paddb %%xmm5,%%xmm0 \n" // add 128 to every byte
- "sub $0x10,%3 \n" // 16 pixels; the stores and lea below leave the flags for jg intact
- "movlps %%xmm0," MEMACCESS(1) " \n" // store 8 U
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3.
-void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile ( // first block only loads constants; NOTE(review): xmm3-5 are written here without being listed as clobbers in either block - confirm this is safe for the target compilers
- "movdqa %0,%%xmm4 \n" // U weights
- "movdqa %1,%%xmm3 \n" // V weights
- "movdqa %2,%%xmm5 \n" // word bias added before the shift
- :
- : "m"(kARGBToUJ), // %0
- "m"(kARGBToVJ), // %1
- "m"(kAddUVJ128) // %2
- );
- asm volatile ( // per pass: 16 pixels from two rows (src and src + stride) become 8 U and 8 V bytes
- "sub %1,%2 \n" // dst_v -= dst_u, so (%1,%2,1) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even-numbered pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd-numbered pixels
- "pavgb %%xmm7,%%xmm0 \n" // average horizontal neighbours
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // U weights
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n" // V weights
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "paddw %%xmm5,%%xmm0 \n" // bias is applied on words before the shift; no byte offset is added afterwards
- "paddw %%xmm5,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes U, high 8 bytes V
- "sub $0x10,%3 \n" // 16 pixels; the stores and lea below leave the flags for jg intact
- "movlps %%xmm0," MEMACCESS(1) " \n" // store 8 U
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile ( // first block only loads constants; NOTE(review): xmm3-5 are written here without being listed as clobbers in either block - confirm this is safe for the target compilers
- "movdqa %0,%%xmm4 \n" // U weights
- "movdqa %1,%%xmm3 \n" // V weights
- "movdqa %2,%%xmm5 \n" // 128 offset
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile ( // same steps as ARGBToUVRow_SSSE3, but both source rows are read with movdqu
- "sub %1,%2 \n" // dst_v -= dst_u, so (%1,%2,1) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n" // second row goes through xmm7 instead of a pavgb memory operand
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even-numbered pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd-numbered pixels
- "pavgb %%xmm7,%%xmm0 \n" // average horizontal neighbours
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // U weights
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n" // V weights
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes U, high 8 bytes V
- "paddb %%xmm5,%%xmm0 \n" // add 128 to every byte
- "sub $0x10,%3 \n" // 16 pixels; the stores and lea below leave the flags for jg intact
- "movlps %%xmm0," MEMACCESS(1) " \n" // store 8 U
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile ( // first block only loads constants; NOTE(review): xmm3-5 are written here without being listed as clobbers in either block - confirm this is safe for the target compilers
- "movdqa %0,%%xmm4 \n" // U weights
- "movdqa %1,%%xmm3 \n" // V weights
- "movdqa %2,%%xmm5 \n" // word bias added before the shift
- :
- : "m"(kARGBToUJ), // %0
- "m"(kARGBToVJ), // %1
- "m"(kAddUVJ128) // %2
- );
- asm volatile ( // same steps as ARGBToUVJRow_SSSE3, but both source rows are read with movdqu
- "sub %1,%2 \n" // dst_v -= dst_u, so (%1,%2,1) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n" // second row goes through xmm7 instead of a pavgb memory operand
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even-numbered pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd-numbered pixels
- "pavgb %%xmm7,%%xmm0 \n" // average horizontal neighbours
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // U weights
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n" // V weights
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "paddw %%xmm5,%%xmm0 \n" // bias is applied on words before the shift; no byte offset is added afterwards
- "paddw %%xmm5,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes U, high 8 bytes V
- "sub $0x10,%3 \n" // 16 pixels; the stores and lea below leave the flags for jg intact
- "movlps %%xmm0," MEMACCESS(1) " \n" // store 8 U
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int width) {
- asm volatile ( // first block only loads constants; NOTE(review): xmm3-5 are written here without being listed as clobbers in either block - confirm this is safe for the target compilers
- "movdqa %0,%%xmm4 \n" // U weights
- "movdqa %1,%%xmm3 \n" // V weights
- "movdqa %2,%%xmm5 \n" // 128 offset
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile ( // per pass: 16 pixels become 16 U and 16 V bytes (no averaging); loads and stores are movdqa (aligned)
- "sub %1,%2 \n" // dst_v -= dst_u, so (%1,%2,1) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // U weights
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n" // add 128 to every byte
- "sub $0x10,%3 \n" // 16 pixels; nothing below up to jg modifies the flags
- "movdqa %%xmm0," MEMACCESS(1) " \n" // store 16 U
- "movdqa " MEMACCESS(0) ",%%xmm0 \n" // reload the same 64 source bytes for the V pass
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm3,%%xmm0 \n" // V weights
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n" // add 128 to every byte
- "lea " MEMLEA(0x40,0) ",%0 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,2,1) // movdqa %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6"
-#endif
- );
-}
-/* ARGBToUV444Row_Unaligned_SSSE3: same computation as ARGBToUV444Row_SSSE3
- * (one U and one V byte per 4-byte pixel, 16 pixels per loop pass), but every
- * access to src_argb, dst_u and dst_v uses movdqu, so none of those pointers
- * needs 16-byte alignment. The three constant loads still use movdqa.
- *
- * width is decremented by 16 per pass and tested after the body has run.
- *
- * NOTE(review): xmm3/xmm4/xmm5 are set in the first asm statement and consumed
- * in the second without being operands or clobbers of either.
- */
-void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_u,
- uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"  // xmm4 = U coefficients
- "movdqa %1,%%xmm3 \n"  // xmm3 = V coefficients
- "movdqa %2,%%xmm5 \n"  // xmm5 = kAddUV128
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"  // %2 becomes the offset (dst_v - dst_u)
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"  // U pass
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"  // width -= 16; these flags feed the jg at the bottom
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"  // reload the same 64 bytes for V
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm3,%%xmm0 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6"
-#endif
- );
-}
-/* ARGBToUV422Row_SSSE3: horizontally 2:1 subsampled U and V from one row.
- *
- * Each loop pass reads 16 source pixels (64 bytes, movdqa: src_argb0 must be
- * 16-byte aligned), averages horizontally adjacent pixel pairs with pavgb and
- * emits 8 U bytes to dst_u (movlps) and 8 V bytes to dst_v (movhps, addressed
- * as dst_u plus the dst_v - dst_u offset).
- *
- * width is decremented by 16 per pass and tested after the body has run.
- *
- * NOTE(review): xmm3/xmm4/xmm5 are set in the first asm statement and consumed
- * in the second without being operands or clobbers of either.
- */
-void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"  // xmm4 = U coefficients
- "movdqa %1,%%xmm3 \n"  // xmm3 = V coefficients
- "movdqa %2,%%xmm5 \n"  // xmm5 = kAddUV128
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"  // %2 becomes the offset (dst_v - dst_u)
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"  // even-indexed pixels of xmm0:xmm1
- "shufps $0xdd,%%xmm1,%%xmm7 \n"  // odd-indexed pixels of xmm0:xmm1
- "pavgb %%xmm7,%%xmm0 \n"  // average each even/odd neighbour pair
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"  // keep copies for the V multiply
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"  // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"  // width -= 16; these flags feed the jg at the bottom
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-/* ARGBToUV422Row_Unaligned_SSSE3: same computation as ARGBToUV422Row_SSSE3
- * (16 source pixels in, 8 U and 8 V bytes out per loop pass, adjacent pixels
- * averaged horizontally), but the source row is loaded with movdqu so
- * src_argb0 does not need 16-byte alignment.
- *
- * width is decremented by 16 per pass and tested after the body has run.
- *
- * NOTE(review): xmm3/xmm4/xmm5 are set in the first asm statement and consumed
- * in the second without being operands or clobbers of either.
- */
-void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"  // xmm4 = U coefficients
- "movdqa %1,%%xmm3 \n"  // xmm3 = V coefficients
- "movdqa %2,%%xmm5 \n"  // xmm5 = kAddUV128
- :
- : "m"(kARGBToU), // %0
- "m"(kARGBToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"  // %2 becomes the offset (dst_v - dst_u)
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"  // even-indexed pixels of xmm0:xmm1
- "shufps $0xdd,%%xmm1,%%xmm7 \n"  // odd-indexed pixels of xmm0:xmm1
- "pavgb %%xmm7,%%xmm0 \n"  // average each even/odd neighbour pair
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"  // keep copies for the V multiply
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"  // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"  // width -= 16; these flags feed the jg at the bottom
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-/* BGRAToYRow_SSSE3: computes one Y byte per 4-byte source pixel.
- *
- * src_bgra  source row; 64 bytes read per loop pass with movdqa (16-byte
- *           aligned).
- * dst_y     output; 16 bytes stored per pass with movdqa (16-byte aligned).
- * pix       pixel count, decremented by 16 per pass and tested after the body
- *           has run, so a positive multiple of 16 is presumably expected --
- *           confirm against callers.
- *
- * Per pass: multiply-add the bytes with the kBGRAToY coefficients, sum pairs
- * horizontally, shift right by 7, pack to unsigned bytes, then add kAddY16.
- */
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"  // xmm5 = kAddY16
- "movdqa %3,%%xmm4 \n"  // xmm4 = kBGRAToY
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"  // pix -= 16; these flags feed the jg below
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_bgra), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kBGRAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-/* BGRAToYRow_Unaligned_SSSE3: same computation as BGRAToYRow_SSSE3 (one Y byte
- * per 4-byte pixel, 16 pixels per loop pass), but src_bgra is loaded and dst_y
- * is stored with movdqu, so neither pointer needs 16-byte alignment.
- *
- * pix is decremented by 16 per pass and tested after the body has run.
- */
-void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"  // xmm5 = kAddY16
- "movdqa %3,%%xmm4 \n"  // xmm4 = kBGRAToY
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"  // pix -= 16; these flags feed the jg below
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_bgra), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kBGRAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-/* BGRAToUVRow_SSSE3: U and V subsampled 2x2 from two adjacent source rows.
- *
- * src_bgra0        first row; 64 bytes read per loop pass with movdqa
- *                  (16-byte aligned).
- * src_stride_bgra  byte distance to the second row, which is consumed as a
- *                  pavgb memory operand at src_bgra0 + stride; that form also
- *                  needs a 16-byte aligned address.
- * dst_u, dst_v     8 bytes each written per pass (movlps / movhps); dst_v is
- *                  addressed as dst_u plus the dst_v - dst_u offset.
- * width            decremented by 16 per pass and tested after the body has
- *                  run.
- *
- * NOTE(review): xmm3/xmm4/xmm5 are set in the first asm statement and consumed
- * in the second without being operands or clobbers of either.
- */
-void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"  // xmm4 = U coefficients
- "movdqa %1,%%xmm3 \n"  // xmm3 = V coefficients
- "movdqa %2,%%xmm5 \n"  // xmm5 = kAddUV128
- :
- : "m"(kBGRAToU), // %0
- "m"(kBGRAToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"  // %2 becomes the offset (dst_v - dst_u)
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"  // even-indexed pixels of xmm0:xmm1
- "shufps $0xdd,%%xmm1,%%xmm7 \n"  // odd-indexed pixels of xmm0:xmm1
- "pavgb %%xmm7,%%xmm0 \n"  // horizontal average of neighbour pairs
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"  // keep copies for the V multiply
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"  // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"  // width -= 16; these flags feed the jg at the bottom
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_bgra0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_bgra)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-/* BGRAToUVRow_Unaligned_SSSE3: same 2x2 subsampling as BGRAToUVRow_SSSE3, but
- * both source rows are loaded with movdqu (the second row goes through xmm7
- * before the pavgb), so src_bgra0 and src_stride_bgra carry no alignment
- * requirement.
- *
- * Per loop pass: 16 pixels from each of the two rows in, 8 U bytes to dst_u
- * and 8 V bytes to dst_v out. width is decremented by 16 per pass and tested
- * after the body has run.
- *
- * NOTE(review): xmm3/xmm4/xmm5 are set in the first asm statement and consumed
- * in the second without being operands or clobbers of either.
- */
-void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"  // xmm4 = U coefficients
- "movdqa %1,%%xmm3 \n"  // xmm3 = V coefficients
- "movdqa %2,%%xmm5 \n"  // xmm5 = kAddUV128
- :
- : "m"(kBGRAToU), // %0
- "m"(kBGRAToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"  // %2 becomes the offset (dst_v - dst_u)
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"  // vertical average with the second row
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"  // even-indexed pixels of xmm0:xmm1
- "shufps $0xdd,%%xmm1,%%xmm7 \n"  // odd-indexed pixels of xmm0:xmm1
- "pavgb %%xmm7,%%xmm0 \n"  // horizontal average of neighbour pairs
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"  // keep copies for the V multiply
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"  // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"  // width -= 16; these flags feed the jg at the bottom
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_bgra0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_bgra)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-/* ABGRToYRow_SSSE3: computes one Y byte per 4-byte source pixel using the
- * kABGRToY coefficients.
- *
- * src_abgr  source row; 64 bytes read per loop pass with movdqa (16-byte
- *           aligned).
- * dst_y     output; 16 bytes stored per pass with movdqa (16-byte aligned).
- * pix       pixel count, decremented by 16 per pass and tested after the body
- *           has run.
- */
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"  // xmm5 = kAddY16
- "movdqa %3,%%xmm4 \n"  // xmm4 = kABGRToY
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"  // pix -= 16; these flags feed the jg below
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_abgr), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kABGRToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-/* ABGRToYRow_Unaligned_SSSE3: same computation as ABGRToYRow_SSSE3 (one Y byte
- * per 4-byte pixel, 16 pixels per loop pass), but src_abgr is loaded and dst_y
- * is stored with movdqu, so neither pointer needs 16-byte alignment.
- *
- * pix is decremented by 16 per pass and tested after the body has run.
- */
-void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"  // xmm5 = kAddY16
- "movdqa %3,%%xmm4 \n"  // xmm4 = kABGRToY
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"  // pix -= 16; these flags feed the jg below
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_abgr), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kABGRToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-/* RGBAToYRow_SSSE3: computes one Y byte per 4-byte source pixel using the
- * kRGBAToY coefficients.
- *
- * src_rgba  source row; 64 bytes read per loop pass with movdqa (16-byte
- *           aligned).
- * dst_y     output; 16 bytes stored per pass with movdqa (16-byte aligned).
- * pix       pixel count, decremented by 16 per pass and tested after the body
- *           has run.
- */
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"  // xmm5 = kAddY16
- "movdqa %3,%%xmm4 \n"  // xmm4 = kRGBAToY
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"  // pix -= 16; these flags feed the jg below
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_rgba), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kRGBAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-/* RGBAToYRow_Unaligned_SSSE3: same computation as RGBAToYRow_SSSE3 (one Y byte
- * per 4-byte pixel, 16 pixels per loop pass), but src_rgba is loaded and dst_y
- * is stored with movdqu, so neither pointer needs 16-byte alignment.
- *
- * pix is decremented by 16 per pass and tested after the body has run.
- */
-void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n"  // xmm5 = kAddY16
- "movdqa %3,%%xmm4 \n"  // xmm4 = kRGBAToY
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"  // pix -= 16; these flags feed the jg below
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_rgba), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kRGBAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-/* ABGRToUVRow_SSSE3: U and V subsampled 2x2 from two adjacent source rows,
- * using the kABGRToU / kABGRToV coefficients.
- *
- * src_abgr0        first row; 64 bytes read per loop pass with movdqa
- *                  (16-byte aligned).
- * src_stride_abgr  byte distance to the second row, which is consumed as a
- *                  pavgb memory operand at src_abgr0 + stride; that form also
- *                  needs a 16-byte aligned address.
- * dst_u, dst_v     8 bytes each written per pass (movlps / movhps); dst_v is
- *                  addressed as dst_u plus the dst_v - dst_u offset.
- * width            decremented by 16 per pass and tested after the body has
- *                  run.
- *
- * NOTE(review): xmm3/xmm4/xmm5 are set in the first asm statement and consumed
- * in the second without being operands or clobbers of either.
- */
-void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"  // xmm4 = U coefficients
- "movdqa %1,%%xmm3 \n"  // xmm3 = V coefficients
- "movdqa %2,%%xmm5 \n"  // xmm5 = kAddUV128
- :
- : "m"(kABGRToU), // %0
- "m"(kABGRToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"  // %2 becomes the offset (dst_v - dst_u)
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"  // even-indexed pixels of xmm0:xmm1
- "shufps $0xdd,%%xmm1,%%xmm7 \n"  // odd-indexed pixels of xmm0:xmm1
- "pavgb %%xmm7,%%xmm0 \n"  // horizontal average of neighbour pairs
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"  // keep copies for the V multiply
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"  // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"  // width -= 16; these flags feed the jg at the bottom
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_abgr0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_abgr)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-/* ABGRToUVRow_Unaligned_SSSE3: same 2x2 subsampling as ABGRToUVRow_SSSE3, but
- * both source rows are loaded with movdqu (the second row goes through xmm7
- * before the pavgb), so src_abgr0 and src_stride_abgr carry no alignment
- * requirement.
- *
- * Per loop pass: 16 pixels from each of the two rows in, 8 U bytes to dst_u
- * and 8 V bytes to dst_v out. width is decremented by 16 per pass and tested
- * after the body has run.
- *
- * NOTE(review): xmm3/xmm4/xmm5 are set in the first asm statement and consumed
- * in the second without being operands or clobbers of either.
- */
-void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"  // xmm4 = U coefficients
- "movdqa %1,%%xmm3 \n"  // xmm3 = V coefficients
- "movdqa %2,%%xmm5 \n"  // xmm5 = kAddUV128
- :
- : "m"(kABGRToU), // %0
- "m"(kABGRToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"  // %2 becomes the offset (dst_v - dst_u)
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"  // vertical average with the second row
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"  // even-indexed pixels of xmm0:xmm1
- "shufps $0xdd,%%xmm1,%%xmm7 \n"  // odd-indexed pixels of xmm0:xmm1
- "pavgb %%xmm7,%%xmm0 \n"  // horizontal average of neighbour pairs
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"  // keep copies for the V multiply
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"  // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"  // width -= 16; these flags feed the jg at the bottom
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_abgr0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_abgr)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-/* RGBAToUVRow_SSSE3: U and V subsampled 2x2 from two adjacent source rows,
- * using the kRGBAToU / kRGBAToV coefficients.
- *
- * src_rgba0        first row; 64 bytes read per loop pass with movdqa
- *                  (16-byte aligned).
- * src_stride_rgba  byte distance to the second row, which is consumed as a
- *                  pavgb memory operand at src_rgba0 + stride; that form also
- *                  needs a 16-byte aligned address.
- * dst_u, dst_v     8 bytes each written per pass (movlps / movhps); dst_v is
- *                  addressed as dst_u plus the dst_v - dst_u offset.
- * width            decremented by 16 per pass and tested after the body has
- *                  run.
- *
- * NOTE(review): xmm3/xmm4/xmm5 are set in the first asm statement and consumed
- * in the second without being operands or clobbers of either.
- */
-void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"  // xmm4 = U coefficients
- "movdqa %1,%%xmm3 \n"  // xmm3 = V coefficients
- "movdqa %2,%%xmm5 \n"  // xmm5 = kAddUV128
- :
- : "m"(kRGBAToU), // %0
- "m"(kRGBAToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"  // %2 becomes the offset (dst_v - dst_u)
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0
- MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1
- MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2
- MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"  // even-indexed pixels of xmm0:xmm1
- "shufps $0xdd,%%xmm1,%%xmm7 \n"  // odd-indexed pixels of xmm0:xmm1
- "pavgb %%xmm7,%%xmm0 \n"  // horizontal average of neighbour pairs
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"  // keep copies for the V multiply
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"  // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"  // width -= 16; these flags feed the jg at the bottom
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_rgba0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_rgba)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-/* RGBAToUVRow_Unaligned_SSSE3: same 2x2 subsampling as RGBAToUVRow_SSSE3, but
- * both source rows are loaded with movdqu (the second row goes through xmm7
- * before the pavgb), so src_rgba0 and src_stride_rgba carry no alignment
- * requirement.
- *
- * Per loop pass: 16 pixels from each of the two rows in, 8 U bytes to dst_u
- * and 8 V bytes to dst_v out. width is decremented by 16 per pass and tested
- * after the body has run.
- *
- * NOTE(review): xmm3/xmm4/xmm5 are set in the first asm statement and consumed
- * in the second without being operands or clobbers of either.
- */
-void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %0,%%xmm4 \n"  // xmm4 = U coefficients
- "movdqa %1,%%xmm3 \n"  // xmm3 = V coefficients
- "movdqa %2,%%xmm5 \n"  // xmm5 = kAddUV128
- :
- : "m"(kRGBAToU), // %0
- "m"(kRGBAToV), // %1
- "m"(kAddUV128) // %2
- );
- asm volatile (
- "sub %1,%2 \n"  // %2 becomes the offset (dst_v - dst_u)
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"  // vertical average with the second row
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"  // even-indexed pixels of xmm0:xmm1
- "shufps $0xdd,%%xmm1,%%xmm7 \n"  // odd-indexed pixels of xmm0:xmm1
- "pavgb %%xmm7,%%xmm0 \n"  // horizontal average of neighbour pairs
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"  // keep copies for the V multiply
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n"  // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"  // width -= 16; these flags feed the jg at the bottom
- "movlps %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_rgba0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_rgba)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
- );
-}
-#endif // HAS_ARGBTOUVROW_SSSE3
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-// YUV to RGB coefficients in fixed point, scaled by 64 (the converters shift
-// the result right by 6). Each macro body is parenthesized so that it expands
-// as a single value inside any larger expression.
-#define UB 127 /* 2.018 * 64 = 129 does not fit in int8, so clamped to 127 */
-#define UG (-25) /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG (-52) /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias: the U and V coefficients of a channel, each multiplied by 128.
-#define BB (UB * 128 + VB * 128)
-#define BG (UG * 128 + VG * 128)
-#define BR (UR * 128 + VR * 128)
-
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-/* Constant table for the YUV to RGB row converters. The asm receives
- * &kYuvConstants.kUVToB in a register and reaches the other fields by byte
- * offset (see the YUVTORGB and YVUTORGB macros), so the field order and sizes
- * must not change. The number after each field is that byte offset.
- */
-struct {
- vec8 kUVToB; // 0
- vec8 kUVToG; // 16
- vec8 kUVToR; // 32
- vec16 kUVBiasB; // 48
- vec16 kUVBiasG; // 64
- vec16 kUVBiasR; // 80
- vec16 kYSub16; // 96
- vec16 kYToRgb; // 112
- vec8 kVUToB; // 128
- vec8 kVUToG; // 144
- vec8 kVUToR; // 160
-} static SIMD_ALIGNED(kYuvConstants) = {
- { UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB },
- { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
- { UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR },
- { BB, BB, BB, BB, BB, BB, BB, BB },
- { BG, BG, BG, BG, BG, BG, BG, BG },
- { BR, BR, BR, BR, BR, BR, BR, BR },
- { 16, 16, 16, 16, 16, 16, 16, 16 },
- { YG, YG, YG, YG, YG, YG, YG, YG },
- { VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB },  // same coefficients, V first
- { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
- { VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR }
-};
-
-
-/* Read 8 UV from 444: load 8 U bytes from [u_buf] and 8 V bytes from
- * [u_buf] + [v_buf] (the including function has already turned [v_buf] into
- * the v - u offset), advance [u_buf] by 8 and interleave them so xmm0 holds
- * 8 UV byte pairs. Uses xmm1 as scratch. */
-#define READYUV444 \
- "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- BUNDLEALIGN \
- MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n"
-
-/* Read 4 UV from 422, upsample to 8 UV: load 4 U bytes from [u_buf] and 4 V
- * bytes from [u_buf] + [v_buf] ([v_buf] holds the v - u offset), advance
- * [u_buf] by 4, interleave to 4 UV pairs, then duplicate each 16-bit pair so
- * xmm0 holds 8 UV pairs. Uses xmm1 as scratch. */
-#define READYUV422 \
- "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- BUNDLEALIGN \
- MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n" \
- "punpcklwd %%xmm0,%%xmm0 \n"
-
-/* Read 2 UV from 411, upsample to 8 UV: each movd loads 4 bytes although
- * [u_buf] only advances by 2, so 2 bytes past the consumed U and V samples
- * are read as well. After interleaving, every 16-bit UV pair is duplicated
- * twice over (punpcklwd then punpckldq) so xmm0 holds 8 UV pairs.
- * Uses xmm1 as scratch. */
-#define READYUV411 \
- "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- BUNDLEALIGN \
- MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n" \
- "punpcklwd %%xmm0,%%xmm0 \n" \
- "punpckldq %%xmm0,%%xmm0 \n"
-
-/* Read 4 UV from NV12, upsample to 8 UV: load 8 bytes (4 already interleaved
- * byte pairs) from [uv_buf], advance [uv_buf] by 8 and duplicate each 16-bit
- * pair so xmm0 holds 8 pairs. */
-#define READNV12 \
- "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
- "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \
- "punpcklwd %%xmm0,%%xmm0 \n"
-
-/* Convert 8 pixels: 8 UV and 8 Y.
- * In:  xmm0 = 8 interleaved UV byte pairs, [y_buf] -> 8 Y bytes, xmm4 = zero
- *      (it widens Y to 16 bits), [kYuvConstants] = &kYuvConstants.kUVToB.
- * Out: the low 8 bytes of xmm0, xmm1 and xmm2 hold the results computed from
- *      kUVToB, kUVToG and kUVToR respectively; [y_buf] is advanced by 8.
- * Scratch: xmm3. The numeric displacements are byte offsets into
- * kYuvConstants. */
-#define YUVTORGB \
- "movdqa %%xmm0,%%xmm1 \n" \
- "movdqa %%xmm0,%%xmm2 \n" \
- "pmaddubsw " MEMACCESS([kYuvConstants]) ",%%xmm0 \n" \
- "pmaddubsw " MEMACCESS2(16, [kYuvConstants]) ",%%xmm1 \n" \
- "pmaddubsw " MEMACCESS2(32, [kYuvConstants]) ",%%xmm2 \n" \
- "psubw " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0 \n" \
- "psubw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1 \n" \
- "psubw " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
- "punpcklbw %%xmm4,%%xmm3 \n" \
- "psubsw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \
- "pmullw " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3 \n" \
- "paddsw %%xmm3,%%xmm0 \n" \
- "paddsw %%xmm3,%%xmm1 \n" \
- "paddsw %%xmm3,%%xmm2 \n" \
- "psraw $0x6,%%xmm0 \n" \
- "psraw $0x6,%%xmm1 \n" \
- "psraw $0x6,%%xmm2 \n" \
- "packuswb %%xmm0,%%xmm0 \n" \
- "packuswb %%xmm1,%%xmm1 \n" \
- "packuswb %%xmm2,%%xmm2 \n"
-
-/* Convert 8 pixels: 8 VU and 8 Y.
- * Identical to YUVTORGB except that the three multiplies use the V-first
- * coefficient vectors at byte offsets 128, 144 and 160 of kYuvConstants
- * (kVUToB, kVUToG, kVUToR), for input whose byte pairs are ordered V then U.
- * In:  xmm0 = 8 interleaved VU byte pairs, [y_buf] -> 8 Y bytes, xmm4 = zero.
- * Out: low 8 bytes of xmm0, xmm1, xmm2; [y_buf] is advanced by 8.
- * Scratch: xmm3. */
-#define YVUTORGB \
- "movdqa %%xmm0,%%xmm1 \n" \
- "movdqa %%xmm0,%%xmm2 \n" \
- "pmaddubsw " MEMACCESS2(128, [kYuvConstants]) ",%%xmm0 \n" \
- "pmaddubsw " MEMACCESS2(144, [kYuvConstants]) ",%%xmm1 \n" \
- "pmaddubsw " MEMACCESS2(160, [kYuvConstants]) ",%%xmm2 \n" \
- "psubw " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0 \n" \
- "psubw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1 \n" \
- "psubw " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
- "punpcklbw %%xmm4,%%xmm3 \n" \
- "psubsw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \
- "pmullw " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3 \n" \
- "paddsw %%xmm3,%%xmm0 \n" \
- "paddsw %%xmm3,%%xmm1 \n" \
- "paddsw %%xmm3,%%xmm2 \n" \
- "psraw $0x6,%%xmm0 \n" \
- "psraw $0x6,%%xmm1 \n" \
- "psraw $0x6,%%xmm2 \n" \
- "packuswb %%xmm0,%%xmm0 \n" \
- "packuswb %%xmm1,%%xmm1 \n" \
- "packuswb %%xmm2,%%xmm2 \n"
-/* I444ToARGBRow_SSSE3: converts planar Y, U and V rows (one U and one V byte
- * per pixel) into 4-byte output pixels.
- *
- * y_buf, u_buf, v_buf  source planes; 8 bytes of each are consumed per loop
- *                      pass. v_buf is turned into the v - u offset up front.
- * dst_argb             output; 32 bytes stored per pass with movdqa, so it
- *                      must be 16-byte aligned.
- * width                pixel count, decremented by 8 per pass and tested after
- *                      the body has run.
- *
- * The fourth byte of every output pixel is 0xff (taken from xmm5, which
- * pcmpeqb sets to all ones).
- */
-void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"  // v_buf becomes the offset (v_buf - u_buf)
- "pcmpeqb %%xmm5,%%xmm5 \n"  // xmm5 = all ones
- "pxor %%xmm4,%%xmm4 \n"  // xmm4 = zero, required by YUVTORGB
- LABELALIGN
- "1: \n"
- READYUV444
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"  // interleave xmm0 and xmm1 results
- "punpcklbw %%xmm5,%%xmm2 \n"  // interleave xmm2 result with 0xff
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"  // pixels 0..3
- "punpckhwd %%xmm2,%%xmm1 \n"  // pixels 4..7
- "movdqa %%xmm0," MEMACCESS([dst_argb]) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) " \n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-/* I422ToRGB24Row_SSSE3: converts planar Y, U and V rows (one U and one V byte
- * per two pixels) into packed 3-byte output pixels.
- *
- * Per loop pass: 8 Y, 4 U and 4 V bytes in; 24 bytes out, written as an
- * 8-byte movq followed by a 16-byte movdqu at offset 8 (no alignment needed
- * on dst_rgb24). width is decremented by 8 per pass and tested after the body
- * has run.
- *
- * NOTE(review): on __i386__ the shuffle masks are loaded into xmm5/xmm6 by a
- * separate asm statement with no clobbers, so the main statement depends on
- * the compiler not touching those registers in between.
- */
-void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgb24,
- int width) {
-// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
-#if defined(__i386__)
- asm volatile (
- "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
- "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
- :: [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
- [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24));
-#endif
-
- asm volatile (
-#if !defined(__i386__)
- "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
- "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
-#endif
- "sub %[u_buf],%[v_buf] \n"  // v_buf becomes the offset (v_buf - u_buf)
- "pxor %%xmm4,%%xmm4 \n"  // xmm4 = zero, required by YUVTORGB
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"  // xmm2 result doubled; the extra byte is dropped by pshufb
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm6,%%xmm1 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n"
- "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB)
-#if !defined(__i386__)
- , [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
- [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
-#endif
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-/* I422ToRAWRow_SSSE3: same pipeline as I422ToRGB24Row_SSSE3 but packing with
- * the kShuffleMaskARGBToRAW_0 / kShuffleMaskARGBToRAW shuffle masks.
- *
- * Per loop pass: 8 Y, 4 U and 4 V bytes in; 24 bytes out, written as an
- * 8-byte movq followed by a 16-byte movdqu at offset 8 (no alignment needed
- * on dst_raw). width is decremented by 8 per pass and tested after the body
- * has run.
- *
- * NOTE(review): on __i386__ the shuffle masks are loaded into xmm5/xmm6 by a
- * separate asm statement with no clobbers, so the main statement depends on
- * the compiler not touching those registers in between.
- */
-void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_raw,
- int width) {
-// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
-#if defined(__i386__)
- asm volatile (
- "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
- "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n"
- :: [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
- [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW));
-#endif
-
- asm volatile (
-#if !defined(__i386__)
- "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
- "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n"
-#endif
- "sub %[u_buf],%[v_buf] \n"  // v_buf becomes the offset (v_buf - u_buf)
- "pxor %%xmm4,%%xmm4 \n"  // xmm4 = zero, required by YUVTORGB
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"  // xmm2 result doubled; the extra byte is dropped by pshufb
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm6,%%xmm1 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS([dst_raw]) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n"
- "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_raw]"+r"(dst_raw), // %[dst_raw]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB)
-#if !defined(__i386__)
- , [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
- [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
-#endif
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-
-void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV411
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READNV12
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
- // Does not use r14.
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READNV12
- YVUTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
- // Does not use r14.
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV444
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV411
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READNV12
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
- // Does not use r14.
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READNV12
- YVUTORGB
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
- "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
- // Does not use r14.
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm5 \n"
- "movdqa %%xmm5,%%xmm0 \n"
- "punpcklwd %%xmm1,%%xmm5 \n"
- "punpckhwd %%xmm1,%%xmm0 \n"
- "movdqa %%xmm5," MEMACCESS([dst_bgra]) "\n"
- "movdqa %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n"
- "lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm2 \n"
- "punpckhwd %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2," MEMACCESS([dst_abgr]) "\n"
- "movdqa %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n"
- "lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm2,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "movdqa %%xmm5,%%xmm0 \n"
- "punpcklwd %%xmm1,%%xmm5 \n"
- "punpckhwd %%xmm1,%%xmm0 \n"
- "movdqa %%xmm5," MEMACCESS([dst_rgba]) "\n"
- "movdqa %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n"
- "lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm5 \n"
- "movdqa %%xmm5,%%xmm0 \n"
- "punpcklwd %%xmm1,%%xmm5 \n"
- "punpckhwd %%xmm1,%%xmm0 \n"
- "movdqu %%xmm5," MEMACCESS([dst_bgra]) "\n"
- "movdqu %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n"
- "lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "punpcklbw %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm2 \n"
- "punpckhwd %%xmm0,%%xmm1 \n"
- "movdqu %%xmm2," MEMACCESS([dst_abgr]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n"
- "lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void OMITFP I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm2,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "movdqa %%xmm5,%%xmm0 \n"
- "punpcklwd %%xmm1,%%xmm5 \n"
- "punpckhwd %%xmm1,%%xmm0 \n"
- "movdqu %%xmm5," MEMACCESS([dst_rgba]) "\n"
- "movdqu %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n"
- "lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n"
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-#endif // HAS_I422TOARGBROW_SSSE3
-
-#ifdef HAS_YTOARGBROW_SSE2
-void YToARGBRow_SSE2(const uint8* y_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pxor %%xmm5,%%xmm5 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n"
- "mov $0x00100010,%%eax \n"
- "movd %%eax,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n"
- "mov $0x004a004a,%%eax \n"
- "movd %%eax,%%xmm2 \n"
- "pshufd $0x0,%%xmm2,%%xmm2 \n"
- LABELALIGN
- "1: \n"
- // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "psubusw %%xmm3,%%xmm0 \n"
- "pmullw %%xmm2,%%xmm0 \n"
- "psrlw $6, %%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
-
- // Step 2: Weave into ARGB
- "punpcklbw %%xmm0,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm0 \n"
- "punpckhwd %%xmm1,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "por %%xmm4,%%xmm1 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
-
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(y_buf), // %0
- "+r"(dst_argb), // %1
- "+rm"(width) // %2
- :
- : "memory", "cc", "eax"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
- );
-}
-#endif // HAS_YTOARGBROW_SSE2
-
-#ifdef HAS_MIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static uvec8 kShuffleMirror = {
- 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
- intptr_t temp_width = (intptr_t)(width);
- asm volatile (
- "movdqa %3,%%xmm5 \n"
- "lea " MEMLEA(-0x10,0) ",%0 \n"
- LABELALIGN
- "1: \n"
- MEMOPREG(movdqa,0x00,0,2,1,xmm0) // movdqa (%0,%2),%%xmm0
- "pshufb %%xmm5,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- : "m"(kShuffleMirror) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm5"
-#endif
- );
-}
-#endif // HAS_MIRRORROW_SSSE3
-
-#ifdef HAS_MIRRORROW_SSE2
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
- intptr_t temp_width = (intptr_t)(width);
- asm volatile (
- "lea " MEMLEA(-0x10,0) ",%0 \n"
- LABELALIGN
- "1: \n"
- MEMOPREG(movdqu,0x00,0,2,1,xmm0) // movdqu (%0,%2),%%xmm0
- "movdqa %%xmm0,%%xmm1 \n"
- "psllw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm1,%%xmm0 \n"
- "pshuflw $0x1b,%%xmm0,%%xmm0 \n"
- "pshufhw $0x1b,%%xmm0,%%xmm0 \n"
- "pshufd $0x4e,%%xmm0,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1)",%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-#endif // HAS_MIRRORROW_SSE2
-
-#ifdef HAS_MIRRORROW_UV_SSSE3
-// Shuffle table for reversing the bytes of UV channels.
-static uvec8 kShuffleMirrorUV = {
- 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
-};
-void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
- int width) {
- intptr_t temp_width = (intptr_t)(width);
- asm volatile (
- "movdqa %4,%%xmm1 \n"
- "lea " MEMLEA4(-0x10,0,3,2) ",%0 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(-0x10,0) ",%0 \n"
- "pshufb %%xmm1,%%xmm0 \n"
- "sub $8,%3 \n"
- "movlpd %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(temp_width) // %3
- : "m"(kShuffleMirrorUV) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-#endif // HAS_MIRRORROW_UV_SSSE3
-
-#ifdef HAS_ARGBMIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static uvec8 kARGBShuffleMirror = {
- 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
-};
-
-void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
- intptr_t temp_width = (intptr_t)(width);
- asm volatile (
- "lea " MEMLEA4(-0x10,0,2,4) ",%0 \n"
- "movdqa %3,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "lea " MEMLEA(-0x10,0) ",%0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- : "m"(kARGBShuffleMirror) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBMIRRORROW_SSSE3
-
-#ifdef HAS_SPLITUVROW_SSE2
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "psrlw $0x8,%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "packuswb %%xmm3,%%xmm2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movdqa,xmm2,0x00,1,2,1) // movdqa %%xmm2,(%1,%2)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "psrlw $0x8,%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "packuswb %%xmm3,%%xmm2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movdqu,xmm2,0x00,1,2,1) // movdqu %%xmm2,(%1,%2)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-#endif // HAS_SPLITUVROW_SSE2
-
-#ifdef HAS_MERGEUVROW_SSE2
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) {
- asm volatile (
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm2 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "movdqa %%xmm2," MEMACCESS2(0x10,2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2"
-#endif
- );
-}
-
-void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
- uint8* dst_uv, int width) {
- asm volatile (
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm2 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2"
-#endif
- );
-}
-#endif // HAS_MERGEUVROW_SSE2
-
-#ifdef HAS_COPYROW_SSE2
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(count) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-#endif // HAS_COPYROW_SSE2
-
-#ifdef HAS_COPYROW_X86
-void CopyRow_X86(const uint8* src, uint8* dst, int width) {
- size_t width_tmp = (size_t)(width);
- asm volatile (
- "shr $0x2,%2 \n"
- "rep movsl " MEMMOVESTRING(0,1) " \n"
- : "+S"(src), // %0
- "+D"(dst), // %1
- "+c"(width_tmp) // %2
- :
- : "memory", "cc"
- );
-}
-#endif // HAS_COPYROW_X86
-
-#ifdef HAS_COPYROW_ERMS
-// Unaligned Multiple of 1.
-void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
- size_t width_tmp = (size_t)(width);
- asm volatile (
- "rep movsb " MEMMOVESTRING(0,1) " \n"
- : "+S"(src), // %0
- "+D"(dst), // %1
- "+c"(width_tmp) // %2
- :
- : "memory", "cc"
- );
-}
-#endif // HAS_COPYROW_ERMS
-
-#ifdef HAS_ARGBCOPYALPHAROW_SSE2
-// width in pixels
-void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "pcmpeqb %%xmm0,%%xmm0 \n"
- "pslld $0x18,%%xmm0 \n"
- "pcmpeqb %%xmm1,%%xmm1 \n"
- "psrld $0x8,%%xmm1 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa " MEMACCESS(1) ",%%xmm4 \n"
- "movdqa " MEMACCESS2(0x10,1) ",%%xmm5 \n"
- "pand %%xmm0,%%xmm2 \n"
- "pand %%xmm0,%%xmm3 \n"
- "pand %%xmm1,%%xmm4 \n"
- "pand %%xmm1,%%xmm5 \n"
- "por %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqa %%xmm2," MEMACCESS(1) " \n"
- "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBCOPYALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYALPHAROW_AVX2
-// width in pixels
-void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpsrld $0x8,%%ymm0,%%ymm0 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm2 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n"
- "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n"
- "vmovdqu %%ymm1," MEMACCESS(1) " \n"
- "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2"
-#endif
- );
-}
-#endif // HAS_ARGBCOPYALPHAROW_AVX2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
-// width in pixels
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "pcmpeqb %%xmm0,%%xmm0 \n"
- "pslld $0x18,%%xmm0 \n"
- "pcmpeqb %%xmm1,%%xmm1 \n"
- "psrld $0x8,%%xmm1 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"
- "punpckhwd %%xmm2,%%xmm3 \n"
- "punpcklwd %%xmm2,%%xmm2 \n"
- "movdqa " MEMACCESS(1) ",%%xmm4 \n"
- "movdqa " MEMACCESS2(0x10,1) ",%%xmm5 \n"
- "pand %%xmm0,%%xmm2 \n"
- "pand %%xmm0,%%xmm3 \n"
- "pand %%xmm1,%%xmm4 \n"
- "pand %%xmm1,%%xmm5 \n"
- "por %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqa %%xmm2," MEMACCESS(1) " \n"
- "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
-// width in pixels
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpsrld $0x8,%%ymm0,%%ymm0 \n"
- LABELALIGN
- "1: \n"
- "vpmovzxbd " MEMACCESS(0) ",%%ymm1 \n"
- "vpmovzxbd " MEMACCESS2(0x8,0) ",%%ymm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "vpslld $0x18,%%ymm1,%%ymm1 \n"
- "vpslld $0x18,%%ymm2,%%ymm2 \n"
- "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n"
- "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n"
- "vmovdqu %%ymm1," MEMACCESS(1) " \n"
- "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2"
-#endif
- );
-}
-#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
-
-#ifdef HAS_SETROW_X86
-void SetRow_X86(uint8* dst, uint32 v32, int width) {
- size_t width_tmp = (size_t)(width);
- asm volatile (
- "shr $0x2,%1 \n"
- "rep stosl " MEMSTORESTRING(eax,0) " \n"
- : "+D"(dst), // %0
- "+c"(width_tmp) // %1
- : "a"(v32) // %2
- : "memory", "cc");
-}
-
-void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
- int dst_stride, int height) {
- for (int y = 0; y < height; ++y) {
- size_t width_tmp = (size_t)(width);
- uint32* d = (uint32*)(dst);
- asm volatile (
- "rep stosl " MEMSTORESTRING(eax,0) " \n"
- : "+D"(d), // %0
- "+c"(width_tmp) // %1
- : "a"(v32) // %2
- : "memory", "cc");
- dst += dst_stride;
- }
-}
-#endif // HAS_SETROW_X86
-
-#ifdef HAS_YUY2TOYROW_SSE2
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movdqa,0x00,0,4,1,xmm2) // movdqa (%0,%4,1),%%xmm2
- MEMOPREG(movdqa,0x10,0,4,1,xmm3) // movdqa 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_yuy2)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_y, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2,
- int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_yuy2)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movdqa,0x00,0,4,1,xmm2) // movdqa (%0,%4,1),%%xmm2
- MEMOPREG(movdqa,0x10,0,4,1,xmm3) // movdqa 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_uyvy)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_y, int pix) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_uyvy)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // HAS_YUY2TOYROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSE2
-// Blend 8 pixels at a time.
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $0xf,%%xmm7 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x8,%%xmm6 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psllw $0x8,%%xmm5 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n"
- "sub $0x1,%3 \n"
- "je 91f \n"
- "jl 99f \n"
-
- // 1 pixel loop until destination pointer is aligned.
- "10: \n"
- "test $0xf,%2 \n"
- "je 19f \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
- "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x1,%3 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "jge 10b \n"
-
- "19: \n"
- "add $1-4,%3 \n"
- "jl 49f \n"
-
- // 4 pixel loop.
- LABELALIGN
- "41: \n"
- "movdqu " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
- "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jge 41b \n"
-
- "49: \n"
- "add $0x3,%3 \n"
- "jl 99f \n"
-
- // 1 pixel loop.
- "91: \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
- "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x1,%3 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "jge 91b \n"
- "99: \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-#endif // HAS_ARGBBLENDROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSSE3
-// Shuffle table for isolating alpha.
-static uvec8 kShuffleAlpha = {
- 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
- 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
-};
-
-// Blend 8 pixels at a time
-// Shuffle table for reversing the bytes.
-
-// Same as SSE2, but replaces
-// psrlw xmm3, 8 // alpha
-// pshufhw xmm3, xmm3,0F5h // 8 alpha words
-// pshuflw xmm3, xmm3,0F5h
-// with..
-// pshufb xmm3, kShuffleAlpha // alpha
-
-void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $0xf,%%xmm7 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x8,%%xmm6 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psllw $0x8,%%xmm5 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n"
- "sub $0x1,%3 \n"
- "je 91f \n"
- "jl 99f \n"
-
- // 1 pixel loop until destination pointer is aligned.
- "10: \n"
- "test $0xf,%2 \n"
- "je 19f \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x1,%3 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "jge 10b \n"
-
- "19: \n"
- "add $1-4,%3 \n"
- "jl 49f \n"
- "test $0xf,%0 \n"
- "jne 41f \n"
- "test $0xf,%1 \n"
- "jne 41f \n"
-
- // 4 pixel loop.
- LABELALIGN
- "40: \n"
- "movdqa " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movdqa " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movdqa " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jge 40b \n"
- "jmp 49f \n"
-
- // 4 pixel unaligned loop.
- LABELALIGN
- "41: \n"
- "movdqu " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jge 41b \n"
-
- "49: \n"
- "add $0x3,%3 \n"
- "jl 99f \n"
-
- // 1 pixel loop.
- "91: \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x1,%3 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "jge 91b \n"
- "99: \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : "m"(kShuffleAlpha) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-#endif // HAS_ARGBBLENDROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-// Attenuate 4 pixels at a time.
-// aligned to 16 bytes
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrld $0x8,%%xmm5 \n"
-
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "pshufhw $0xff,%%xmm0,%%xmm2 \n"
- "pshuflw $0xff,%%xmm2,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm1 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- "pshufhw $0xff,%%xmm1,%%xmm2 \n"
- "pshuflw $0xff,%%xmm2,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "movdqa " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "pand %%xmm4,%%xmm2 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-// Shuffle table duplicating alpha
-static uvec8 kShuffleAlpha0 = {
- 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
-};
-static uvec8 kShuffleAlpha1 = {
- 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
- 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
-};
-// Attenuate 4 pixels at a time.
-// aligned to 16 bytes
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "pslld $0x18,%%xmm3 \n"
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
-
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "punpcklbw %%xmm1,%%xmm1 \n"
- "pmulhuw %%xmm1,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "punpckhbw %%xmm2,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "pand %%xmm3,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kShuffleAlpha0), // %3
- "m"(kShuffleAlpha1) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBATTENUATEROW_SSSE3
-
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-// Unattenuate 4 pixels at a time.
-// aligned to 16 bytes
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- int width) {
- uintptr_t alpha = 0;
- asm volatile (
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movzb " MEMACCESS2(0x03,0) ",%3 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x07,0) ",%3 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
- "pshuflw $0x40,%%xmm2,%%xmm2 \n"
- "pshuflw $0x40,%%xmm3,%%xmm3 \n"
- "movlhps %%xmm3,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "movzb " MEMACCESS2(0x0b,0) ",%3 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x0f,0) ",%3 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
- "pshuflw $0x40,%%xmm2,%%xmm2 \n"
- "pshuflw $0x40,%%xmm3,%%xmm3 \n"
- "movlhps %%xmm3,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width), // %2
- "+r"(alpha) // %3
- : "r"(fixed_invtbl8) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBUNATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBGRAYROW_SSSE3
-// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
-
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "paddw %%xmm5,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrld $0x18,%%xmm2 \n"
- "psrld $0x18,%%xmm3 \n"
- "packuswb %%xmm3,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "punpcklbw %%xmm2,%%xmm3 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm3,%%xmm0 \n"
- "punpckhwd %%xmm3,%%xmm1 \n"
- "sub $0x8,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBGRAYROW_SSSE3
-
-#ifdef HAS_ARGBSEPIAROW_SSSE3
-// b = (r * 35 + g * 68 + b * 17) >> 7
-// g = (r * 45 + g * 88 + b * 22) >> 7
-// r = (r * 50 + g * 98 + b * 24) >> 7
-// Constant for ARGB color to sepia tone
-static vec8 kARGBToSepiaB = {
- 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
-};
-
-static vec8 kARGBToSepiaG = {
- 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
-};
-
-static vec8 kARGBToSepiaR = {
- 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
-};
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
- asm volatile (
- "movdqa %2,%%xmm2 \n"
- "movdqa %3,%%xmm3 \n"
- "movdqa %4,%%xmm4 \n"
-
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "pmaddubsw %%xmm2,%%xmm6 \n"
- "phaddw %%xmm6,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm5 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm5 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm5 \n"
- "psrlw $0x7,%%xmm5 \n"
- "packuswb %%xmm5,%%xmm5 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm5 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm5 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm5 \n"
- "psrlw $0x7,%%xmm5 \n"
- "packuswb %%xmm5,%%xmm5 \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "psrld $0x18,%%xmm6 \n"
- "psrld $0x18,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "punpcklbw %%xmm6,%%xmm5 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm5,%%xmm0 \n"
- "punpckhwd %%xmm5,%%xmm1 \n"
- "sub $0x8,%1 \n"
- "movdqa %%xmm0," MEMACCESS(0) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- : "m"(kARGBToSepiaB), // %2
- "m"(kARGBToSepiaG), // %3
- "m"(kARGBToSepiaR) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-#endif // HAS_ARGBSEPIAROW_SSSE3
-
-#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
-// Tranform 8 ARGB pixels (32 bytes) with color matrix.
-// Same as Sepia except matrix is provided.
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) {
- asm volatile (
- "movdqu " MEMACCESS(3) ",%%xmm5 \n"
- "pshufd $0x00,%%xmm5,%%xmm2 \n"
- "pshufd $0x55,%%xmm5,%%xmm3 \n"
- "pshufd $0xaa,%%xmm5,%%xmm4 \n"
- "pshufd $0xff,%%xmm5,%%xmm5 \n"
-
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm7 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "pmaddubsw %%xmm2,%%xmm7 \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "phaddsw %%xmm7,%%xmm0 \n"
- "phaddsw %%xmm1,%%xmm6 \n"
- "psraw $0x6,%%xmm0 \n"
- "psraw $0x6,%%xmm6 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "punpcklbw %%xmm6,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm7 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm7 \n"
- "phaddsw %%xmm7,%%xmm1 \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm7 \n"
- "pmaddubsw %%xmm5,%%xmm6 \n"
- "pmaddubsw %%xmm5,%%xmm7 \n"
- "phaddsw %%xmm7,%%xmm6 \n"
- "psraw $0x6,%%xmm1 \n"
- "psraw $0x6,%%xmm6 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "punpcklbw %%xmm6,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm6 \n"
- "punpcklwd %%xmm1,%%xmm0 \n"
- "punpckhwd %%xmm1,%%xmm6 \n"
- "sub $0x8,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm6," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(matrix_argb) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
-
-#ifdef HAS_ARGBQUANTIZEROW_SSE2
-// Quantize 4 ARGB pixels (16 bytes).
-// aligned to 16 bytes
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) {
- asm volatile (
- "movd %2,%%xmm2 \n"
- "movd %3,%%xmm3 \n"
- "movd %4,%%xmm4 \n"
- "pshuflw $0x40,%%xmm2,%%xmm2 \n"
- "pshufd $0x44,%%xmm2,%%xmm2 \n"
- "pshuflw $0x40,%%xmm3,%%xmm3 \n"
- "pshufd $0x44,%%xmm3,%%xmm3 \n"
- "pshuflw $0x40,%%xmm4,%%xmm4 \n"
- "pshufd $0x44,%%xmm4,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "pslld $0x18,%%xmm6 \n"
-
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm1 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "pmullw %%xmm3,%%xmm0 \n"
- "movdqa " MEMACCESS(0) ",%%xmm7 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "pand %%xmm6,%%xmm7 \n"
- "paddw %%xmm4,%%xmm0 \n"
- "paddw %%xmm4,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "por %%xmm7,%%xmm0 \n"
- "sub $0x4,%1 \n"
- "movdqa %%xmm0," MEMACCESS(0) " \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- : "r"(scale), // %2
- "r"(interval_size), // %3
- "r"(interval_offset) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-#endif // HAS_ARGBQUANTIZEROW_SSE2
-
-#ifdef HAS_ARGBSHADEROW_SSE2
-// Shade 4 pixels at a time by specified value.
-// Aligned to 16 bytes.
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) {
- asm volatile (
- "movd %3,%%xmm2 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"
- "punpcklqdq %%xmm2,%%xmm2 \n"
-
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(value) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2"
-#endif
- );
-}
-#endif // HAS_ARGBSHADEROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_SSE2
-// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
-void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "pxor %%xmm5,%%xmm5 \n"
-
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "movdqu %%xmm0,%%xmm1 \n"
- "movdqu %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "punpckhbw %%xmm5,%%xmm3 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "pmulhuw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBMULTIPLYROW_SSE2
-
-#ifdef HAS_ARGBADDROW_SSE2
-// Add 2 rows of ARGB pixels together, 4 pixels at a time.
-void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-#endif // HAS_ARGBADDROW_SSE2
-
-#ifdef HAS_ARGBSUBTRACTROW_SSE2
-// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
-void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psubusb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-#endif // HAS_ARGBSUBTRACTROW_SSE2
-
-#ifdef HAS_SOBELXROW_SSE2
-// SobelX as a matrix is
-// -1 0 1
-// -2 0 2
-// -1 0 1
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width) {
- asm volatile (
- "sub %0,%1 \n"
- "sub %0,%2 \n"
- "sub %0,%3 \n"
- "pxor %%xmm5,%%xmm5 \n"
-
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "movq " MEMACCESS2(0x2,0) ",%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "psubw %%xmm1,%%xmm0 \n"
- BUNDLEALIGN
- MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
- MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "psubw %%xmm2,%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2
- MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3
- "punpcklbw %%xmm5,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm3 \n"
- "psubw %%xmm3,%%xmm2 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "pxor %%xmm1,%%xmm1 \n"
- "psubw %%xmm0,%%xmm1 \n"
- "pmaxsw %%xmm1,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "sub $0x8,%4 \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1)
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "jg 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(src_y2), // %2
- "+r"(dst_sobelx), // %3
- "+r"(width) // %4
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-#endif // HAS_SOBELXROW_SSE2
-
-#ifdef HAS_SOBELYROW_SSE2
-// SobelY as a matrix is
-// -1 -2 -1
-// 0 0 0
-// 1 2 1
-void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) {
- asm volatile (
- "sub %0,%1 \n"
- "sub %0,%2 \n"
- "pxor %%xmm5,%%xmm5 \n"
-
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "psubw %%xmm1,%%xmm0 \n"
- BUNDLEALIGN
- "movq " MEMACCESS2(0x1,0) ",%%xmm1 \n"
- MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "psubw %%xmm2,%%xmm1 \n"
- BUNDLEALIGN
- "movq " MEMACCESS2(0x2,0) ",%%xmm2 \n"
- MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3
- "punpcklbw %%xmm5,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm3 \n"
- "psubw %%xmm3,%%xmm2 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "pxor %%xmm1,%%xmm1 \n"
- "psubw %%xmm0,%%xmm1 \n"
- "pmaxsw %%xmm1,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "sub $0x8,%3 \n"
- BUNDLEALIGN
- MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1)
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "jg 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(dst_sobely), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-#endif // HAS_SOBELYROW_SSE2
-
-#ifdef HAS_SOBELROW_SSE2
-// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "sub %0,%1 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0x18,%%xmm5 \n"
-
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "punpcklbw %%xmm0,%%xmm2 \n"
- "punpckhbw %%xmm0,%%xmm0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm1 \n"
- "punpckhwd %%xmm2,%%xmm2 \n"
- "por %%xmm5,%%xmm1 \n"
- "por %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "punpcklwd %%xmm0,%%xmm3 \n"
- "punpckhwd %%xmm0,%%xmm0 \n"
- "por %%xmm5,%%xmm3 \n"
- "por %%xmm5,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movdqa %%xmm1," MEMACCESS(2) " \n"
- "movdqa %%xmm2," MEMACCESS2(0x10,2) " \n"
- "movdqa %%xmm3," MEMACCESS2(0x20,2) " \n"
- "movdqa %%xmm0," MEMACCESS2(0x30,2) " \n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "jg 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-#endif // HAS_SOBELROW_SSE2
-
-#ifdef HAS_SOBELTOPLANEROW_SSE2
-// Adds Sobel X and Sobel Y and stores Sobel into a plane.
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width) {
- asm volatile (
- "sub %0,%1 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0x18,%%xmm5 \n"
-
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "sub $0x10,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jg 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_y), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-#endif // HAS_SOBELTOPLANEROW_SSE2
-
-#ifdef HAS_SOBELXYROW_SSE2
-// Mixes Sobel X, Sobel Y and Sobel into ARGB.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "sub %0,%1 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
-
- // 8 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "paddusb %%xmm1,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "punpcklbw %%xmm5,%%xmm3 \n"
- "punpckhbw %%xmm5,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm4 \n"
- "punpcklbw %%xmm2,%%xmm4 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "movdqa %%xmm4,%%xmm6 \n"
- "punpcklwd %%xmm3,%%xmm6 \n"
- "punpckhwd %%xmm3,%%xmm4 \n"
- "movdqa %%xmm1,%%xmm7 \n"
- "punpcklwd %%xmm0,%%xmm7 \n"
- "punpckhwd %%xmm0,%%xmm1 \n"
- "sub $0x10,%3 \n"
- "movdqa %%xmm6," MEMACCESS(2) " \n"
- "movdqa %%xmm4," MEMACCESS2(0x10,2) " \n"
- "movdqa %%xmm7," MEMACCESS2(0x20,2) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x30,2) " \n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "jg 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-#endif // HAS_SOBELXYROW_SSE2
-
-#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
-// Creates a table of cumulative sums where each value is a sum of all values
-// above and to the left of the value, inclusive of the value.
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) {
- asm volatile (
- "pxor %%xmm0,%%xmm0 \n"
- "pxor %%xmm1,%%xmm1 \n"
- "sub $0x4,%3 \n"
- "jl 49f \n"
- "test $0xf,%1 \n"
- "jne 49f \n"
-
- // 4 pixel loop \n"
- LABELALIGN
- "40: \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm2,%%xmm4 \n"
- "punpcklbw %%xmm1,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklwd %%xmm1,%%xmm2 \n"
- "punpckhwd %%xmm1,%%xmm3 \n"
- "punpckhbw %%xmm1,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "punpcklwd %%xmm1,%%xmm4 \n"
- "punpckhwd %%xmm1,%%xmm5 \n"
- "paddd %%xmm2,%%xmm0 \n"
- "movdqa " MEMACCESS(2) ",%%xmm2 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "paddd %%xmm3,%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,2) ",%%xmm3 \n"
- "paddd %%xmm0,%%xmm3 \n"
- "paddd %%xmm4,%%xmm0 \n"
- "movdqa " MEMACCESS2(0x20,2) ",%%xmm4 \n"
- "paddd %%xmm0,%%xmm4 \n"
- "paddd %%xmm5,%%xmm0 \n"
- "movdqa " MEMACCESS2(0x30,2) ",%%xmm5 \n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "paddd %%xmm0,%%xmm5 \n"
- "movdqa %%xmm2," MEMACCESS(1) " \n"
- "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n"
- "movdqa %%xmm4," MEMACCESS2(0x20,1) " \n"
- "movdqa %%xmm5," MEMACCESS2(0x30,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x4,%3 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%3 \n"
- "jl 19f \n"
-
- // 1 pixel loop \n"
- LABELALIGN
- "10: \n"
- "movd " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "punpcklbw %%xmm1,%%xmm2 \n"
- "punpcklwd %%xmm1,%%xmm2 \n"
- "paddd %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(2) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "movdqu %%xmm2," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x1,%3 \n"
- "jge 10b \n"
-
- "19: \n"
- : "+r"(row), // %0
- "+r"(cumsum), // %1
- "+r"(previous_cumsum), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
-
-#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst,
- int count) {
- asm volatile (
- "movd %5,%%xmm5 \n"
- "cvtdq2ps %%xmm5,%%xmm5 \n"
- "rcpss %%xmm5,%%xmm4 \n"
- "pshufd $0x0,%%xmm4,%%xmm4 \n"
- "sub $0x4,%3 \n"
- "jl 49f \n"
- "cmpl $0x80,%5 \n"
- "ja 40f \n"
-
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrld $0x10,%%xmm6 \n"
- "cvtdq2ps %%xmm6,%%xmm6 \n"
- "addps %%xmm6,%%xmm5 \n"
- "mulps %%xmm4,%%xmm5 \n"
- "cvtps2dq %%xmm5,%%xmm5 \n"
- "packssdw %%xmm5,%%xmm5 \n"
-
- // 4 pixel small loop \n"
- LABELALIGN
- "4: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- BUNDLEALIGN
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
- MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
- MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "psubd " MEMACCESS(1) ",%%xmm0 \n"
- "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
- "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
- "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
- BUNDLEALIGN
- MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
- MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
- MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
- MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "packssdw %%xmm1,%%xmm0 \n"
- "packssdw %%xmm3,%%xmm2 \n"
- "pmulhuw %%xmm5,%%xmm0 \n"
- "pmulhuw %%xmm5,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jge 4b \n"
- "jmp 49f \n"
-
- // 4 pixel loop \n"
- LABELALIGN
- "40: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- BUNDLEALIGN
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
- MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
- MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "psubd " MEMACCESS(1) ",%%xmm0 \n"
- "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
- "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
- "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
- BUNDLEALIGN
- MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
- MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
- MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
- MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "cvtdq2ps %%xmm0,%%xmm0 \n"
- "cvtdq2ps %%xmm1,%%xmm1 \n"
- "mulps %%xmm4,%%xmm0 \n"
- "mulps %%xmm4,%%xmm1 \n"
- "cvtdq2ps %%xmm2,%%xmm2 \n"
- "cvtdq2ps %%xmm3,%%xmm3 \n"
- "mulps %%xmm4,%%xmm2 \n"
- "mulps %%xmm4,%%xmm3 \n"
- "cvtps2dq %%xmm0,%%xmm0 \n"
- "cvtps2dq %%xmm1,%%xmm1 \n"
- "cvtps2dq %%xmm2,%%xmm2 \n"
- "cvtps2dq %%xmm3,%%xmm3 \n"
- "packssdw %%xmm1,%%xmm0 \n"
- "packssdw %%xmm3,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%3 \n"
- "jl 19f \n"
-
- // 1 pixel loop \n"
- LABELALIGN
- "10: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "psubd " MEMACCESS(1) ",%%xmm0 \n"
- BUNDLEALIGN
- MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "cvtdq2ps %%xmm0,%%xmm0 \n"
- "mulps %%xmm4,%%xmm0 \n"
- "cvtps2dq %%xmm0,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "sub $0x1,%3 \n"
- "jge 10b \n"
- "19: \n"
- : "+r"(topleft), // %0
- "+r"(botleft), // %1
- "+r"(dst), // %2
- "+rm"(count) // %3
- : "r"((intptr_t)(width)), // %4
- "rm"(area) // %5
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-
-#ifdef HAS_ARGBAFFINEROW_SSE2
-// Copy ARGB pixels from source image with slope to a row of destination.
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* src_dudv, int width) {
- intptr_t src_argb_stride_temp = src_argb_stride;
- intptr_t temp = 0;
- asm volatile (
- "movq " MEMACCESS(3) ",%%xmm2 \n"
- "movq " MEMACCESS2(0x08,3) ",%%xmm7 \n"
- "shl $0x10,%1 \n"
- "add $0x4,%1 \n"
- "movd %1,%%xmm5 \n"
- "sub $0x4,%4 \n"
- "jl 49f \n"
-
- "pshufd $0x44,%%xmm7,%%xmm7 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "movdqa %%xmm2,%%xmm0 \n"
- "addps %%xmm7,%%xmm0 \n"
- "movlhps %%xmm0,%%xmm2 \n"
- "movdqa %%xmm7,%%xmm4 \n"
- "addps %%xmm4,%%xmm4 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "addps %%xmm4,%%xmm3 \n"
- "addps %%xmm4,%%xmm4 \n"
-
- // 4 pixel loop \n"
- LABELALIGN
- "40: \n"
- "cvttps2dq %%xmm2,%%xmm0 \n" // x, y float to int first 2
- "cvttps2dq %%xmm3,%%xmm1 \n" // x, y float to int next 2
- "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts
- "pmaddwd %%xmm5,%%xmm0 \n" // off = x * 4 + y * stride
- "movd %%xmm0,%k1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k5 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
- BUNDLEALIGN
- MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
- MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
- "punpckldq %%xmm6,%%xmm1 \n"
- "addps %%xmm4,%%xmm2 \n"
- "movq %%xmm1," MEMACCESS(2) " \n"
- "movd %%xmm0,%k1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k5 \n"
- BUNDLEALIGN
- MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
- MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
- "punpckldq %%xmm6,%%xmm0 \n"
- "addps %%xmm4,%%xmm3 \n"
- "sub $0x4,%4 \n"
- "movq %%xmm0," MEMACCESS2(0x08,2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%4 \n"
- "jl 19f \n"
-
- // 1 pixel loop \n"
- LABELALIGN
- "10: \n"
- "cvttps2dq %%xmm2,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n"
- "pmaddwd %%xmm5,%%xmm0 \n"
- "addps %%xmm7,%%xmm2 \n"
- "movd %%xmm0,%k1 \n"
- BUNDLEALIGN
- MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
- "sub $0x1,%4 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x04,2) ",%2 \n"
- "jge 10b \n"
- "19: \n"
- : "+r"(src_argb), // %0
- "+r"(src_argb_stride_temp), // %1
- "+r"(dst_argb), // %2
- "+r"(src_dudv), // %3
- "+rm"(width), // %4
- "+r"(temp) // %5
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-#endif // HAS_ARGBAFFINEROW_SSE2
-
-#ifdef HAS_INTERPOLATEROW_SSSE3
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- asm volatile (
- "sub %1,%0 \n"
- "shr %3 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
- "je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
-
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm2)
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "pmaddubsw %%xmm5,%%xmm0 \n"
- "pmaddubsw %%xmm5,%%xmm1 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqa " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm0)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- "sub $0x10,%2 \n"
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm5"
-#endif
- );
-}
-#endif // HAS_INTERPOLATEROW_SSSE3
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- asm volatile (
- "sub %1,%0 \n"
- "shr %3 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
- "je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
-
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm2) // movdqa (%1,%4,1),%%xmm2
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm2 \n"
- "punpckhbw %%xmm4,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm0 \n"
- "punpckhbw %%xmm4,%%xmm1 \n"
- "psubw %%xmm0,%%xmm2 \n"
- "psubw %%xmm1,%%xmm3 \n"
- "paddw %%xmm2,%%xmm2 \n"
- "paddw %%xmm3,%%xmm3 \n"
- "pmulhw %%xmm5,%%xmm2 \n"
- "pmulhw %%xmm5,%%xmm3 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqa " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqa,0x00,1,4,1,xmm0) // movdqa (%1,%4,1),%%xmm0
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- "sub $0x10,%2 \n"
- MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_INTERPOLATEROW_SSE2
-
-#ifdef HAS_INTERPOLATEROW_SSSE3
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- asm volatile (
- "sub %1,%0 \n"
- "shr %3 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
- "je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
-
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm2)
- "movdqu %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "pmaddubsw %%xmm5,%%xmm0 \n"
- "pmaddubsw %%xmm5,%%xmm1 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm0)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- "sub $0x10,%2 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm5"
-#endif
- );
-}
-#endif // HAS_INTERPOLATEROW_SSSE3
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- asm volatile (
- "sub %1,%0 \n"
- "shr %3 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
- "je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
-
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2
- "movdqu %%xmm0,%%xmm1 \n"
- "movdqu %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm2 \n"
- "punpckhbw %%xmm4,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm0 \n"
- "punpckhbw %%xmm4,%%xmm1 \n"
- "psubw %%xmm0,%%xmm2 \n"
- "psubw %%xmm1,%%xmm3 \n"
- "paddw %%xmm2,%%xmm2 \n"
- "paddw %%xmm3,%%xmm3 \n"
- "pmulhw %%xmm5,%%xmm2 \n"
- "pmulhw %%xmm5,%%xmm3 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "sub $0x10,%2 \n"
- BUNDLEALIGN
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- "sub $0x10,%2 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_INTERPOLATEROW_SSE2
-
-#ifdef HAS_HALFROW_SSE2
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- asm volatile (
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(pavgb,0x00,0,3,1,xmm0) // pavgb (%0,%3),%%xmm0
- "sub $0x10,%2 \n"
- MEMOPMEM(movdqa,xmm0,0x00,0,1,1) // movdqa %%xmm0,(%0,%1)
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "jg 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_uv), // %1
- "+r"(pix) // %2
- : "r"((intptr_t)(src_uv_stride)) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0"
-#endif
- );
-}
-#endif // HAS_HALFROW_SSE2
-
-#ifdef HAS_ARGBTOBAYERROW_SSSE3
-void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- asm volatile (
- // NaCL caveat - assumes movd is from GPR
- "movd %3,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "punpckldq %%xmm1,%%xmm0 \n"
- "sub $0x8,%2 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- : "g"(selector) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBTOBAYERROW_SSSE3
-
-#ifdef HAS_ARGBTOBAYERGGROW_SSE2
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrld $0x18,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrld $0x8,%%xmm0 \n"
- "psrld $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packssdw %%xmm1,%%xmm0 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x8,%2 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_bayer), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBTOBAYERGGROW_SSE2
-
-#ifdef HAS_ARGBSHUFFLEROW_SSSE3
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- asm volatile (
- "movdqa " MEMACCESS(3) ",%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "sub $0x8,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "r"(shuffler) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- asm volatile (
- "movdqa " MEMACCESS(3) ",%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "sub $0x8,%2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "r"(shuffler) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBSHUFFLEROW_SSSE3
-
-#ifdef HAS_ARGBSHUFFLEROW_AVX2
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- asm volatile (
- "vbroadcastf128 " MEMACCESS(3) ",%%ymm5 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
- "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
- "sub $0x10,%2 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "r"(shuffler) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBSHUFFLEROW_AVX2
-
-#ifdef HAS_ARGBSHUFFLEROW_SSE2
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- uintptr_t pixel_temp = 0u;
- asm volatile (
- "pxor %%xmm5,%%xmm5 \n"
- "mov " MEMACCESS(4) ",%k2 \n"
- "cmp $0x3000102,%k2 \n"
- "je 3012f \n"
- "cmp $0x10203,%k2 \n"
- "je 123f \n"
- "cmp $0x30201,%k2 \n"
- "je 321f \n"
- "cmp $0x2010003,%k2 \n"
- "je 2103f \n"
-
- LABELALIGN
- "1: \n"
- "movzb " MEMACCESS(4) ",%2 \n"
- MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
- "mov %b2," MEMACCESS(1) " \n"
- "movzb " MEMACCESS2(0x1,4) ",%2 \n"
- MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
- "mov %b2," MEMACCESS2(0x1,1) " \n"
- BUNDLEALIGN
- "movzb " MEMACCESS2(0x2,4) ",%2 \n"
- MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
- "mov %b2," MEMACCESS2(0x2,1) " \n"
- "movzb " MEMACCESS2(0x3,4) ",%2 \n"
- MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
- "mov %b2," MEMACCESS2(0x3,1) " \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "sub $0x1,%3 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- LABELALIGN
- "123: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pshufhw $0x1b,%%xmm0,%%xmm0 \n"
- "pshuflw $0x1b,%%xmm0,%%xmm0 \n"
- "pshufhw $0x1b,%%xmm1,%%xmm1 \n"
- "pshuflw $0x1b,%%xmm1,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 123b \n"
- "jmp 99f \n"
-
- LABELALIGN
- "321: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pshufhw $0x39,%%xmm0,%%xmm0 \n"
- "pshuflw $0x39,%%xmm0,%%xmm0 \n"
- "pshufhw $0x39,%%xmm1,%%xmm1 \n"
- "pshuflw $0x39,%%xmm1,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 321b \n"
- "jmp 99f \n"
-
- LABELALIGN
- "2103: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pshufhw $0x93,%%xmm0,%%xmm0 \n"
- "pshuflw $0x93,%%xmm0,%%xmm0 \n"
- "pshufhw $0x93,%%xmm1,%%xmm1 \n"
- "pshuflw $0x93,%%xmm1,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 2103b \n"
- "jmp 99f \n"
-
- LABELALIGN
- "3012: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pshufhw $0xc6,%%xmm0,%%xmm0 \n"
- "pshuflw $0xc6,%%xmm0,%%xmm0 \n"
- "pshufhw $0xc6,%%xmm1,%%xmm1 \n"
- "pshuflw $0xc6,%%xmm1,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 3012b \n"
-
- "99: \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+d"(pixel_temp), // %2
- "+r"(pix) // %3
- : "r"(shuffler) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBSHUFFLEROW_SSE2
-
-#ifdef HAS_I422TOYUY2ROW_SSE2
-void I422ToYUY2Row_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(1) ",%%xmm2 \n"
- MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(3) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,3) " \n"
- "lea " MEMLEA(0x20,3) ",%3 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_frame), // %3
- "+rm"(width) // %4
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
- );
-}
-#endif // HAS_I422TOYUY2ROW_SSE2
-
-#ifdef HAS_I422TOUYVYROW_SSE2
-void I422ToUYVYRow_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(1) ",%%xmm2 \n"
- MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "punpcklbw %%xmm0,%%xmm1 \n"
- "punpckhbw %%xmm0,%%xmm2 \n"
- "movdqu %%xmm1," MEMACCESS(3) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x10,3) " \n"
- "lea " MEMLEA(0x20,3) ",%3 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_frame), // %3
- "+rm"(width) // %4
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
- );
-}
-#endif // HAS_I422TOUYVYROW_SSE2
-
-#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- asm volatile (
- "pxor %%xmm3,%%xmm3 \n"
-
- // 2 pixel loop.
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm3,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm4 \n"
- "punpcklwd %%xmm3,%%xmm0 \n"
- "punpckhwd %%xmm3,%%xmm4 \n"
- "cvtdq2ps %%xmm0,%%xmm0 \n"
- "cvtdq2ps %%xmm4,%%xmm4 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "mulps " MEMACCESS2(0x10,3) ",%%xmm0 \n"
- "mulps " MEMACCESS2(0x10,3) ",%%xmm4 \n"
- "addps " MEMACCESS(3) ",%%xmm0 \n"
- "addps " MEMACCESS(3) ",%%xmm4 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "movdqa %%xmm5,%%xmm6 \n"
- "mulps %%xmm1,%%xmm2 \n"
- "mulps %%xmm5,%%xmm6 \n"
- "mulps %%xmm2,%%xmm1 \n"
- "mulps %%xmm6,%%xmm5 \n"
- "mulps " MEMACCESS2(0x20,3) ",%%xmm2 \n"
- "mulps " MEMACCESS2(0x20,3) ",%%xmm6 \n"
- "mulps " MEMACCESS2(0x30,3) ",%%xmm1 \n"
- "mulps " MEMACCESS2(0x30,3) ",%%xmm5 \n"
- "addps %%xmm2,%%xmm0 \n"
- "addps %%xmm6,%%xmm4 \n"
- "addps %%xmm1,%%xmm0 \n"
- "addps %%xmm5,%%xmm4 \n"
- "cvttps2dq %%xmm0,%%xmm0 \n"
- "cvttps2dq %%xmm4,%%xmm4 \n"
- "packuswb %%xmm4,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "sub $0x2,%2 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(poly) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-#endif // HAS_ARGBPOLYNOMIALROW_SSE2
-
-#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- asm volatile (
- "vbroadcastf128 " MEMACCESS(3) ",%%ymm4 \n"
- "vbroadcastf128 " MEMACCESS2(0x10,3) ",%%ymm5 \n"
- "vbroadcastf128 " MEMACCESS2(0x20,3) ",%%ymm6 \n"
- "vbroadcastf128 " MEMACCESS2(0x30,3) ",%%ymm7 \n"
-
- // 2 pixel loop.
- LABELALIGN
- "1: \n"
- "vpmovzxbd " MEMACCESS(0) ",%%ymm0 \n" // 2 ARGB pixels
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats
- "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X
- "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X
- "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X
- "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X
- "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * X
- "vcvttps2dq %%ymm0,%%ymm0 \n"
- "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n"
- "sub $0x2,%2 \n"
- "vmovq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(poly) // %3
- : "memory", "cc"
-#if defined(__SSE2__)
-// TODO(fbarchard): declare ymm usage when applicable.
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-#endif // HAS_ARGBPOLYNOMIALROW_AVX2
-
-#ifdef HAS_ARGBCOLORTABLEROW_X86
-// Tranform ARGB pixels with color table.
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
- int width) {
- uintptr_t pixel_temp = 0u;
- asm volatile (
- // 1 pixel loop.
- LABELALIGN
- "1: \n"
- "movzb " MEMACCESS(0) ",%1 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x4,0) " \n"
- "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
- MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x3,0) " \n"
- "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
- MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x2,0) " \n"
- "movzb " MEMACCESS2(-0x1,0) ",%1 \n"
- MEMOPARG(movzb,0x03,3,1,4,1) " \n" // movzb 0x3(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x1,0) " \n"
- "dec %2 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "+d"(pixel_temp), // %1
- "+r"(width) // %2
- : "r"(table_argb) // %3
- : "memory", "cc");
-}
-#endif // HAS_ARGBCOLORTABLEROW_X86
-
-#ifdef HAS_RGBCOLORTABLEROW_X86
-// Tranform RGB pixels with color table.
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
- uintptr_t pixel_temp = 0u;
- asm volatile (
- // 1 pixel loop.
- LABELALIGN
- "1: \n"
- "movzb " MEMACCESS(0) ",%1 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x4,0) " \n"
- "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
- MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x3,0) " \n"
- "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
- MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x2,0) " \n"
- "dec %2 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "+d"(pixel_temp), // %1
- "+r"(width) // %2
- : "r"(table_argb) // %3
- : "memory", "cc");
-}
-#endif // HAS_RGBCOLORTABLEROW_X86
-
-#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
-// Tranform RGB pixels with luma table.
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- int width,
- const uint8* luma, uint32 lumacoeff) {
- uintptr_t pixel_temp = 0u;
- uintptr_t table_temp = 0u;
- asm volatile (
- "movd %6,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psllw $0x8,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
-
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(2) ",%%xmm0 \n"
- "pmaddubsw %%xmm3,%%xmm0 \n"
- "phaddw %%xmm0,%%xmm0 \n"
- "pand %%xmm4,%%xmm0 \n"
- "punpcklwd %%xmm5,%%xmm0 \n"
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
-
- "movzb " MEMACCESS(2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS(3) " \n"
- "movzb " MEMACCESS2(0x1,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x1,3) " \n"
- "movzb " MEMACCESS2(0x2,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x2,3) " \n"
- "movzb " MEMACCESS2(0x3,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0x3,3) " \n"
-
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
-
- "movzb " MEMACCESS2(0x4,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x4,3) " \n"
- BUNDLEALIGN
- "movzb " MEMACCESS2(0x5,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x5,3) " \n"
- "movzb " MEMACCESS2(0x6,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x6,3) " \n"
- "movzb " MEMACCESS2(0x7,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0x7,3) " \n"
-
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
-
- "movzb " MEMACCESS2(0x8,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x8,3) " \n"
- "movzb " MEMACCESS2(0x9,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x9,3) " \n"
- "movzb " MEMACCESS2(0xa,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xa,3) " \n"
- "movzb " MEMACCESS2(0xb,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0xb,3) " \n"
-
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
-
- "movzb " MEMACCESS2(0xc,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xc,3) " \n"
- "movzb " MEMACCESS2(0xd,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xd,3) " \n"
- "movzb " MEMACCESS2(0xe,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xe,3) " \n"
- "movzb " MEMACCESS2(0xf,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0xf,3) " \n"
- "sub $0x4,%4 \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "lea " MEMLEA(0x10,3) ",%3 \n"
- "jg 1b \n"
- : "+d"(pixel_temp), // %0
- "+a"(table_temp), // %1
- "+r"(src_argb), // %2
- "+r"(dst_argb), // %3
- "+rm"(width) // %4
- : "r"(luma), // %5
- "rm"(lumacoeff) // %6
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
-
-#endif // defined(__x86_64__) || defined(__i386__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_win.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_win.cc
deleted file mode 100755
index f13e4d7ae5..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_win.cc
+++ /dev/null
@@ -1,7284 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-#ifdef HAS_ARGBTOYROW_SSSE3
-
-// Constants for ARGB.
-static const vec8 kARGBToY = {
- 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
-};
-
-// JPeg full range.
-static const vec8 kARGBToYJ = {
- 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
-};
-
-static const vec8 kARGBToU = {
- 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
-};
-
-static const vec8 kARGBToUJ = {
- 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
-};
-
-static const vec8 kARGBToV = {
- -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
-};
-
-static const vec8 kARGBToVJ = {
- -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
-};
-
-// vpermd for vphaddw + vpackuswb vpermd.
-static const lvec32 kPermdARGBToY_AVX = {
- 0, 4, 1, 5, 2, 6, 3, 7
-};
-
-// vpshufb for vphaddw + vpackuswb packed to shorts.
-static const lvec8 kShufARGBToUV_AVX = {
- 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
- 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
-};
-
-// Constants for BGRA.
-static const vec8 kBGRAToY = {
- 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
-};
-
-static const vec8 kBGRAToU = {
- 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
-};
-
-static const vec8 kBGRAToV = {
- 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
-};
-
-// Constants for ABGR.
-static const vec8 kABGRToY = {
- 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
-};
-
-static const vec8 kABGRToU = {
- -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
-};
-
-static const vec8 kABGRToV = {
- 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
-};
-
-// Constants for RGBA.
-static const vec8 kRGBAToY = {
- 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33
-};
-
-static const vec8 kRGBAToU = {
- 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38
-};
-
-static const vec8 kRGBAToV = {
- 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112
-};
-
-static const uvec8 kAddY16 = {
- 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
-};
-
-static const vec16 kAddYJ64 = {
- 64, 64, 64, 64, 64, 64, 64, 64
-};
-
-static const uvec8 kAddUV128 = {
- 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
- 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
-};
-
-static const uvec16 kAddUVJ128 = {
- 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
-};
-
-// Shuffle table for converting RGB24 to ARGB.
-static const uvec8 kShuffleMaskRGB24ToARGB = {
- 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
-};
-
-// Shuffle table for converting RAW to ARGB.
-static const uvec8 kShuffleMaskRAWToARGB = {
- 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
-};
-
-// Shuffle table for converting ARGB to RGB24.
-static const uvec8 kShuffleMaskARGBToRGB24 = {
- 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGB to RAW.
-static const uvec8 kShuffleMaskARGBToRAW = {
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4
-static const uvec8 kShuffleMaskARGBToRGB24_0 = {
- 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
-};
-
-// Shuffle table for converting ARGB to RAW.
-static const uvec8 kShuffleMaskARGBToRAW_0 = {
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
-};
-
-// Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked) __declspec(align(16))
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_y
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0xff000000
- pslld xmm5, 24
-
- align 4
- convertloop:
- movq xmm0, qword ptr [eax]
- lea eax, [eax + 8]
- punpcklbw xmm0, xmm0
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm0
- punpckhwd xmm1, xmm1
- por xmm0, xmm5
- por xmm1, xmm5
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, [esp + 4] // src_y
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0xff000000
- pslld xmm5, 24
-
- align 4
- convertloop:
- movq xmm0, qword ptr [eax]
- lea eax, [eax + 8]
- punpcklbw xmm0, xmm0
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm0
- punpckhwd xmm1, xmm1
- por xmm0, xmm5
- por xmm1, xmm5
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_rgb24
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0xff000000
- pslld xmm5, 24
- movdqa xmm4, kShuffleMaskRGB24ToARGB
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm3, [eax + 32]
- lea eax, [eax + 48]
- movdqa xmm2, xmm3
- palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
- pshufb xmm2, xmm4
- por xmm2, xmm5
- palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
- pshufb xmm0, xmm4
- movdqa [edx + 32], xmm2
- por xmm0, xmm5
- pshufb xmm1, xmm4
- movdqa [edx], xmm0
- por xmm1, xmm5
- palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]}
- pshufb xmm3, xmm4
- movdqa [edx + 16], xmm1
- por xmm3, xmm5
- sub ecx, 16
- movdqa [edx + 48], xmm3
- lea edx, [edx + 64]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, [esp + 4] // src_raw
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0xff000000
- pslld xmm5, 24
- movdqa xmm4, kShuffleMaskRAWToARGB
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm3, [eax + 32]
- lea eax, [eax + 48]
- movdqa xmm2, xmm3
- palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
- pshufb xmm2, xmm4
- por xmm2, xmm5
- palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
- pshufb xmm0, xmm4
- movdqa [edx + 32], xmm2
- por xmm0, xmm5
- pshufb xmm1, xmm4
- movdqa [edx], xmm0
- por xmm1, xmm5
- palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]}
- pshufb xmm3, xmm4
- movdqa [edx + 16], xmm1
- por xmm3, xmm5
- sub ecx, 16
- movdqa [edx + 48], xmm3
- lea edx, [edx + 64]
- jg convertloop
- ret
- }
-}
-
-// pmul method to replicate bits.
-// Math to replicate bits:
-// (v << 8) | (v << 3)
-// v * 256 + v * 8
-// v * (256 + 8)
-// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
-// 20 instructions.
-__declspec(naked) __declspec(align(16))
-void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, 0x01080108 // generate multiplier to repeat 5 bits
- movd xmm5, eax
- pshufd xmm5, xmm5, 0
- mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits
- movd xmm6, eax
- pshufd xmm6, xmm6, 0
- pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red
- psllw xmm3, 11
- pcmpeqb xmm4, xmm4 // generate mask 0x07e007e0 for Green
- psllw xmm4, 10
- psrlw xmm4, 5
- pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha
- psllw xmm7, 8
-
- mov eax, [esp + 4] // src_rgb565
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- sub edx, eax
- sub edx, eax
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // fetch 8 pixels of bgr565
- movdqa xmm1, xmm0
- movdqa xmm2, xmm0
- pand xmm1, xmm3 // R in upper 5 bits
- psllw xmm2, 11 // B in upper 5 bits
- pmulhuw xmm1, xmm5 // * (256 + 8)
- pmulhuw xmm2, xmm5 // * (256 + 8)
- psllw xmm1, 8
- por xmm1, xmm2 // RB
- pand xmm0, xmm4 // G in middle 6 bits
- pmulhuw xmm0, xmm6 // << 5 * (256 + 4)
- por xmm0, xmm7 // AG
- movdqa xmm2, xmm1
- punpcklbw xmm1, xmm0
- punpckhbw xmm2, xmm0
- movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
- movdqa [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
- lea eax, [eax + 16]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-// 24 instructions
-__declspec(naked) __declspec(align(16))
-void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, 0x01080108 // generate multiplier to repeat 5 bits
- movd xmm5, eax
- pshufd xmm5, xmm5, 0
- mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits
- movd xmm6, eax
- pshufd xmm6, xmm6, 0
- pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red
- psllw xmm3, 11
- movdqa xmm4, xmm3 // generate mask 0x03e003e0 for Green
- psrlw xmm4, 6
- pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha
- psllw xmm7, 8
-
- mov eax, [esp + 4] // src_argb1555
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- sub edx, eax
- sub edx, eax
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // fetch 8 pixels of 1555
- movdqa xmm1, xmm0
- movdqa xmm2, xmm0
- psllw xmm1, 1 // R in upper 5 bits
- psllw xmm2, 11 // B in upper 5 bits
- pand xmm1, xmm3
- pmulhuw xmm2, xmm5 // * (256 + 8)
- pmulhuw xmm1, xmm5 // * (256 + 8)
- psllw xmm1, 8
- por xmm1, xmm2 // RB
- movdqa xmm2, xmm0
- pand xmm0, xmm4 // G in middle 5 bits
- psraw xmm2, 8 // A
- pmulhuw xmm0, xmm6 // << 6 * (256 + 8)
- pand xmm2, xmm7
- por xmm0, xmm2 // AG
- movdqa xmm2, xmm1
- punpcklbw xmm1, xmm0
- punpckhbw xmm2, xmm0
- movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
- movdqa [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
- lea eax, [eax + 16]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-// 18 instructions.
-__declspec(naked) __declspec(align(16))
-void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f
- movd xmm4, eax
- pshufd xmm4, xmm4, 0
- movdqa xmm5, xmm4 // 0xf0f0f0f0 for high nibbles
- pslld xmm5, 4
- mov eax, [esp + 4] // src_argb4444
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- sub edx, eax
- sub edx, eax
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // fetch 8 pixels of bgra4444
- movdqa xmm2, xmm0
- pand xmm0, xmm4 // mask low nibbles
- pand xmm2, xmm5 // mask high nibbles
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- psllw xmm1, 4
- psrlw xmm3, 4
- por xmm0, xmm1
- por xmm2, xmm3
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm2
- punpckhbw xmm1, xmm2
- movdqa [eax * 2 + edx], xmm0 // store 4 pixels of ARGB
- movdqa [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB
- lea eax, [eax + 16]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- movdqa xmm6, kShuffleMaskARGBToRGB24
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // fetch 16 pixels of argb
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- lea eax, [eax + 64]
- pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB
- pshufb xmm1, xmm6
- pshufb xmm2, xmm6
- pshufb xmm3, xmm6
- movdqa xmm4, xmm1 // 4 bytes from 1 for 0
- psrldq xmm1, 4 // 8 bytes from 1
- pslldq xmm4, 12 // 4 bytes from 1 for 0
- movdqa xmm5, xmm2 // 8 bytes from 2 for 1
- por xmm0, xmm4 // 4 bytes from 1 for 0
- pslldq xmm5, 8 // 8 bytes from 2 for 1
- movdqu [edx], xmm0 // store 0
- por xmm1, xmm5 // 8 bytes from 2 for 1
- psrldq xmm2, 8 // 4 bytes from 2
- pslldq xmm3, 4 // 12 bytes from 3 for 2
- por xmm2, xmm3 // 12 bytes from 3 for 2
- movdqu [edx + 16], xmm1 // store 1
- movdqu [edx + 32], xmm2 // store 2
- lea edx, [edx + 48]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- movdqa xmm6, kShuffleMaskARGBToRAW
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // fetch 16 pixels of argb
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- lea eax, [eax + 64]
- pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB
- pshufb xmm1, xmm6
- pshufb xmm2, xmm6
- pshufb xmm3, xmm6
- movdqa xmm4, xmm1 // 4 bytes from 1 for 0
- psrldq xmm1, 4 // 8 bytes from 1
- pslldq xmm4, 12 // 4 bytes from 1 for 0
- movdqa xmm5, xmm2 // 8 bytes from 2 for 1
- por xmm0, xmm4 // 4 bytes from 1 for 0
- pslldq xmm5, 8 // 8 bytes from 2 for 1
- movdqu [edx], xmm0 // store 0
- por xmm1, xmm5 // 8 bytes from 2 for 1
- psrldq xmm2, 8 // 4 bytes from 2
- pslldq xmm3, 4 // 12 bytes from 3 for 2
- por xmm2, xmm3 // 12 bytes from 3 for 2
- movdqu [edx + 16], xmm1 // store 1
- movdqu [edx + 32], xmm2 // store 2
- lea edx, [edx + 48]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm3, xmm3 // generate mask 0x0000001f
- psrld xmm3, 27
- pcmpeqb xmm4, xmm4 // generate mask 0x000007e0
- psrld xmm4, 26
- pslld xmm4, 5
- pcmpeqb xmm5, xmm5 // generate mask 0xfffff800
- pslld xmm5, 11
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // fetch 4 pixels of argb
- movdqa xmm1, xmm0 // B
- movdqa xmm2, xmm0 // G
- pslld xmm0, 8 // R
- psrld xmm1, 3 // B
- psrld xmm2, 5 // G
- psrad xmm0, 16 // R
- pand xmm1, xmm3 // B
- pand xmm2, xmm4 // G
- pand xmm0, xmm5 // R
- por xmm1, xmm2 // BG
- por xmm0, xmm1 // BGR
- packssdw xmm0, xmm0
- lea eax, [eax + 16]
- movq qword ptr [edx], xmm0 // store 4 pixels of RGB565
- lea edx, [edx + 8]
- sub ecx, 4
- jg convertloop
- ret
- }
-}
-
-// TODO(fbarchard): Improve sign extension/packing.
-__declspec(naked) __declspec(align(16))
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm4, xmm4 // generate mask 0x0000001f
- psrld xmm4, 27
- movdqa xmm5, xmm4 // generate mask 0x000003e0
- pslld xmm5, 5
- movdqa xmm6, xmm4 // generate mask 0x00007c00
- pslld xmm6, 10
- pcmpeqb xmm7, xmm7 // generate mask 0xffff8000
- pslld xmm7, 15
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // fetch 4 pixels of argb
- movdqa xmm1, xmm0 // B
- movdqa xmm2, xmm0 // G
- movdqa xmm3, xmm0 // R
- psrad xmm0, 16 // A
- psrld xmm1, 3 // B
- psrld xmm2, 6 // G
- psrld xmm3, 9 // R
- pand xmm0, xmm7 // A
- pand xmm1, xmm4 // B
- pand xmm2, xmm5 // G
- pand xmm3, xmm6 // R
- por xmm0, xmm1 // BA
- por xmm2, xmm3 // GR
- por xmm0, xmm2 // BGRA
- packssdw xmm0, xmm0
- lea eax, [eax + 16]
- movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555
- lea edx, [edx + 8]
- sub ecx, 4
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm4, xmm4 // generate mask 0xf000f000
- psllw xmm4, 12
- movdqa xmm3, xmm4 // generate mask 0x00f000f0
- psrlw xmm3, 8
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // fetch 4 pixels of argb
- movdqa xmm1, xmm0
- pand xmm0, xmm3 // low nibble
- pand xmm1, xmm4 // high nibble
- psrl xmm0, 4
- psrl xmm1, 8
- por xmm0, xmm1
- packuswb xmm0, xmm0
- lea eax, [eax + 16]
- movq qword ptr [edx], xmm0 // store 4 pixels of ARGB4444
- lea edx, [edx + 8]
- sub ecx, 4
- jg convertloop
- ret
- }
-}
-
-// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) __declspec(align(16))
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16
- movdqa xmm4, kARGBToY
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- paddb xmm0, xmm5
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) __declspec(align(16))
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm4, kARGBToYJ
- movdqa xmm5, kAddYJ64
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- paddw xmm0, xmm5 // Add .5 for rounding.
- paddw xmm2, xmm5
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-#ifdef HAS_ARGBTOYROW_AVX2
-// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) __declspec(align(32))
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- vbroadcastf128 ymm4, kARGBToY
- vbroadcastf128 ymm5, kAddY16
- vmovdqa ymm6, kPermdARGBToY_AVX
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- vmovdqu ymm2, [eax + 64]
- vmovdqu ymm3, [eax + 96]
- vpmaddubsw ymm0, ymm0, ymm4
- vpmaddubsw ymm1, ymm1, ymm4
- vpmaddubsw ymm2, ymm2, ymm4
- vpmaddubsw ymm3, ymm3, ymm4
- lea eax, [eax + 128]
- vphaddw ymm0, ymm0, ymm1 // mutates.
- vphaddw ymm2, ymm2, ymm3
- vpsrlw ymm0, ymm0, 7
- vpsrlw ymm2, ymm2, 7
- vpackuswb ymm0, ymm0, ymm2 // mutates.
- vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation.
- vpaddb ymm0, ymm0, ymm5
- sub ecx, 32
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBTOYROW_AVX2
-
-#ifdef HAS_ARGBTOYROW_AVX2
-/* ARGBToYJRow_AVX2: convert 32 ARGB pixels (128 bytes) to 32 Y values per loop pass.
- * Same pipeline shape as the kARGBToY variant, but it uses the kARGBToYJ weights,
- * adds kAddYJ64 to the word sums before the shift by 7 (rounding) and adds no
- * byte offset after packing.
- * Pixel loads and the store use vmovdqu, so neither pointer needs alignment.
- * The loop body runs before pix is tested and steps by 32; presumably callers
- * pass a positive multiple of 32 - confirm.
- * NOTE(review): this block is guarded by HAS_ARGBTOYROW_AVX2 although the
- * function is the YJ variant; the guard may have been meant to be
- * HAS_ARGBTOYJROW_AVX2 - confirm against the header that defines these macros.
- */
-__declspec(naked) __declspec(align(32))
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- vbroadcastf128 ymm4, kARGBToYJ // 16-byte weights copied into both lanes
- vbroadcastf128 ymm5, kAddYJ64 // 16-byte rounding term copied into both lanes
- vmovdqa ymm6, kPermdARGBToY_AVX // dword order consumed by vpermd below
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- vmovdqu ymm2, [eax + 64]
- vmovdqu ymm3, [eax + 96]
- vpmaddubsw ymm0, ymm0, ymm4 // unsigned pixel bytes * signed weights, adjacent pairs summed
- vpmaddubsw ymm1, ymm1, ymm4
- vpmaddubsw ymm2, ymm2, ymm4
- vpmaddubsw ymm3, ymm3, ymm4
- lea eax, [eax + 128]
- vphaddw ymm0, ymm0, ymm1 // mutates: works per 128-bit lane, so results are lane-interleaved.
- vphaddw ymm2, ymm2, ymm3
- vpaddw ymm0, ymm0, ymm5 // Add .5 for rounding.
- vpaddw ymm2, ymm2, ymm5
- vpsrlw ymm0, ymm0, 7
- vpsrlw ymm2, ymm2, 7
- vpackuswb ymm0, ymm0, ymm2 // mutates: also packs per 128-bit lane.
- vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation: put the dwords back in pixel order.
- sub ecx, 32 // 32 pixels done; these flags are consumed by jg below
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- jg convertloop // vmovdqu and lea leave the flags from sub untouched
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBTOYROW_AVX2 (the guard actually opened above for ARGBToYJRow_AVX2)
-/* ARGBToYRow_Unaligned_SSSE3: compute one Y byte per 4-byte pixel, 16 pixels per loop pass.
- * Naked function: the three arguments are read straight from the stack.
- * Pixel bytes are weighted by kARGBToY (pmaddubsw), the pair sums of each
- * pixel are combined (phaddw), shifted right by 7, packed to bytes and
- * offset by kAddY16.
- * Pixel loads and the store use movdqu, so neither pointer needs alignment.
- * The loop body runs before pix is tested and steps by 16; presumably callers
- * pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16 // byte offset added to every result
- movdqa xmm4, kARGBToY // per-byte weights (defined outside this chunk)
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4 // unsigned pixel bytes * signed weights, adjacent pairs summed
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1 // add the two pair sums of each pixel
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2 // 16 words -> 16 bytes with unsigned saturation
- paddb xmm0, xmm5
- sub ecx, 16 // 16 pixels done; these flags are consumed by jg below
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // movdqu and lea leave the flags from sub untouched
- ret
- }
-}
-/* ARGBToYJRow_Unaligned_SSSE3: compute one Y byte per 4-byte pixel, 16 pixels per loop pass.
- * Naked function: the three arguments are read straight from the stack.
- * Pixel bytes are weighted by kARGBToYJ (pmaddubsw), the pair sums of each
- * pixel are combined (phaddw), kAddYJ64 is added to the word sums, and the
- * result is shifted right by 7 and packed to bytes. No byte offset is added
- * after packing.
- * Pixel loads and the store use movdqu, so neither pointer needs alignment.
- * The loop body runs before pix is tested and steps by 16; presumably callers
- * pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm4, kARGBToYJ // per-byte weights (defined outside this chunk)
- movdqa xmm5, kAddYJ64 // word term added before the shift
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4 // unsigned pixel bytes * signed weights, adjacent pairs summed
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1 // add the two pair sums of each pixel
- phaddw xmm2, xmm3
- paddw xmm0, xmm5 // add kAddYJ64 before shifting (presumably rounding - confirm)
- paddw xmm2, xmm5
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2 // 16 words -> 16 bytes with unsigned saturation
- sub ecx, 16 // 16 pixels done; these flags are consumed by jg below
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // movdqu and lea leave the flags from sub untouched
- ret
- }
-}
-/* BGRAToYRow_SSSE3: compute one Y byte per 4-byte pixel, 16 pixels per loop pass.
- * Naked function: the three arguments are read straight from the stack.
- * Pixel bytes are weighted by kBGRAToY (pmaddubsw), the pair sums of each
- * pixel are combined (phaddw), shifted right by 7, packed to bytes and
- * offset by kAddY16.
- * Pixel loads and the store use movdqa, so both pointers must be 16-byte aligned.
- * The loop body runs before pix is tested and steps by 16; presumably callers
- * pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16 // byte offset added to every result
- movdqa xmm4, kBGRAToY // per-byte weights (defined outside this chunk)
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4 // unsigned pixel bytes * signed weights, adjacent pairs summed
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1 // add the two pair sums of each pixel
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2 // 16 words -> 16 bytes with unsigned saturation
- paddb xmm0, xmm5
- sub ecx, 16 // 16 pixels done; these flags are consumed by jg below
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // movdqa and lea leave the flags from sub untouched
- ret
- }
-}
-/* BGRAToYRow_Unaligned_SSSE3: compute one Y byte per 4-byte pixel, 16 pixels per loop pass.
- * Naked function: the three arguments are read straight from the stack.
- * Pixel bytes are weighted by kBGRAToY (pmaddubsw), the pair sums of each
- * pixel are combined (phaddw), shifted right by 7, packed to bytes and
- * offset by kAddY16.
- * Pixel loads and the store use movdqu, so neither pointer needs alignment.
- * The loop body runs before pix is tested and steps by 16; presumably callers
- * pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16 // byte offset added to every result
- movdqa xmm4, kBGRAToY // per-byte weights (defined outside this chunk)
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4 // unsigned pixel bytes * signed weights, adjacent pairs summed
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1 // add the two pair sums of each pixel
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2 // 16 words -> 16 bytes with unsigned saturation
- paddb xmm0, xmm5
- sub ecx, 16 // 16 pixels done; these flags are consumed by jg below
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // movdqu and lea leave the flags from sub untouched
- ret
- }
-}
-/* ABGRToYRow_SSSE3: compute one Y byte per 4-byte pixel, 16 pixels per loop pass.
- * Naked function: the three arguments are read straight from the stack.
- * Pixel bytes are weighted by kABGRToY (pmaddubsw), the pair sums of each
- * pixel are combined (phaddw), shifted right by 7, packed to bytes and
- * offset by kAddY16.
- * Pixel loads and the store use movdqa, so both pointers must be 16-byte aligned.
- * The loop body runs before pix is tested and steps by 16; presumably callers
- * pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16 // byte offset added to every result
- movdqa xmm4, kABGRToY // per-byte weights (defined outside this chunk)
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4 // unsigned pixel bytes * signed weights, adjacent pairs summed
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1 // add the two pair sums of each pixel
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2 // 16 words -> 16 bytes with unsigned saturation
- paddb xmm0, xmm5
- sub ecx, 16 // 16 pixels done; these flags are consumed by jg below
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // movdqa and lea leave the flags from sub untouched
- ret
- }
-}
-/* ABGRToYRow_Unaligned_SSSE3: compute one Y byte per 4-byte pixel, 16 pixels per loop pass.
- * Naked function: the three arguments are read straight from the stack.
- * Pixel bytes are weighted by kABGRToY (pmaddubsw), the pair sums of each
- * pixel are combined (phaddw), shifted right by 7, packed to bytes and
- * offset by kAddY16.
- * Pixel loads and the store use movdqu, so neither pointer needs alignment.
- * The loop body runs before pix is tested and steps by 16; presumably callers
- * pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16 // byte offset added to every result
- movdqa xmm4, kABGRToY // per-byte weights (defined outside this chunk)
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4 // unsigned pixel bytes * signed weights, adjacent pairs summed
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1 // add the two pair sums of each pixel
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2 // 16 words -> 16 bytes with unsigned saturation
- paddb xmm0, xmm5
- sub ecx, 16 // 16 pixels done; these flags are consumed by jg below
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // movdqu and lea leave the flags from sub untouched
- ret
- }
-}
-/* RGBAToYRow_SSSE3: compute one Y byte per 4-byte pixel, 16 pixels per loop pass.
- * Naked function: the three arguments are read straight from the stack.
- * Pixel bytes are weighted by kRGBAToY (pmaddubsw), the pair sums of each
- * pixel are combined (phaddw), shifted right by 7, packed to bytes and
- * offset by kAddY16.
- * Pixel loads and the store use movdqa, so both pointers must be 16-byte aligned.
- * The loop body runs before pix is tested and steps by 16; presumably callers
- * pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16 // byte offset added to every result
- movdqa xmm4, kRGBAToY // per-byte weights (defined outside this chunk)
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4 // unsigned pixel bytes * signed weights, adjacent pairs summed
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1 // add the two pair sums of each pixel
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2 // 16 words -> 16 bytes with unsigned saturation
- paddb xmm0, xmm5
- sub ecx, 16 // 16 pixels done; these flags are consumed by jg below
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // movdqa and lea leave the flags from sub untouched
- ret
- }
-}
-/* RGBAToYRow_Unaligned_SSSE3: compute one Y byte per 4-byte pixel, 16 pixels per loop pass.
- * Naked function: the three arguments are read straight from the stack.
- * Pixel bytes are weighted by kRGBAToY (pmaddubsw), the pair sums of each
- * pixel are combined (phaddw), shifted right by 7, packed to bytes and
- * offset by kAddY16.
- * Pixel loads and the store use movdqu, so neither pointer needs alignment.
- * The loop body runs before pix is tested and steps by 16; presumably callers
- * pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm5, kAddY16 // byte offset added to every result
- movdqa xmm4, kRGBAToY // per-byte weights (defined outside this chunk)
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4 // unsigned pixel bytes * signed weights, adjacent pairs summed
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1 // add the two pair sums of each pixel
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2 // 16 words -> 16 bytes with unsigned saturation
- paddb xmm0, xmm5
- sub ecx, 16 // 16 pixels done; these flags are consumed by jg below
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // movdqu and lea leave the flags from sub untouched
- ret
- }
-}
-/* ARGBToUVRow_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kARGBToU and kARGBToV, shifts the
- * sums right by 8, packs with signed saturation and adds kAddUV128.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Row loads use movdqa and pavgb with memory operands, so both source rows
- * must be 16-byte aligned.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi] // vertical average with the second row
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-/* ARGBToUVJRow_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kARGBToUJ and kARGBToVJ, adds
- * kAddUVJ128 to the word sums, shifts right by 8 and packs with signed
- * saturation. No byte offset is added after packing.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Row loads use movdqa and pavgb with memory operands, so both source rows
- * must be 16-byte aligned.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kARGBToUJ
- movdqa xmm6, kARGBToVJ
- movdqa xmm5, kAddUVJ128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi] // vertical average with the second row
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- paddw xmm0, xmm5 // +.5 rounding -> unsigned
- paddw xmm1, xmm5
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-/* ARGBToUVRow_AVX2: produce 16 U and 16 V bytes from a 32x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (vpavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kARGBToU and kARGBToV, shifts the
- * sums right by 8, packs with signed saturation, reorders the result
- * (vpermq, then vpshufb with kShufARGBToUV_AVX) and adds kAddUV128.
- * Step 3 stores the low 16 bytes to dst_u and the high 16 bytes to dst_v.
- * First-row loads use vmovdqu; the loop body runs before width is tested and
- * steps by 32, so presumably callers pass a positive multiple of 32 - confirm.
- */
-#ifdef HAS_ARGBTOUVROW_AVX2
-__declspec(naked) __declspec(align(32))
-void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- vbroadcastf128 ymm5, kAddUV128
- vbroadcastf128 ymm6, kARGBToV
- vbroadcastf128 ymm7, kARGBToU
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 32x2 argb pixels to 16x1 */
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- vmovdqu ymm2, [eax + 64]
- vmovdqu ymm3, [eax + 96]
- vpavgb ymm0, ymm0, [eax + esi] // vertical average with the second row
- vpavgb ymm1, ymm1, [eax + esi + 32]
- vpavgb ymm2, ymm2, [eax + esi + 64]
- vpavgb ymm3, ymm3, [eax + esi + 96]
- lea eax, [eax + 128]
- vshufps ymm4, ymm0, ymm1, 0x88 // dwords 0 and 2 per lane: even pixels
- vshufps ymm0, ymm0, ymm1, 0xdd // dwords 1 and 3 per lane: odd pixels
- vpavgb ymm0, ymm0, ymm4 // mutated by vshufps
- vshufps ymm4, ymm2, ymm3, 0x88
- vshufps ymm2, ymm2, ymm3, 0xdd
- vpavgb ymm2, ymm2, ymm4 // mutated by vshufps
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 32 different pixels, it is 16 values of U and 16 of V
- vpmaddubsw ymm1, ymm0, ymm7 // U
- vpmaddubsw ymm3, ymm2, ymm7
- vpmaddubsw ymm0, ymm0, ymm6 // V
- vpmaddubsw ymm2, ymm2, ymm6
- vphaddw ymm1, ymm1, ymm3 // mutates
- vphaddw ymm0, ymm0, ymm2
- vpsraw ymm1, ymm1, 8
- vpsraw ymm0, ymm0, 8
- vpacksswb ymm0, ymm1, ymm0 // mutates
- vpermq ymm0, ymm0, 0xd8 // For vpacksswb
- vpshufb ymm0, ymm0, kShufARGBToUV_AVX // For vshufps + vphaddw
- vpaddb ymm0, ymm0, ymm5 // -> unsigned
-
- // step 3 - store 16 U and 16 V values
- sub ecx, 32 // 32 source pixels done; these flags are consumed by jg below
- vextractf128 [edx], ymm0, 0 // U
- vextractf128 [edx + edi], ymm0, 1 // V
- lea edx, [edx + 16]
- jg convertloop
-
- pop edi
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBTOUVROW_AVX2
-/* ARGBToUVRow_Unaligned_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kARGBToU and kARGBToV, shifts the
- * sums right by 8, packs with signed saturation and adds kAddUV128.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Both source rows are loaded with movdqu (the second row goes through xmm4
- * before pavgb), so the source needs no alignment.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi] // second row, loaded unaligned into a scratch register
- pavgb xmm0, xmm4 // vertical average
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-/* ARGBToUVJRow_Unaligned_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kARGBToUJ and kARGBToVJ, adds
- * kAddUVJ128 to the word sums, shifts right by 8 and packs with signed
- * saturation. No byte offset is added after packing.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Both source rows are loaded with movdqu (the second row goes through xmm4
- * before pavgb), so the source needs no alignment.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kARGBToUJ
- movdqa xmm6, kARGBToVJ
- movdqa xmm5, kAddUVJ128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi] // second row, loaded unaligned into a scratch register
- pavgb xmm0, xmm4 // vertical average
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- paddw xmm0, xmm5 // +.5 rounding -> unsigned
- paddw xmm1, xmm5
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-/* ARGBToUV444Row_SSSE3: produce 16 U and 16 V bytes from 16 4-byte pixels per loop pass.
- * Naked function: edi is saved first, so arguments sit at esp + 4 + n.
- * There is no subsampling: the same 64 source bytes are loaded twice, once
- * weighted with kARGBToU and once with kARGBToV. Each result is shifted right
- * by 8, packed with signed saturation and offset by kAddUV128.
- * Loads and stores use movdqa, so src_argb0, dst_u and dst_v must all be
- * 16-byte aligned.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_argb
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // width
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* convert to U and V */
- movdqa xmm0, [eax] // U
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm7
- pmaddubsw xmm1, xmm7
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm3, xmm7
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psraw xmm0, 8
- psraw xmm2, 8
- packsswb xmm0, xmm2
- paddb xmm0, xmm5
- sub ecx, 16 // flags are consumed by jg at the end of the pass; nothing in between writes them
- movdqa [edx], xmm0
-
- movdqa xmm0, [eax] // V: reload the same 16 pixels
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pmaddubsw xmm0, xmm6
- pmaddubsw xmm1, xmm6
- pmaddubsw xmm2, xmm6
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psraw xmm0, 8
- psraw xmm2, 8
- packsswb xmm0, xmm2
- paddb xmm0, xmm5
- lea eax, [eax + 64]
- movdqa [edx + edi], xmm0
- lea edx, [edx + 16]
- jg convertloop
-
- pop edi
- ret
- }
-}
-/* ARGBToUV444Row_Unaligned_SSSE3: produce 16 U and 16 V bytes from 16 4-byte pixels per loop pass.
- * Naked function: edi is saved first, so arguments sit at esp + 4 + n.
- * There is no subsampling: the same 64 source bytes are loaded twice, once
- * weighted with kARGBToU and once with kARGBToV. Each result is shifted right
- * by 8, packed with signed saturation and offset by kAddUV128.
- * Loads and stores use movdqu, so no pointer needs alignment.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_argb
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // width
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* convert to U and V */
- movdqu xmm0, [eax] // U
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm7
- pmaddubsw xmm1, xmm7
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm3, xmm7
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psraw xmm0, 8
- psraw xmm2, 8
- packsswb xmm0, xmm2
- paddb xmm0, xmm5
- sub ecx, 16 // flags are consumed by jg at the end of the pass; nothing in between writes them
- movdqu [edx], xmm0
-
- movdqu xmm0, [eax] // V: reload the same 16 pixels
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm6
- pmaddubsw xmm1, xmm6
- pmaddubsw xmm2, xmm6
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psraw xmm0, 8
- psraw xmm2, 8
- packsswb xmm0, xmm2
- paddb xmm0, xmm5
- lea eax, [eax + 64]
- movdqu [edx + edi], xmm0
- lea edx, [edx + 16]
- jg convertloop
-
- pop edi
- ret
- }
-}
-/* ARGBToUV422Row_SSSE3: produce 8 U and 8 V bytes from 16 4-byte pixels of one row per loop pass.
- * Naked function: edi is saved first, so arguments sit at esp + 4 + n.
- * Only horizontal subsampling is done: there is no stride argument and no
- * second row is read. Adjacent pixels are averaged (shufps + pavgb), then
- * weighted with kARGBToU and kARGBToV, shifted right by 8, packed with signed
- * saturation and offset by kAddUV128.
- * Source loads use movdqa, so src_argb0 must be 16-byte aligned.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_argb
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // width
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x1 argb pixels to 8x1 (single row, horizontal only) */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- ret
- }
-}
-/* ARGBToUV422Row_Unaligned_SSSE3: produce 8 U and 8 V bytes from 16 4-byte pixels of one row per loop pass.
- * Naked function: edi is saved first, so arguments sit at esp + 4 + n.
- * Only horizontal subsampling is done: there is no stride argument and no
- * second row is read. Adjacent pixels are averaged (shufps + pavgb), then
- * weighted with kARGBToU and kARGBToV, shifted right by 8, packed with signed
- * saturation and offset by kAddUV128.
- * Source loads use movdqu, so src_argb0 needs no alignment.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_argb
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // width
- movdqa xmm7, kARGBToU
- movdqa xmm6, kARGBToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x1 argb pixels to 8x1 (single row, horizontal only) */
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- ret
- }
-}
-/* BGRAToUVRow_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kBGRAToU and kBGRAToV, shifts the
- * sums right by 8, packs with signed saturation and adds kAddUV128.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Row loads use movdqa and pavgb with memory operands, so both source rows
- * must be 16-byte aligned.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kBGRAToU
- movdqa xmm6, kBGRAToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi] // vertical average with the second row
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-/* BGRAToUVRow_Unaligned_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kBGRAToU and kBGRAToV, shifts the
- * sums right by 8, packs with signed saturation and adds kAddUV128.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Both source rows are loaded with movdqu (the second row goes through xmm4
- * before pavgb), so the source needs no alignment.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kBGRAToU
- movdqa xmm6, kBGRAToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi] // second row, loaded unaligned into a scratch register
- pavgb xmm0, xmm4 // vertical average
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-/* ABGRToUVRow_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kABGRToU and kABGRToV, shifts the
- * sums right by 8, packs with signed saturation and adds kAddUV128.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Row loads use movdqa and pavgb with memory operands, so both source rows
- * must be 16-byte aligned.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kABGRToU
- movdqa xmm6, kABGRToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi] // vertical average with the second row
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-/* ABGRToUVRow_Unaligned_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kABGRToU and kABGRToV, shifts the
- * sums right by 8, packs with signed saturation and adds kAddUV128.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Both source rows are loaded with movdqu (the second row goes through xmm4
- * before pavgb), so the source needs no alignment.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kABGRToU
- movdqa xmm6, kABGRToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi] // second row, loaded unaligned into a scratch register
- pavgb xmm0, xmm4 // vertical average
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-/* RGBAToUVRow_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kRGBAToU and kRGBAToV, shifts the
- * sums right by 8, packs with signed saturation and adds kAddUV128.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Row loads use movdqa and pavgb with memory operands, so both source rows
- * must be 16-byte aligned.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kRGBAToU
- movdqa xmm6, kRGBAToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
- pavgb xmm0, [eax + esi] // vertical average with the second row
- pavgb xmm1, [eax + esi + 16]
- pavgb xmm2, [eax + esi + 32]
- pavgb xmm3, [eax + esi + 48]
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-/* RGBAToUVRow_Unaligned_SSSE3: produce 8 U and 8 V bytes from a 16x2 block of 4-byte pixels per loop pass.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Step 1 averages the row at src_argb0 with the row src_stride_argb bytes
- * further on (pavgb), then averages horizontally adjacent pixels.
- * Step 2 weights the averaged pixels with kRGBAToU and kRGBAToV, shifts the
- * sums right by 8, packs with signed saturation and adds kAddUV128.
- * Step 3 stores 8 bytes to dst_u and 8 bytes to dst_v.
- * Both source rows are loaded with movdqu (the second row goes through xmm4
- * before pavgb), so the source needs no alignment.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-__declspec(naked) __declspec(align(16))
-void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width
- movdqa xmm7, kRGBAToU
- movdqa xmm6, kRGBAToV
- movdqa xmm5, kAddUV128
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V
-
- align 4
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi] // second row, loaded unaligned into a scratch register
- pavgb xmm0, xmm4 // vertical average
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88 // dwords 0 and 2 of each register: even pixels
- shufps xmm4, xmm1, 0xdd // dwords 1 and 3 of each register: odd pixels
- pavgb xmm0, xmm4 // horizontal average of each even/odd pair
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to the Y code, except that
- // instead of 16 different pixels, it is 8 values of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1 // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- sub ecx, 16 // 16 source pixels done; these flags are consumed by jg below
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBTOYROW_SSSE3
-/* Integer YUV to RGB coefficients; per the formulas in the comments below, each
- * is a real-valued factor multiplied by 64 and truncated.
- * Ux / Vx are the U and V contributions to channel x (B, G or R); YG scales Y.
- * BB, BG and BR are the matching bias terms, built as 128 times the U and V
- * coefficients of that channel.
- * NOTE(review): none of the macro bodies are parenthesized (for example UG is
- * -25 and BB is UB * 128 + VB * 128). That is harmless where they are used as
- * whole initializer elements, but would misparse inside a larger expression.
- */
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-
-#define UB 127 /* 2.018 * 64 is about 129; clamped to 127, the int8 maximum */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias: 128 times the U coefficient plus 128 times the V coefficient of each channel.
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
-#ifdef HAS_I422TOARGBROW_AVX2
-/* Constant tables read as memory operands by I422ToARGBRow_AVX2.
- * The three kUVTo*_AVX tables hold 32 bytes of alternating U and V
- * coefficients for one output channel, matching the interleaved UV bytes that
- * vpmaddubsw multiplies them with.
- * The lvec16 tables hold 16 copies of one word: the Y scale, the value 16
- * subtracted from Y, and the per-channel bias subtracted after vpmaddubsw.
- * lvec8 and lvec16 are declared outside this chunk; presumably 32-byte
- * aligned vector types - confirm.
- */
-static const lvec8 kUVToB_AVX = {
- UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB,
- UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
-};
-static const lvec8 kUVToR_AVX = {
- UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR,
- UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
-};
-static const lvec8 kUVToG_AVX = {
- UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
- UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
-};
-static const lvec16 kYToRgb_AVX = { // multiplier applied to (Y - 16)
- YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG
-};
-static const lvec16 kYSub16_AVX = { // subtracted from each widened Y value
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
-};
-static const lvec16 kUVBiasB_AVX = { // subtracted from the B products
- BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB
-};
-static const lvec16 kUVBiasG_AVX = { // subtracted from the G products
- BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG
-};
-static const lvec16 kUVBiasR_AVX = { // subtracted from the R products
- BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
-};
-/* I422ToARGBRow_AVX2: convert planar Y, U and V bytes to 4-byte output pixels.
- * Naked function: esi and edi are saved first, so arguments sit at esp + 8 + n.
- * Each loop pass reads 8 U bytes, 8 V bytes and 16 Y bytes and writes 64 bytes
- * to dst_argb. The U and V products are computed with vpmaddubsw against the
- * kUVTo*_AVX tables and have the kUVBias*_AVX words subtracted; (Y - 16) * YG
- * is then added, the sums are shifted right by 6 and packed with unsigned
- * saturation. The fourth byte of every output pixel is 0xff.
- * The loop body runs before width is tested and steps by 16; presumably
- * callers pass a positive multiple of 16 - confirm.
- */
-// 16 pixels
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi // edi = v_buf - u_buf, so [esi + edi] addresses V
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
- vpxor ymm4, ymm4, ymm4 // all zero, used to widen Y bytes to words
-
- align 4
- convertloop:
- vmovq xmm0, qword ptr [esi] // U
- vmovq xmm1, qword ptr [esi + edi] // V
- lea esi, [esi + 8]
- vpunpcklbw ymm0, ymm0, ymm1 // UV
- vpermq ymm0, ymm0, 0xd8
- vpunpcklwd ymm0, ymm0, ymm0 // UVUV
- vpmaddubsw ymm2, ymm0, kUVToB_AVX // scale B UV
- vpmaddubsw ymm1, ymm0, kUVToG_AVX // scale G UV
- vpmaddubsw ymm0, ymm0, kUVToR_AVX // scale R UV
- vpsubw ymm2, ymm2, kUVBiasB_AVX // unbias back to signed
- vpsubw ymm1, ymm1, kUVBiasG_AVX
- vpsubw ymm0, ymm0, kUVBiasR_AVX
-
- // Step 2: Find Y contribution to 16 R,G,B values
- vmovdqu xmm3, [eax] // NOLINT
- lea eax, [eax + 16]
- vpermq ymm3, ymm3, 0xd8
- vpunpcklbw ymm3, ymm3, ymm4 // widen Y bytes to words using the zero register
- vpsubsw ymm3, ymm3, kYSub16_AVX
- vpmullw ymm3, ymm3, kYToRgb_AVX
- vpaddsw ymm2, ymm2, ymm3 // B += Y
- vpaddsw ymm1, ymm1, ymm3 // G += Y
- vpaddsw ymm0, ymm0, ymm3 // R += Y
- vpsraw ymm2, ymm2, 6
- vpsraw ymm1, ymm1, 6
- vpsraw ymm0, ymm0, 6
- vpackuswb ymm2, ymm2, ymm2 // B
- vpackuswb ymm1, ymm1, ymm1 // G
- vpackuswb ymm0, ymm0, ymm0 // R
-
- // Step 3: Weave into ARGB
- vpunpcklbw ymm2, ymm2, ymm1 // BG
- vpermq ymm2, ymm2, 0xd8
- vpunpcklbw ymm0, ymm0, ymm5 // RA
- vpermq ymm0, ymm0, 0xd8
- vpunpcklwd ymm1, ymm2, ymm0 // BGRA first 8 pixels
- vpunpckhwd ymm2, ymm2, ymm0 // BGRA next 8 pixels
- vmovdqu [edx], ymm1
- vmovdqu [edx + 32], ymm2
- lea edx, [edx + 64]
- sub ecx, 16 // 16 pixels done; flags feed jg directly
- jg convertloop
- vzeroupper
-
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_I422TOARGBROW_AVX2
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-
-static const vec8 kUVToB = {
- UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
-};
-
-static const vec8 kUVToR = {
- UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
-};
-
-static const vec8 kUVToG = {
- UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
-};
-
-static const vec8 kVUToB = {
- VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
-};
-
-static const vec8 kVUToR = {
- VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
-};
-
-static const vec8 kVUToG = {
- VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
-};
-
-static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
-static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
-static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
-static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
-static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
-
-// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
-
-// Read 8 UV from 444.
-#define READYUV444 __asm { \
- __asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \
- __asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \
- __asm lea esi, [esi + 8] \
- __asm punpcklbw xmm0, xmm1 /* UV */ \
- }
-
-// Read 4 UV from 422, upsample to 8 UV.
-#define READYUV422 __asm { \
- __asm movd xmm0, [esi] /* U */ \
- __asm movd xmm1, [esi + edi] /* V */ \
- __asm lea esi, [esi + 4] \
- __asm punpcklbw xmm0, xmm1 /* UV */ \
- __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
- }
-
-// Read 2 UV from 411, upsample to 8 UV.
-#define READYUV411 __asm { \
- __asm movzx ebx, word ptr [esi] /* U */ /* NOLINT */ \
- __asm movd xmm0, ebx \
- __asm movzx ebx, word ptr [esi + edi] /* V */ /* NOLINT */ \
- __asm movd xmm1, ebx \
- __asm lea esi, [esi + 2] \
- __asm punpcklbw xmm0, xmm1 /* UV */ \
- __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
- __asm punpckldq xmm0, xmm0 /* UVUV (upsample) */ \
- }
-
-// Read 4 UV from NV12, upsample to 8 UV.
-#define READNV12 __asm { \
- __asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \
- __asm lea esi, [esi + 8] \
- __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
- }
-
-// Convert 8 pixels: 8 UV and 8 Y.
-#define YUVTORGB __asm { \
- /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
- __asm movdqa xmm1, xmm0 \
- __asm movdqa xmm2, xmm0 \
- __asm pmaddubsw xmm0, kUVToB /* scale B UV */ \
- __asm pmaddubsw xmm1, kUVToG /* scale G UV */ \
- __asm pmaddubsw xmm2, kUVToR /* scale R UV */ \
- __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \
- __asm psubw xmm1, kUVBiasG \
- __asm psubw xmm2, kUVBiasR \
- /* Step 2: Find Y contribution to 8 R,G,B values */ \
- __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
- __asm lea eax, [eax + 8] \
- __asm punpcklbw xmm3, xmm4 \
- __asm psubsw xmm3, kYSub16 \
- __asm pmullw xmm3, kYToRgb \
- __asm paddsw xmm0, xmm3 /* B += Y */ \
- __asm paddsw xmm1, xmm3 /* G += Y */ \
- __asm paddsw xmm2, xmm3 /* R += Y */ \
- __asm psraw xmm0, 6 \
- __asm psraw xmm1, 6 \
- __asm psraw xmm2, 6 \
- __asm packuswb xmm0, xmm0 /* B */ \
- __asm packuswb xmm1, xmm1 /* G */ \
- __asm packuswb xmm2, xmm2 /* R */ \
- }
-
-// Convert 8 pixels: 8 VU and 8 Y.
-#define YVUTORGB __asm { \
- /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
- __asm movdqa xmm1, xmm0 \
- __asm movdqa xmm2, xmm0 \
- __asm pmaddubsw xmm0, kVUToB /* scale B UV */ \
- __asm pmaddubsw xmm1, kVUToG /* scale G UV */ \
- __asm pmaddubsw xmm2, kVUToR /* scale R UV */ \
- __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \
- __asm psubw xmm1, kUVBiasG \
- __asm psubw xmm2, kUVBiasR \
- /* Step 2: Find Y contribution to 8 R,G,B values */ \
- __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
- __asm lea eax, [eax + 8] \
- __asm punpcklbw xmm3, xmm4 \
- __asm psubsw xmm3, kYSub16 \
- __asm pmullw xmm3, kYToRgb \
- __asm paddsw xmm0, xmm3 /* B += Y */ \
- __asm paddsw xmm1, xmm3 /* G += Y */ \
- __asm paddsw xmm2, xmm3 /* R += Y */ \
- __asm psraw xmm0, 6 \
- __asm psraw xmm1, 6 \
- __asm psraw xmm2, 6 \
- __asm packuswb xmm0, xmm0 /* B */ \
- __asm packuswb xmm1, xmm1 /* G */ \
- __asm packuswb xmm2, xmm2 /* R */ \
- }
-
-// 8 pixels, dest aligned 16.
-// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I444ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV444
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToRGB24Row_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgb24,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb24
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
- movdqa xmm5, kShuffleMaskARGBToRGB24_0
- movdqa xmm6, kShuffleMaskARGBToRGB24
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into RRGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm2 // RR
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRR first 4 pixels
- punpckhwd xmm1, xmm2 // BGRR next 4 pixels
- pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes.
- pshufb xmm1, xmm6 // Pack into first 12 bytes.
- palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1
- movq qword ptr [edx], xmm0 // First 8 bytes
- movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB pixels.
- lea edx, [edx + 24]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToRAWRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_raw,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // raw
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
- movdqa xmm5, kShuffleMaskARGBToRAW_0
- movdqa xmm6, kShuffleMaskARGBToRAW
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into RRGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm2 // RR
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRR first 4 pixels
- punpckhwd xmm1, xmm2 // BGRR next 4 pixels
- pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes.
- pshufb xmm1, xmm6 // Pack into first 12 bytes.
- palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1
- movq qword ptr [edx], xmm0 // First 8 bytes
- movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB pixels.
- lea edx, [edx + 24]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels, dest unaligned.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToRGB565Row_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb565_buf,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb565
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
- pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
- psrld xmm5, 27
- pcmpeqb xmm6, xmm6 // generate mask 0x000007e0
- psrld xmm6, 26
- pslld xmm6, 5
- pcmpeqb xmm7, xmm7 // generate mask 0xfffff800
- pslld xmm7, 11
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into RRGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm2 // RR
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRR first 4 pixels
- punpckhwd xmm1, xmm2 // BGRR next 4 pixels
-
- // Step 3b: RRGB -> RGB565
- movdqa xmm3, xmm0 // B first 4 pixels of argb
- movdqa xmm2, xmm0 // G
- pslld xmm0, 8 // R
- psrld xmm3, 3 // B
- psrld xmm2, 5 // G
- psrad xmm0, 16 // R
- pand xmm3, xmm5 // B
- pand xmm2, xmm6 // G
- pand xmm0, xmm7 // R
- por xmm3, xmm2 // BG
- por xmm0, xmm3 // BGR
- movdqa xmm3, xmm1 // B next 4 pixels of argb
- movdqa xmm2, xmm1 // G
- pslld xmm1, 8 // R
- psrld xmm3, 3 // B
- psrld xmm2, 5 // G
- psrad xmm1, 16 // R
- pand xmm3, xmm5 // B
- pand xmm2, xmm6 // G
- pand xmm1, xmm7 // R
- por xmm3, xmm2 // BG
- por xmm1, xmm3 // BGR
- packssdw xmm0, xmm1
- sub ecx, 8
- movdqu [edx], xmm0 // store 8 pixels of RGB565
- lea edx, [edx + 16]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels, dest aligned 16.
-// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-// Similar to I420 but duplicate UV once more.
-__declspec(naked) __declspec(align(16))
-void I411ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push ebx
- push esi
- push edi
- mov eax, [esp + 12 + 4] // Y
- mov esi, [esp + 12 + 8] // U
- mov edi, [esp + 12 + 12] // V
- mov edx, [esp + 12 + 16] // argb
- mov ecx, [esp + 12 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV411 // modifies EBX
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV12ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READNV12
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // VU
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READNV12
- YVUTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-// 8 pixels, unaligned.
-// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV444
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels, unaligned.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels, unaligned.
-// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-// Similar to I420 but duplicate UV once more.
-__declspec(naked) __declspec(align(16))
-void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push ebx
- push esi
- push edi
- mov eax, [esp + 12 + 4] // Y
- mov esi, [esp + 12 + 8] // U
- mov edi, [esp + 12 + 12] // V
- mov edx, [esp + 12 + 16] // argb
- mov ecx, [esp + 12 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV411 // modifies EBX
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READNV12
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // VU
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READNV12
- YVUTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm0, xmm1 // BG
- punpcklbw xmm2, xmm5 // RA
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm2 // BGRA first 4 pixels
- punpckhwd xmm1, xmm2 // BGRA next 4 pixels
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToBGRARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // bgra
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into BGRA
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- punpcklbw xmm1, xmm0 // GB
- punpcklbw xmm5, xmm2 // AR
- movdqa xmm0, xmm5
- punpcklwd xmm5, xmm1 // BGRA first 4 pixels
- punpckhwd xmm0, xmm1 // BGRA next 4 pixels
- movdqa [edx], xmm5
- movdqa [edx + 16], xmm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // bgra
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into BGRA
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- punpcklbw xmm1, xmm0 // GB
- punpcklbw xmm5, xmm2 // AR
- movdqa xmm0, xmm5
- punpcklwd xmm5, xmm1 // BGRA first 4 pixels
- punpckhwd xmm0, xmm1 // BGRA next 4 pixels
- movdqu [edx], xmm5
- movdqu [edx + 16], xmm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToABGRRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // abgr
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm2, xmm1 // RG
- punpcklbw xmm0, xmm5 // BA
- movdqa xmm1, xmm2
- punpcklwd xmm2, xmm0 // RGBA first 4 pixels
- punpckhwd xmm1, xmm0 // RGBA next 4 pixels
- movdqa [edx], xmm2
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // abgr
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into ARGB
- punpcklbw xmm2, xmm1 // RG
- punpcklbw xmm0, xmm5 // BA
- movdqa xmm1, xmm2
- punpcklwd xmm2, xmm0 // RGBA first 4 pixels
- punpckhwd xmm1, xmm0 // RGBA next 4 pixels
- movdqu [edx], xmm2
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToRGBARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgba
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into RGBA
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- punpcklbw xmm1, xmm2 // GR
- punpcklbw xmm5, xmm0 // AB
- movdqa xmm0, xmm5
- punpcklwd xmm5, xmm1 // RGBA first 4 pixels
- punpckhwd xmm0, xmm1 // RGBA next 4 pixels
- movdqa [edx], xmm5
- movdqa [edx + 16], xmm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgba
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi
- pxor xmm4, xmm4
-
- align 4
- convertloop:
- READYUV422
- YUVTORGB
-
- // Step 3: Weave into RGBA
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
- punpcklbw xmm1, xmm2 // GR
- punpcklbw xmm5, xmm0 // AB
- movdqa xmm0, xmm5
- punpcklwd xmm5, xmm1 // RGBA first 4 pixels
- punpckhwd xmm0, xmm1 // RGBA next 4 pixels
- movdqu [edx], xmm5
- movdqu [edx + 16], xmm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-#endif // HAS_I422TOARGBROW_SSSE3
-
-#ifdef HAS_YTOARGBROW_SSE2
-__declspec(naked) __declspec(align(16))
-void YToARGBRow_SSE2(const uint8* y_buf,
- uint8* rgb_buf,
- int width) {
- __asm {
- pxor xmm5, xmm5
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
- mov eax, 0x00100010
- movd xmm3, eax
- pshufd xmm3, xmm3, 0
- mov eax, 0x004a004a // 74
- movd xmm2, eax
- pshufd xmm2, xmm2,0
- mov eax, [esp + 4] // Y
- mov edx, [esp + 8] // rgb
- mov ecx, [esp + 12] // width
-
- align 4
- convertloop:
- // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
- movq xmm0, qword ptr [eax]
- lea eax, [eax + 8]
- punpcklbw xmm0, xmm5 // 0.Y
- psubusw xmm0, xmm3
- pmullw xmm0, xmm2
- psrlw xmm0, 6
- packuswb xmm0, xmm0 // G
-
- // Step 2: Weave into ARGB
- punpcklbw xmm0, xmm0 // GG
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm0 // BGRA first 4 pixels
- punpckhwd xmm1, xmm1 // BGRA next 4 pixels
- por xmm0, xmm4
- por xmm1, xmm4
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- ret
- }
-}
-#endif // HAS_YTOARGBROW_SSE2
-
-#ifdef HAS_MIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static const uvec8 kShuffleMirror = {
- 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-__declspec(naked) __declspec(align(16))
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- movdqa xmm5, kShuffleMirror
- lea eax, [eax - 16]
-
- align 4
- convertloop:
- movdqa xmm0, [eax + ecx]
- pshufb xmm0, xmm5
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-#endif // HAS_MIRRORROW_SSSE3
-
-#ifdef HAS_MIRRORROW_AVX2
-// Shuffle table for reversing the bytes.
-static const ulvec8 kShuffleMirror_AVX2 = {
- 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u,
- 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-__declspec(naked) __declspec(align(16))
-void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- vmovdqa ymm5, kShuffleMirror_AVX2
- lea eax, [eax - 32]
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax + ecx]
- vpshufb ymm0, ymm0, ymm5
- vpermq ymm0, ymm0, 0x4e // swap high and low halfs
- sub ecx, 32
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_MIRRORROW_AVX2
-
-#ifdef HAS_MIRRORROW_SSE2
-// SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3
-// version can not.
-__declspec(naked) __declspec(align(16))
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- lea eax, [eax - 16]
-
- align 4
- convertloop:
- movdqu xmm0, [eax + ecx]
- movdqa xmm1, xmm0 // swap bytes
- psllw xmm0, 8
- psrlw xmm1, 8
- por xmm0, xmm1
- pshuflw xmm0, xmm0, 0x1b // swap words
- pshufhw xmm0, xmm0, 0x1b
- pshufd xmm0, xmm0, 0x4e // swap qwords
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-#endif // HAS_MIRRORROW_SSE2
-
-#ifdef HAS_MIRRORROW_UV_SSSE3
-// Shuffle table for reversing the bytes of UV channels.
-static const uvec8 kShuffleMirrorUV = {
- 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
-};
-
-__declspec(naked) __declspec(align(16))
-void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
- int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // width
- movdqa xmm1, kShuffleMirrorUV
- lea eax, [eax + ecx * 2 - 16]
- sub edi, edx
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- lea eax, [eax - 16]
- pshufb xmm0, xmm1
- sub ecx, 8
- movlpd qword ptr [edx], xmm0
- movhpd qword ptr [edx + edi], xmm0
- lea edx, [edx + 8]
- jg convertloop
-
- pop edi
- ret
- }
-}
-#endif // HAS_MIRRORROW_UV_SSSE3
-
-#ifdef HAS_ARGBMIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static const uvec8 kARGBShuffleMirror = {
- 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
-};
-
-__declspec(naked) __declspec(align(16))
-void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- lea eax, [eax - 16 + ecx * 4] // last 4 pixels.
- movdqa xmm5, kARGBShuffleMirror
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- lea eax, [eax - 16]
- pshufb xmm0, xmm5
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-#endif // HAS_ARGBMIRRORROW_SSSE3
-
-#ifdef HAS_ARGBMIRRORROW_AVX2
-// Shuffle table for reversing the bytes.
-static const ulvec32 kARGBShuffleMirror_AVX2 = {
- 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-__declspec(naked) __declspec(align(16))
-void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- lea eax, [eax - 32]
- vmovdqa ymm5, kARGBShuffleMirror_AVX2
-
- align 4
- convertloop:
- vpermd ymm0, ymm5, [eax + ecx * 4] // permute dword order
- sub ecx, 8
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBMIRRORROW_AVX2
-
-#ifdef HAS_SPLITUVROW_SSE2
-__declspec(naked) __declspec(align(16))
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uv
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- pand xmm0, xmm5 // even bytes
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- psrlw xmm2, 8 // odd bytes
- psrlw xmm3, 8
- packuswb xmm2, xmm3
- movdqa [edx], xmm0
- movdqa [edx + edi], xmm2
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uv
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx
-
- align 4
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- pand xmm0, xmm5 // even bytes
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- psrlw xmm2, 8 // odd bytes
- psrlw xmm3, 8
- packuswb xmm2, xmm3
- movdqu [edx], xmm0
- movdqu [edx + edi], xmm2
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-#endif // HAS_SPLITUVROW_SSE2
-
-#ifdef HAS_SPLITUVROW_AVX2
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { // De-interleaves src_uv into dst_u (even bytes) and dst_v (odd bytes), 32 bytes to each per iteration.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_uv (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // load 64 interleaved source bytes
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpsrlw ymm2, ymm0, 8 // odd bytes
- vpsrlw ymm3, ymm1, 8
- vpand ymm0, ymm0, ymm5 // even bytes
- vpand ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1 // packs within each 128-bit lane, leaving qwords out of order
- vpackuswb ymm2, ymm2, ymm3
- vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to restore source order
- vpermq ymm2, ymm2, 0xd8
- vmovdqu [edx], ymm0 // store to dst_u
- vmovdqu [edx + edi], ymm2 // store to dst_v
- lea edx, [edx + 32]
- sub ecx, 32 // 32 bytes were written to each destination
- jg convertloop
-
- pop edi
- vzeroupper // clear upper YMM halves before returning
- ret
- }
-}
-#endif // HAS_SPLITUVROW_AVX2
-
-#ifdef HAS_MERGEUVROW_SSE2
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) { // Interleaves src_u and src_v bytes into dst_uv using aligned (movdqa) loads/stores.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_u (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // src_v
- mov edi, [esp + 4 + 12] // dst_uv
- mov ecx, [esp + 4 + 16] // width
- sub edx, eax // edx = src_v - src_u, so [eax + edx] addresses src_v
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // read 16 U's
- movdqa xmm1, [eax + edx] // and 16 V's
- lea eax, [eax + 16]
- movdqa xmm2, xmm0 // copy of the U's for the high half
- punpcklbw xmm0, xmm1 // first 8 UV pairs
- punpckhbw xmm2, xmm1 // next 8 UV pairs
- movdqa [edi], xmm0
- movdqa [edi + 16], xmm2
- lea edi, [edi + 32]
- sub ecx, 16 // 16 UV pairs written per iteration
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
- uint8* dst_uv, int width) { // Interleaves src_u and src_v bytes into dst_uv using unaligned (movdqu) loads/stores.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_u (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // src_v
- mov edi, [esp + 4 + 12] // dst_uv
- mov ecx, [esp + 4 + 16] // width
- sub edx, eax // edx = src_v - src_u, so [eax + edx] addresses src_v
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // read 16 U's
- movdqu xmm1, [eax + edx] // and 16 V's
- lea eax, [eax + 16]
- movdqa xmm2, xmm0 // copy of the U's for the high half
- punpcklbw xmm0, xmm1 // first 8 UV pairs
- punpckhbw xmm2, xmm1 // next 8 UV pairs
- movdqu [edi], xmm0
- movdqu [edi + 16], xmm2
- lea edi, [edi + 32]
- sub ecx, 16 // 16 UV pairs written per iteration
- jg convertloop
-
- pop edi
- ret
- }
-}
-#endif // HAS_MERGEUVROW_SSE2
-
-#ifdef HAS_MERGEUVROW_AVX2
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) { // Interleaves src_u and src_v bytes into dst_uv, 32 UV pairs per iteration.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_u (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // src_v
- mov edi, [esp + 4 + 12] // dst_uv
- mov ecx, [esp + 4 + 16] // width
- sub edx, eax // edx = src_v - src_u, so [eax + edx] addresses src_v
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // read 32 U's
- vmovdqu ymm1, [eax + edx] // and 32 V's
- lea eax, [eax + 32]
- vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. lane-wise unpack: holds output 128-bit blocks 0 and 2
- vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. lane-wise unpack: holds output 128-bit blocks 1 and 3
- vperm2i128 ymm1, ymm2, ymm0, 0x20 // low 128 of ymm2 and low 128 of ymm0
- vperm2i128 ymm2, ymm2, ymm0, 0x31 // high 128 of ymm2 and high 128 of ymm0
- vmovdqu [edi], ymm1
- vmovdqu [edi + 32], ymm2
- lea edi, [edi + 64]
- sub ecx, 32 // 32 UV pairs written per iteration
- jg convertloop
-
- pop edi
- vzeroupper // clear upper YMM halves before returning
- ret
- }
-}
-#endif // HAS_MERGEUVROW_AVX2
-
-#ifdef HAS_COPYROW_SSE2
-// CopyRow copies 'count' bytes using 16 byte aligned (movdqa) loads/stores, 32 bytes at a time.
-__declspec(naked) __declspec(align(16))
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // count
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // load 32 bytes from src
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqa [edx], xmm0 // store 32 bytes to dst
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 32 // the loop always moves whole 32 byte chunks
- jg convertloop
- ret
- }
-}
-#endif // HAS_COPYROW_SSE2
-
-// Copies 'count' bytes with rep movsb: no alignment requirement, any byte count (multiple of 1).
-__declspec(naked) __declspec(align(16))
-void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
- __asm {
- mov eax, esi // save esi in eax (restored before ret)
- mov edx, edi // save edi in edx (restored before ret)
- mov esi, [esp + 4] // src
- mov edi, [esp + 8] // dst
- mov ecx, [esp + 12] // count
- rep movsb // copy ecx bytes from [esi] to [edi]
- mov edi, edx // restore edi
- mov esi, eax // restore esi
- ret
- }
-}
-
-#ifdef HAS_COPYROW_X86
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void CopyRow_X86(const uint8* src, uint8* dst, int count) { // Copies count / 4 dwords with rep movsd; any count % 4 trailing bytes are not copied.
- __asm {
- mov eax, esi // save esi in eax (restored before ret)
- mov edx, edi // save edi in edx (restored before ret)
- mov esi, [esp + 4] // src
- mov edi, [esp + 8] // dst
- mov ecx, [esp + 12] // count
- shr ecx, 2 // byte count -> dword count
- rep movsd // copy ecx dwords from [esi] to [edi]
- mov edi, edx // restore edi
- mov esi, eax // restore esi
- ret
- }
-}
-#endif // HAS_COPYROW_X86
-
-#ifdef HAS_ARGBCOPYALPHAROW_SSE2
-// Copies the top byte (mask 0xff000000) of each 32 bit src pixel into dst, keeping dst's low 3 bytes. width in pixels
-__declspec(naked) __declspec(align(16))
-void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- pcmpeqb xmm0, xmm0 // generate mask 0xff000000
- pslld xmm0, 24
- pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
- psrld xmm1, 8
-
- align 4
- convertloop:
- movdqa xmm2, [eax] // 8 src pixels (aligned loads)
- movdqa xmm3, [eax + 16]
- lea eax, [eax + 32]
- movdqa xmm4, [edx] // 8 dst pixels
- movdqa xmm5, [edx + 16]
- pand xmm2, xmm0 // keep only the top byte of each src pixel
- pand xmm3, xmm0
- pand xmm4, xmm1 // keep only the low 3 bytes of each dst pixel
- pand xmm5, xmm1
- por xmm2, xmm4 // combine
- por xmm3, xmm5
- movdqa [edx], xmm2
- movdqa [edx + 16], xmm3
- lea edx, [edx + 32]
- sub ecx, 8 // 8 pixels per iteration
- jg convertloop
-
- ret
- }
-}
-#endif // HAS_ARGBCOPYALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYALPHAROW_AVX2
-// Copies the top byte of each 32 bit src pixel into dst, keeping dst's low 3 bytes. width in pixels
-__declspec(naked) __declspec(align(16))
-void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- vpcmpeqb ymm0, ymm0, ymm0
- vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
-
- align 4
- convertloop:
- vmovdqu ymm1, [eax] // 16 src pixels
- vmovdqu ymm2, [eax + 32]
- lea eax, [eax + 64]
- vpblendvb ymm1, ymm1, [edx], ymm0 // take dst bytes where the mask is 0xff, src bytes (top byte) elsewhere
- vpblendvb ymm2, ymm2, [edx + 32], ymm0
- vmovdqu [edx], ymm1
- vmovdqu [edx + 32], ymm2
- lea edx, [edx + 64]
- sub ecx, 16 // 16 pixels per iteration
- jg convertloop
-
- vzeroupper // clear upper YMM halves before returning
- ret
- }
-}
-#endif // HAS_ARGBCOPYALPHAROW_AVX2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
-// Writes each src byte into the top byte of the matching 32 bit dst pixel, keeping dst's low 3 bytes. width in pixels
-__declspec(naked) __declspec(align(16))
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- pcmpeqb xmm0, xmm0 // generate mask 0xff000000
- pslld xmm0, 24
- pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
- psrld xmm1, 8
-
- align 4
- convertloop:
- movq xmm2, qword ptr [eax] // 8 Y's
- lea eax, [eax + 8]
- punpcklbw xmm2, xmm2 // each Y byte duplicated into a 16 bit word
- punpckhwd xmm3, xmm2 // Y words 4..7 land in the upper half of each dword; stale xmm3 lower halves are masked off below
- punpcklwd xmm2, xmm2 // Y words 0..3 duplicated into dwords (must follow the line above, which reads xmm2)
- movdqa xmm4, [edx] // 8 dst pixels (aligned)
- movdqa xmm5, [edx + 16]
- pand xmm2, xmm0 // keep only the top byte (the Y value)
- pand xmm3, xmm0
- pand xmm4, xmm1 // keep only the low 3 bytes of each dst pixel
- pand xmm5, xmm1
- por xmm2, xmm4 // combine
- por xmm3, xmm5
- movdqa [edx], xmm2
- movdqa [edx + 16], xmm3
- lea edx, [edx + 32]
- sub ecx, 8 // 8 pixels per iteration
- jg convertloop
-
- ret
- }
-}
-#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
-// Writes each src byte into the top byte of the matching 32 bit dst pixel, keeping dst's low 3 bytes. width in pixels
-__declspec(naked) __declspec(align(16))
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- vpcmpeqb ymm0, ymm0, ymm0
- vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
-
- align 4
- convertloop:
- vpmovzxbd ymm1, qword ptr [eax] // 8 src bytes zero-extended to dwords
- vpmovzxbd ymm2, qword ptr [eax + 8]
- lea eax, [eax + 16]
- vpslld ymm1, ymm1, 24 // move each value into the top byte of its dword
- vpslld ymm2, ymm2, 24
- vpblendvb ymm1, ymm1, [edx], ymm0 // take dst bytes where the mask is 0xff, the new top byte elsewhere
- vpblendvb ymm2, ymm2, [edx + 32], ymm0
- vmovdqu [edx], ymm1
- vmovdqu [edx + 32], ymm2
- lea edx, [edx + 64]
- sub ecx, 16 // 16 pixels per iteration
- jg convertloop
-
- vzeroupper // clear upper YMM halves before returning
- ret
- }
-}
-#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
-
-#ifdef HAS_SETROW_X86
-// SetRow8 writes 'count' bytes using a 32 bit value repeated (count / 4 dword stores via rep stosd).
-__declspec(naked) __declspec(align(16))
-void SetRow_X86(uint8* dst, uint32 v32, int count) {
- __asm {
- mov edx, edi // save edi in edx (restored before ret)
- mov edi, [esp + 4] // dst
- mov eax, [esp + 8] // v32
- mov ecx, [esp + 12] // count
- shr ecx, 2 // byte count -> dword count
- rep stosd // store eax to [edi], ecx times
- mov edi, edx // restore edi
- ret
- }
-}
-
-// SetRow32 writes 'width' 32 bit words of v32 per row, for 'height' rows that are dst_stride bytes apart.
-__declspec(naked) __declspec(align(16))
-void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
- int dst_stride, int height) {
- __asm {
- push esi // three registers pushed: arguments start at esp + 12 + 4
- push edi
- push ebp
- mov edi, [esp + 12 + 4] // dst
- mov eax, [esp + 12 + 8] // v32
- mov ebp, [esp + 12 + 12] // width
- mov edx, [esp + 12 + 16] // dst_stride
- mov esi, [esp + 12 + 20] // height
- lea ecx, [ebp * 4] // row length in bytes
- sub edx, ecx // stride - width * 4
-
- align 4
- convertloop:
- mov ecx, ebp // reload the per-row dword count
- rep stosd // fill one row; edi ends just past the row
- add edi, edx // skip the remainder of the stride to the next row
- sub esi, 1 // one row done
- jg convertloop
-
- pop ebp
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_SETROW_X86
-
-#ifdef HAS_YUY2TOYROW_AVX2
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void YUY2ToYRow_AVX2(const uint8* src_yuy2,
- uint8* dst_y, int pix) { // Extracts the even bytes (Y) of src_yuy2 into dst_y, 32 per iteration.
- __asm {
- mov eax, [esp + 4] // src_yuy2
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // load 64 source bytes
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpand ymm0, ymm0, ymm5 // even bytes are Y
- vpand ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1 // mutates: packs per 128-bit lane, qwords end up out of order
- vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to restore source order
- sub ecx, 32 // sets the flags for jg; the vmovdqu/lea below do not change them
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- jg convertloop
- vzeroupper // clear upper YMM halves before returning
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) { // Averages two rows (src and src + stride) and splits their odd bytes into dst_u / dst_v, 16 each per iteration.
- __asm {
- push esi // two registers pushed: arguments start at esp + 8 + 4
- push edi
- mov eax, [esp + 8 + 4] // src_yuy2
- mov esi, [esp + 8 + 8] // stride_yuy2
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // load 64 bytes of the first row
- vmovdqu ymm1, [eax + 32]
- vpavgb ymm0, ymm0, [eax + esi] // byte-wise average with the row one stride away
- vpavgb ymm1, ymm1, [eax + esi + 32]
- lea eax, [eax + 64]
- vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV
- vpsrlw ymm1, ymm1, 8
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to undo the lane-wise pack
- vpand ymm1, ymm0, ymm5 // U
- vpsrlw ymm0, ymm0, 8 // V
- vpackuswb ymm1, ymm1, ymm1 // mutates.
- vpackuswb ymm0, ymm0, ymm0 // mutates.
- vpermq ymm1, ymm1, 0xd8 // gather the 16 valid bytes into the low 128 bits
- vpermq ymm0, ymm0, 0xd8
- vextractf128 [edx], ymm1, 0 // U
- vextractf128 [edx + edi], ymm0, 0 // V
- lea edx, [edx + 16]
- sub ecx, 32 // 64 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- pop esi
- vzeroupper // clear upper YMM halves before returning
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) { // Splits the odd bytes of a single src_yuy2 row into dst_u / dst_v (no second-row averaging), 16 each per iteration.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_yuy2 (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // load 64 source bytes
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV
- vpsrlw ymm1, ymm1, 8
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to undo the lane-wise pack
- vpand ymm1, ymm0, ymm5 // U
- vpsrlw ymm0, ymm0, 8 // V
- vpackuswb ymm1, ymm1, ymm1 // mutates.
- vpackuswb ymm0, ymm0, ymm0 // mutates.
- vpermq ymm1, ymm1, 0xd8 // gather the 16 valid bytes into the low 128 bits
- vpermq ymm0, ymm0, 0xd8
- vextractf128 [edx], ymm1, 0 // U
- vextractf128 [edx + edi], ymm0, 0 // V
- lea edx, [edx + 16]
- sub ecx, 32 // 64 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- vzeroupper // clear upper YMM halves before returning
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void UYVYToYRow_AVX2(const uint8* src_uyvy,
- uint8* dst_y, int pix) { // Extracts the odd bytes (Y) of src_uyvy into dst_y, 32 per iteration.
- __asm {
- mov eax, [esp + 4] // src_uyvy
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // load 64 source bytes
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpsrlw ymm0, ymm0, 8 // odd bytes are Y
- vpsrlw ymm1, ymm1, 8
- vpackuswb ymm0, ymm0, ymm1 // mutates: packs per 128-bit lane, qwords end up out of order
- vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to restore source order
- sub ecx, 32 // sets the flags for jg; the vmovdqu/lea below do not change them
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- jg convertloop
- vzeroupper // must run before ret: previously placed after ret and therefore never executed
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) { // Averages two rows (src and src + stride) and splits their even bytes into dst_u / dst_v, 16 each per iteration.
- __asm {
- push esi // two registers pushed: arguments start at esp + 8 + 4
- push edi
- mov eax, [esp + 8 + 4] // src_uyvy
- mov esi, [esp + 8 + 8] // stride_uyvy
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // load 64 bytes of the first row
- vmovdqu ymm1, [eax + 32]
- vpavgb ymm0, ymm0, [eax + esi] // byte-wise average with the row one stride away
- vpavgb ymm1, ymm1, [eax + esi + 32]
- lea eax, [eax + 64]
- vpand ymm0, ymm0, ymm5 // UYVY -> UVUV
- vpand ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to undo the lane-wise pack
- vpand ymm1, ymm0, ymm5 // U
- vpsrlw ymm0, ymm0, 8 // V
- vpackuswb ymm1, ymm1, ymm1 // mutates.
- vpackuswb ymm0, ymm0, ymm0 // mutates.
- vpermq ymm1, ymm1, 0xd8 // gather the 16 valid bytes into the low 128 bits
- vpermq ymm0, ymm0, 0xd8
- vextractf128 [edx], ymm1, 0 // U
- vextractf128 [edx + edi], ymm0, 0 // V
- lea edx, [edx + 16]
- sub ecx, 32 // 64 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- pop esi
- vzeroupper // clear upper YMM halves before returning
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) { // Splits the even bytes of a single src_uyvy row into dst_u / dst_v (no second-row averaging), 16 each per iteration.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_uyvy (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // load 64 source bytes
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpand ymm0, ymm0, ymm5 // UYVY -> UVUV
- vpand ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to undo the lane-wise pack
- vpand ymm1, ymm0, ymm5 // U
- vpsrlw ymm0, ymm0, 8 // V
- vpackuswb ymm1, ymm1, ymm1 // mutates.
- vpackuswb ymm0, ymm0, ymm0 // mutates.
- vpermq ymm1, ymm1, 0xd8 // gather the 16 valid bytes into the low 128 bits
- vpermq ymm0, ymm0, 0xd8
- vextractf128 [edx], ymm1, 0 // U
- vextractf128 [edx + edi], ymm0, 0 // V
- lea edx, [edx + 16]
- sub ecx, 32 // 64 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- vzeroupper // clear upper YMM halves before returning
- ret
- }
-}
-#endif // HAS_YUY2TOYROW_AVX2
-
-#ifdef HAS_YUY2TOYROW_SSE2
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void YUY2ToYRow_SSE2(const uint8* src_yuy2,
- uint8* dst_y, int pix) { // Extracts the even bytes (Y) of src_yuy2 into dst_y, 16 per iteration, with aligned (movdqa) accesses.
- __asm {
- mov eax, [esp + 4] // src_yuy2
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // load 32 source bytes
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5 // even bytes are Y
- pand xmm1, xmm5
- packuswb xmm0, xmm1 // 16 Y bytes
- sub ecx, 16 // sets the flags for jg; the movdqa/lea below do not change them
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) { // Averages two rows (src and src + stride) and splits their odd bytes into dst_u / dst_v, 8 each per iteration; aligned loads.
- __asm {
- push esi // two registers pushed: arguments start at esp + 8 + 4
- push edi
- mov eax, [esp + 8 + 4] // src_yuy2
- mov esi, [esp + 8 + 8] // stride_yuy2
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // 32 bytes of the first row
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi] // 32 bytes of the row one stride away
- movdqa xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // byte-wise average of the two rows
- pavgb xmm1, xmm3
- psrlw xmm0, 8 // YUYV -> UVUV
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0 // copy for the V extraction
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0 // store 8 U bytes
- movq qword ptr [edx + edi], xmm1 // store 8 V bytes
- lea edx, [edx + 8]
- sub ecx, 16 // 32 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) { // Splits the odd bytes of a single src_yuy2 row into dst_u / dst_v, 8 each per iteration; aligned loads.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_yuy2 (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // load 32 source bytes
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // YUYV -> UVUV
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0 // copy for the V extraction
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0 // store 8 U bytes
- movq qword ptr [edx + edi], xmm1 // store 8 V bytes
- lea edx, [edx + 8]
- sub ecx, 16 // 32 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_y, int pix) { // Extracts the even bytes (Y) of src_yuy2 into dst_y, 16 per iteration, with unaligned (movdqu) accesses.
- __asm {
- mov eax, [esp + 4] // src_yuy2
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // load 32 source bytes
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5 // even bytes are Y
- pand xmm1, xmm5
- packuswb xmm0, xmm1 // 16 Y bytes
- sub ecx, 16 // sets the flags for jg; the movdqu/lea below do not change them
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) { // Averages two rows (src and src + stride) and splits their odd bytes into dst_u / dst_v, 8 each per iteration; unaligned loads.
- __asm {
- push esi // two registers pushed: arguments start at esp + 8 + 4
- push edi
- mov eax, [esp + 8 + 4] // src_yuy2
- mov esi, [esp + 8 + 8] // stride_yuy2
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // 32 bytes of the first row
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi] // 32 bytes of the row one stride away
- movdqu xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // byte-wise average of the two rows
- pavgb xmm1, xmm3
- psrlw xmm0, 8 // YUYV -> UVUV
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0 // copy for the V extraction
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0 // store 8 U bytes
- movq qword ptr [edx + edi], xmm1 // store 8 V bytes
- lea edx, [edx + 8]
- sub ecx, 16 // 32 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) { // Splits the odd bytes of a single src_yuy2 row into dst_u / dst_v, 8 each per iteration; unaligned loads.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_yuy2 (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // load 32 source bytes
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // YUYV -> UVUV
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0 // copy for the V extraction
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0 // store 8 U bytes
- movq qword ptr [edx + edi], xmm1 // store 8 V bytes
- lea edx, [edx + 8]
- sub ecx, 16 // 32 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void UYVYToYRow_SSE2(const uint8* src_uyvy,
- uint8* dst_y, int pix) { // Extracts the odd bytes (Y) of src_uyvy into dst_y, 16 per iteration, with aligned (movdqa) accesses.
- __asm {
- mov eax, [esp + 4] // src_uyvy
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // load 32 source bytes
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // odd bytes are Y
- psrlw xmm1, 8
- packuswb xmm0, xmm1 // 16 Y bytes
- sub ecx, 16 // sets the flags for jg; the movdqa/lea below do not change them
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) { // Averages two rows (src and src + stride) and splits their even bytes into dst_u / dst_v, 8 each per iteration; aligned loads.
- __asm {
- push esi // two registers pushed: arguments start at esp + 8 + 4
- push edi
- mov eax, [esp + 8 + 4] // src_uyvy
- mov esi, [esp + 8 + 8] // stride_uyvy
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // 32 bytes of the first row
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi] // 32 bytes of the row one stride away
- movdqa xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // byte-wise average of the two rows
- pavgb xmm1, xmm3
- pand xmm0, xmm5 // UYVY -> UVUV
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0 // copy for the V extraction
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0 // store 8 U bytes
- movq qword ptr [edx + edi], xmm1 // store 8 V bytes
- lea edx, [edx + 8]
- sub ecx, 16 // 32 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) { // Splits the even bytes of a single src_uyvy row into dst_u / dst_v, 8 each per iteration; aligned loads.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_uyvy (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // load 32 source bytes
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5 // UYVY -> UVUV
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0 // copy for the V extraction
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0 // store 8 U bytes
- movq qword ptr [edx + edi], xmm1 // store 8 V bytes
- lea edx, [edx + 8]
- sub ecx, 16 // 32 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_y, int pix) { // Extracts the odd bytes (Y) of src_uyvy into dst_y, 16 per iteration, with unaligned (movdqu) accesses.
- __asm {
- mov eax, [esp + 4] // src_uyvy
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // load 32 source bytes
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // odd bytes are Y
- psrlw xmm1, 8
- packuswb xmm0, xmm1 // 16 Y bytes
- sub ecx, 16 // sets the flags for jg; the movdqu/lea below do not change them
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) { // Averages two rows (src and src + stride) and splits their even bytes into dst_u / dst_v, 8 each per iteration; unaligned loads.
- __asm {
- push esi // two registers pushed: arguments start at esp + 8 + 4
- push edi
- mov eax, [esp + 8 + 4] // src_uyvy
- mov esi, [esp + 8 + 8] // stride_uyvy
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // 32 bytes of the first row
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi] // 32 bytes of the row one stride away
- movdqu xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // byte-wise average of the two rows
- pavgb xmm1, xmm3
- pand xmm0, xmm5 // UYVY -> UVUV
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0 // copy for the V extraction
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0 // store 8 U bytes
- movq qword ptr [edx + edi], xmm1 // store 8 V bytes
- lea edx, [edx + 8]
- sub ecx, 16 // 32 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) { // Splits the even bytes of a single src_uyvy row into dst_u / dst_v, 8 each per iteration; unaligned loads.
- __asm {
- push edi // edi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_uyvy (the extra +4 skips the pushed edi)
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // load 32 source bytes
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5 // UYVY -> UVUV
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0 // copy for the V extraction
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0 // store 8 U bytes
- movq qword ptr [edx + edi], xmm1 // store 8 V bytes
- lea edx, [edx + 8]
- sub ecx, 16 // 32 source bytes consumed per iteration
- jg convertloop
-
- pop edi
- ret
- }
-}
-#endif // HAS_YUY2TOYROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSE2
-// Blends per pixel: dst = saturating_add(src_argb0 with alpha forced to 255, (src_argb1 * (256 - alpha0)) >> 8). Main loop does 4 pixels; leading/trailing loops do 1.
-__declspec(naked) __declspec(align(16))
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi // esi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm7, xmm7 // generate constant 1
- psrlw xmm7, 15
- pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff
- psrlw xmm6, 8
- pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00
- psllw xmm5, 8
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
-
- sub ecx, 1 // ecx is kept biased by -1 so the 1 pixel loops can use jge
- je convertloop1 // only 1 pixel?
- jl convertloop1b // width <= 0: nothing to do
-
- // 1 pixel loop until destination pointer is aligned.
- alignloop1:
- test edx, 15 // aligned?
- je alignloop1b
- movd xmm3, [eax]
- lea eax, [eax + 4]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movd xmm2, [esi] // _r_b
- psrlw xmm3, 8 // alpha
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words
- pshuflw xmm3, xmm3, 0F5h
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movd xmm1, [esi] // _a_g
- lea esi, [esi + 4]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 1
- movd [edx], xmm0
- lea edx, [edx + 4]
- jge alignloop1
-
- alignloop1b:
- add ecx, 1 - 4 // undo the -1 bias and apply a -4 bias for the 4 pixel loop
- jl convertloop4b // fewer than 4 pixels left
-
- // 4 pixel loop.
- convertloop4:
- movdqu xmm3, [eax] // src argb
- lea eax, [eax + 16]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movdqu xmm2, [esi] // _r_b
- psrlw xmm3, 8 // alpha
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words
- pshuflw xmm3, xmm3, 0F5h
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movdqu xmm1, [esi] // _a_g
- lea esi, [esi + 16]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 4
- movdqa [edx], xmm0 // aligned store: dst was aligned by alignloop1
- lea edx, [edx + 16]
- jge convertloop4
-
- convertloop4b:
- add ecx, 4 - 1 // switch back to the -1 bias for the trailing pixels
- jl convertloop1b // no pixels left
-
- // 1 pixel loop.
- convertloop1:
- movd xmm3, [eax] // src argb
- lea eax, [eax + 4]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movd xmm2, [esi] // _r_b
- psrlw xmm3, 8 // alpha
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words
- pshuflw xmm3, xmm3, 0F5h
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movd xmm1, [esi] // _a_g
- lea esi, [esi + 4]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 1
- movd [edx], xmm0
- lea edx, [edx + 4]
- jge convertloop1
-
- convertloop1b:
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBBLENDROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSSE3
-// Shuffle table for isolating alpha: pshufb control that copies byte 3 of each 4 byte pixel into the low byte of both of its 16 bit words and zeroes the high bytes (0x80).
-static const uvec8 kShuffleAlpha = {
- 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
- 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
-};
-// Same as SSE2, but replaces:
-// psrlw xmm3, 8 // alpha
-// pshufhw xmm3, xmm3, 0F5h // 8 alpha words
-// pshuflw xmm3, xmm3, 0F5h
-// with..
-// pshufb xmm3, kShuffleAlpha // alpha
-// Blends 4 pixels per iteration in the main loops and 1 pixel at a time in the aligning and trailing loops.
-
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) { // Per pixel: dst = saturating_add(src_argb0 with alpha forced to 255, (src_argb1 * (256 - alpha0)) >> 8).
- __asm {
- push esi // esi is used below; restored before ret
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm7, xmm7 // generate constant 0x0001
- psrlw xmm7, 15
- pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff
- psrlw xmm6, 8
- pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00
- psllw xmm5, 8
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
-
- sub ecx, 1 // ecx is kept biased by -1 so the 1 pixel loops can use jge
- je convertloop1 // only 1 pixel?
- jl convertloop1b // width <= 0: nothing to do
-
- // 1 pixel loop until destination pointer is aligned.
- alignloop1:
- test edx, 15 // aligned?
- je alignloop1b
- movd xmm3, [eax]
- lea eax, [eax + 4]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movd xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movd xmm1, [esi] // _a_g
- lea esi, [esi + 4]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 1
- movd [edx], xmm0
- lea edx, [edx + 4]
- jge alignloop1
-
- alignloop1b:
- add ecx, 1 - 4 // undo the -1 bias and apply a -4 bias for the 4 pixel loops
- jl convertloop4b // fewer than 4 pixels left
-
- test eax, 15 // unaligned?
- jne convertuloop4
- test esi, 15 // unaligned?
- jne convertuloop4
-
- // 4 pixel loop.
- convertloop4:
- movdqa xmm3, [eax] // src argb
- lea eax, [eax + 16]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movdqa xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movdqa xmm1, [esi] // _a_g
- lea esi, [esi + 16]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jge convertloop4
- jmp convertloop4b // skip the unaligned variant of the loop
-
- // 4 pixel unaligned loop.
- convertuloop4:
- movdqu xmm3, [eax] // src argb
- lea eax, [eax + 16]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movdqu xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movdqu xmm1, [esi] // _a_g
- lea esi, [esi + 16]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 4
- movdqa [edx], xmm0 // aligned store: only the sources are unaligned here, dst was aligned by alignloop1
- lea edx, [edx + 16]
- jge convertuloop4
-
- convertloop4b:
- add ecx, 4 - 1 // switch back to the -1 bias for the trailing pixels
- jl convertloop1b // no pixels left
-
- // 1 pixel loop.
- convertloop1:
- movd xmm3, [eax] // src argb
- lea eax, [eax + 4]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movd xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movd xmm1, [esi] // _a_g
- lea esi, [esi + 4]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- sub ecx, 1
- movd [edx], xmm0
- lea edx, [edx + 4]
- jge convertloop1
-
- convertloop1b:
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBBLENDROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-// Attenuate 4 pixels at a time: the low 3 bytes of each pixel are scaled by the pixel's top (alpha) byte; alpha itself is kept.
-// Aligned to 16 bytes (movdqa loads and stores).
-__declspec(naked) __declspec(align(16))
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
- __asm {
- mov eax, [esp + 4] // src_argb0
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
- pcmpeqb xmm5, xmm5 // generate mask 0x00ffffff
- psrld xmm5, 8
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // read 4 pixels
- punpcklbw xmm0, xmm0 // first 2
- pshufhw xmm2, xmm0, 0FFh // 8 alpha words
- pshuflw xmm2, xmm2, 0FFh
- pmulhuw xmm0, xmm2 // rgb * a
- movdqa xmm1, [eax] // read 4 pixels
- punpckhbw xmm1, xmm1 // next 2 pixels
- pshufhw xmm2, xmm1, 0FFh // 8 alpha words
- pshuflw xmm2, xmm2, 0FFh
- pmulhuw xmm1, xmm2 // rgb * a
- movdqa xmm2, [eax] // alphas
- lea eax, [eax + 16]
- psrlw xmm0, 8 // scale the products back to 8 bits
- pand xmm2, xmm4 // isolate the original alpha bytes
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- pand xmm0, xmm5 // drop the computed alpha byte so the original alpha can be merged in
- por xmm0, xmm2 // keep original alphas
- sub ecx, 4 // sets the flags for jg; the movdqa/lea below do not change them
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
-
- ret
- }
-}
-#endif // HAS_ARGBATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-// Shuffle table duplicating alpha: for pixels 0 and 1, byte 3 / byte 7 fills three 16 bit words and the fourth word is zeroed (128).
-static const uvec8 kShuffleAlpha0 = {
- 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
-};
-static const uvec8 kShuffleAlpha1 = { // same layout for pixels 2 and 3: byte 11 / byte 15 duplicated, fourth word zeroed (128)
- 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
- 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
-};
-__declspec(naked) __declspec(align(16)) // hand-written body: arguments are read straight from the stack
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { // Scales the low 3 bytes of each pixel by its top (alpha) byte, 4 pixels per iteration; unaligned accesses.
- __asm {
- mov eax, [esp + 4] // src_argb0
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- pcmpeqb xmm3, xmm3 // generate mask 0xff000000
- pslld xmm3, 24
- movdqa xmm4, kShuffleAlpha0 // shuffle control for the first 2 pixels' alphas
- movdqa xmm5, kShuffleAlpha1 // shuffle control for the next 2 pixels' alphas
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels
- pshufb xmm0, xmm4 // isolate first 2 alphas
- movdqu xmm1, [eax] // read 4 pixels
- punpcklbw xmm1, xmm1 // first 2 pixel rgbs
- pmulhuw xmm0, xmm1 // rgb * a
- movdqu xmm1, [eax] // read 4 pixels
- pshufb xmm1, xmm5 // isolate next 2 alphas
- movdqu xmm2, [eax] // read 4 pixels
- punpckhbw xmm2, xmm2 // next 2 pixel rgbs
- pmulhuw xmm1, xmm2 // rgb * a
- movdqu xmm2, [eax] // mask original alpha
- lea eax, [eax + 16]
- pand xmm2, xmm3
- psrlw xmm0, 8 // scale the products back to 8 bits
- psrlw xmm1, 8
- packuswb xmm0, xmm1 // alpha bytes are 0 here because the shuffle tables zero the alpha word
- por xmm0, xmm2 // copy original alpha
- sub ecx, 4 // sets the flags for jg; the movdqu/lea below do not change them
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop
-
- ret
- }
-}
-#endif // HAS_ARGBATTENUATEROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_AVX2
-// Shuffle table duplicating alpha: after byte-to-word unpacking, bytes 6,7 / 14,15 hold each pixel's alpha word; index 128 yields zero.
-static const ulvec8 kShuffleAlpha_AVX2 = {
- 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
- 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
- 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
- 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
-};
-__declspec(naked) __declspec(align(16))
-void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { // attenuates 8 ARGB pixels per iteration
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- sub edx, eax // edx = dst - src, so [eax + edx] addresses the destination
- vmovdqa ymm4, kShuffleAlpha_AVX2
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
- vpslld ymm5, ymm5, 24
-
- align 4
- convertloop:
- vmovdqu ymm6, [eax] // read 8 pixels.
- vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated (unpack works per 128-bit lane, so pixel order is interleaved).
- vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
- vpshufb ymm2, ymm0, ymm4 // low 4 alphas
- vpshufb ymm3, ymm1, ymm4 // high 4 alphas
- vpmulhuw ymm0, ymm0, ymm2 // rgb * a
- vpmulhuw ymm1, ymm1, ymm3 // rgb * a
- vpand ymm6, ymm6, ymm5 // isolate alpha
- vpsrlw ymm0, ymm0, 8
- vpsrlw ymm1, ymm1, 8
- vpackuswb ymm0, ymm0, ymm1 // unmutated (the per-lane pack restores the original pixel order).
- vpor ymm0, ymm0, ymm6 // copy original alpha
- sub ecx, 8 // sets the flags tested by jg below
- vmovdqu [eax + edx], ymm0
- lea eax, [eax + 32]
- jg convertloop // loop while pixels remain
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBATTENUATEROW_AVX2
-
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-// Unattenuate 4 pixels at a time: multiply each pixel by the fixed_invtbl8 entry selected by its alpha.
-// Uses movdqu loads and stores, so the pointers need no particular alignment.
-__declspec(naked) __declspec(align(16))
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb (+8 skips the two registers pushed above)
- mov edx, [esp + 8 + 8] // dst_argb
- mov ecx, [esp + 8 + 12] // width
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels
- movzx esi, byte ptr [eax + 3] // first alpha
- movzx edi, byte ptr [eax + 7] // second alpha
- punpcklbw xmm0, xmm0 // first 2
- movd xmm2, dword ptr fixed_invtbl8[esi * 4] // table lookup indexed by alpha (dword entries)
- movd xmm3, dword ptr fixed_invtbl8[edi * 4]
- pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a
- pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
- movlhps xmm2, xmm3 // combine the multipliers for both pixels
- pmulhuw xmm0, xmm2 // rgb * inverse alpha
-
- movdqu xmm1, [eax] // read 4 pixels
- movzx esi, byte ptr [eax + 11] // third alpha
- movzx edi, byte ptr [eax + 15] // fourth alpha
- punpckhbw xmm1, xmm1 // next 2
- movd xmm2, dword ptr fixed_invtbl8[esi * 4]
- movd xmm3, dword ptr fixed_invtbl8[edi * 4]
- pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words
- pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
- movlhps xmm2, xmm3
- pmulhuw xmm1, xmm2 // rgb * inverse alpha
- lea eax, [eax + 16]
-
- packuswb xmm0, xmm1 // back to bytes with unsigned saturation
- sub ecx, 4 // sets the flags tested by jg below
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // loop while pixels remain
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBUNATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBUNATTENUATEROW_AVX2
-// Shuffle table duplicating alpha.
-static const ulvec8 kUnattenShuffleAlpha_AVX2 = {
- 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15,
- 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15,
-};
-// TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
-// USE_GATHER is not on by default, due to being a slow instruction.
-#ifdef USE_GATHER
-__declspec(naked) __declspec(align(16))
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- int width) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- sub edx, eax // edx = dst - src, so [eax + edx] addresses the destination
- vmovdqa ymm4, kUnattenShuffleAlpha_AVX2
-
- align 4
- convertloop:
- vmovdqu ymm6, [eax] // read 8 pixels.
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xffffffff for gather (rebuilt every iteration because the gather clears it).
- vpsrld ymm2, ymm6, 24 // alpha in low 8 bits.
- vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
- vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
- vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm5 // ymm5 cleared. 1, a
- vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a
- vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated.
- vpshufb ymm2, ymm2, ymm4 // replicate low 4 alphas. 1, a, a, a
- vpshufb ymm3, ymm3, ymm4 // replicate high 4 alphas
- vpmulhuw ymm0, ymm0, ymm2 // rgb * ia
- vpmulhuw ymm1, ymm1, ymm3 // rgb * ia
- vpackuswb ymm0, ymm0, ymm1 // unmutated.
- sub ecx, 8 // sets the flags tested by jg below
- vmovdqu [eax + edx], ymm0
- lea eax, [eax + 32]
- jg convertloop // loop while pixels remain
-
- vzeroupper
- ret
- }
-}
-#else // USE_GATHER
-__declspec(naked) __declspec(align(16))
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- int width) {
- __asm {
-
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- sub edx, eax // edx = dst - src, so [eax + edx] addresses the destination
- vmovdqa ymm5, kUnattenShuffleAlpha_AVX2
-
- push esi // pushed after the argument loads, so the [esp + N] offsets above need no adjustment
- push edi
-
- align 4
- convertloop:
- // replace VPGATHER: eight scalar table lookups assembled into ymm3
- movzx esi, byte ptr [eax + 3] // alpha0
- movzx edi, byte ptr [eax + 7] // alpha1
- vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a0]
- vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a1]
- movzx esi, byte ptr [eax + 11] // alpha2
- movzx edi, byte ptr [eax + 15] // alpha3
- vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
- vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a2]
- vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a3]
- movzx esi, byte ptr [eax + 19] // alpha4
- movzx edi, byte ptr [eax + 23] // alpha5
- vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2]
- vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a4]
- vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a5]
- movzx esi, byte ptr [eax + 27] // alpha6
- movzx edi, byte ptr [eax + 31] // alpha7
- vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
- vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a6]
- vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a7]
- vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6]
- vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0]
- vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4]
- vinserti128 ymm3, ymm3, xmm0, 1 // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0]
- // end of VPGATHER
-
- vmovdqu ymm6, [eax] // read 8 pixels.
- vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
- vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
- vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a
- vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated.
- vpshufb ymm2, ymm2, ymm5 // replicate low 4 alphas. 1, a, a, a
- vpshufb ymm3, ymm3, ymm5 // replicate high 4 alphas
- vpmulhuw ymm0, ymm0, ymm2 // rgb * ia
- vpmulhuw ymm1, ymm1, ymm3 // rgb * ia
- vpackuswb ymm0, ymm0, ymm1 // unmutated.
- sub ecx, 8 // sets the flags tested by jg below
- vmovdqu [eax + edx], ymm0
- lea eax, [eax + 32]
- jg convertloop // loop while pixels remain
-
- pop edi
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // USE_GATHER
-#endif // HAS_ARGBUNATTENUATEROW_AVX2
-
-#ifdef HAS_ARGBGRAYROW_SSSE3
-// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels per iteration; uses movdqa, so both pointers must be 16-byte aligned.
-__declspec(naked) __declspec(align(16))
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_argb */
- mov ecx, [esp + 12] /* width */
- movdqa xmm4, kARGBToYJ
- movdqa xmm5, kAddYJ64
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // G
- movdqa xmm1, [eax + 16]
- pmaddubsw xmm0, xmm4 // weight channels by kARGBToYJ, summing adjacent pairs
- pmaddubsw xmm1, xmm4
- phaddw xmm0, xmm1 // one 16-bit weighted sum per pixel
- paddw xmm0, xmm5 // Add .5 for rounding.
- psrlw xmm0, 7
- packuswb xmm0, xmm0 // 8 G bytes
- movdqa xmm2, [eax] // A
- movdqa xmm3, [eax + 16]
- lea eax, [eax + 32]
- psrld xmm2, 24 // move each alpha into the low byte of its dword
- psrld xmm3, 24
- packuswb xmm2, xmm3
- packuswb xmm2, xmm2 // 8 A bytes
- movdqa xmm3, xmm0 // Weave into GG, GA, then GGGA
- punpcklbw xmm0, xmm0 // 8 GG words
- punpcklbw xmm3, xmm2 // 8 GA words
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm3 // GGGA first 4
- punpckhwd xmm1, xmm3 // GGGA next 4
- sub ecx, 8 // sets the flags tested by jg below
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- jg convertloop // loop while pixels remain
- ret
- }
-}
-#endif // HAS_ARGBGRAYROW_SSSE3
-
-#ifdef HAS_ARGBSEPIAROW_SSSE3
-// b = (r * 35 + g * 68 + b * 17) >> 7
-// g = (r * 45 + g * 88 + b * 22) >> 7
-// r = (r * 50 + g * 98 + b * 24) >> 7
-// Constant for ARGB color to sepia tone (coefficients in B, G, R, A byte order).
-static const vec8 kARGBToSepiaB = {
- 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
-};
-
-static const vec8 kARGBToSepiaG = {
- 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
-};
-
-static const vec8 kARGBToSepiaR = {
- 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
-};
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels, in place in dst_argb; uses movdqa, so dst_argb must be 16-byte aligned.
-__declspec(naked) __declspec(align(16))
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
- __asm {
- mov eax, [esp + 4] /* dst_argb */
- mov ecx, [esp + 8] /* width */
- movdqa xmm2, kARGBToSepiaB
- movdqa xmm3, kARGBToSepiaG
- movdqa xmm4, kARGBToSepiaR
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // B
- movdqa xmm6, [eax + 16]
- pmaddubsw xmm0, xmm2
- pmaddubsw xmm6, xmm2
- phaddw xmm0, xmm6 // one weighted sum per pixel
- psrlw xmm0, 7
- packuswb xmm0, xmm0 // 8 B values
- movdqa xmm5, [eax] // G
- movdqa xmm1, [eax + 16]
- pmaddubsw xmm5, xmm3
- pmaddubsw xmm1, xmm3
- phaddw xmm5, xmm1
- psrlw xmm5, 7
- packuswb xmm5, xmm5 // 8 G values
- punpcklbw xmm0, xmm5 // 8 BG values
- movdqa xmm5, [eax] // R
- movdqa xmm1, [eax + 16]
- pmaddubsw xmm5, xmm4
- pmaddubsw xmm1, xmm4
- phaddw xmm5, xmm1
- psrlw xmm5, 7
- packuswb xmm5, xmm5 // 8 R values
- movdqa xmm6, [eax] // A
- movdqa xmm1, [eax + 16]
- psrld xmm6, 24 // move each alpha into the low byte of its dword
- psrld xmm1, 24
- packuswb xmm6, xmm1
- packuswb xmm6, xmm6 // 8 A values
- punpcklbw xmm5, xmm6 // 8 RA values
- movdqa xmm1, xmm0 // Weave BG, RA together
- punpcklwd xmm0, xmm5 // BGRA first 4
- punpckhwd xmm1, xmm5 // BGRA next 4
- sub ecx, 8 // sets the flags tested by jg below
- movdqa [eax], xmm0 // write back over the source pixels
- movdqa [eax + 16], xmm1
- lea eax, [eax + 32]
- jg convertloop // loop while pixels remain
- ret
- }
-}
-#endif // HAS_ARGBSEPIAROW_SSSE3
-
-#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
-// Transform 8 ARGB pixels (32 bytes) with color matrix.
-// Same as Sepia except matrix is provided (16 int8 coefficients read from matrix_argb) and sums are shifted right by 6.
-// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
-// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
-__declspec(naked) __declspec(align(16))
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_argb */
- mov ecx, [esp + 12] /* matrix_argb */
- movdqu xmm5, [ecx]
- pshufd xmm2, xmm5, 0x00 // broadcast matrix bytes 0-3 (used for B below)
- pshufd xmm3, xmm5, 0x55 // broadcast matrix bytes 4-7 (used for G)
- pshufd xmm4, xmm5, 0xaa // broadcast matrix bytes 8-11 (used for R)
- pshufd xmm5, xmm5, 0xff // broadcast matrix bytes 12-15 (used for A)
- mov ecx, [esp + 16] /* width */
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // B
- movdqa xmm7, [eax + 16]
- pmaddubsw xmm0, xmm2
- pmaddubsw xmm7, xmm2
- movdqa xmm6, [eax] // G
- movdqa xmm1, [eax + 16]
- pmaddubsw xmm6, xmm3
- pmaddubsw xmm1, xmm3
- phaddsw xmm0, xmm7 // B
- phaddsw xmm6, xmm1 // G
- psraw xmm0, 6 // B
- psraw xmm6, 6 // G
- packuswb xmm0, xmm0 // 8 B values
- packuswb xmm6, xmm6 // 8 G values
- punpcklbw xmm0, xmm6 // 8 BG values
- movdqa xmm1, [eax] // R
- movdqa xmm7, [eax + 16]
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm7, xmm4
- phaddsw xmm1, xmm7 // R
- movdqa xmm6, [eax] // A
- movdqa xmm7, [eax + 16]
- pmaddubsw xmm6, xmm5
- pmaddubsw xmm7, xmm5
- phaddsw xmm6, xmm7 // A
- psraw xmm1, 6 // R
- psraw xmm6, 6 // A
- packuswb xmm1, xmm1 // 8 R values
- packuswb xmm6, xmm6 // 8 A values
- punpcklbw xmm1, xmm6 // 8 RA values
- movdqa xmm6, xmm0 // Weave BG, RA together
- punpcklwd xmm0, xmm1 // BGRA first 4
- punpckhwd xmm6, xmm1 // BGRA next 4
- sub ecx, 8 // sets the flags tested by jg below
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm6
- lea eax, [eax + 32]
- lea edx, [edx + 32]
- jg convertloop // loop while pixels remain
- ret
- }
-}
-#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
-
-#ifdef HAS_ARGBQUANTIZEROW_SSE2
-// Quantize 4 ARGB pixels (16 bytes) per iteration, in place: c = (c * scale >> 16) * interval_size + interval_offset for B, G, R; alpha is preserved.
-// Uses movdqa loads and stores, which require dst_argb to be 16-byte aligned.
-__declspec(naked) __declspec(align(16))
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) {
- __asm {
- mov eax, [esp + 4] /* dst_argb */
- movd xmm2, [esp + 8] /* scale */
- movd xmm3, [esp + 12] /* interval_size */
- movd xmm4, [esp + 16] /* interval_offset */
- mov ecx, [esp + 20] /* width */
- pshuflw xmm2, xmm2, 040h // low word copied to the B, G, R word lanes; the A lane gets the int's upper 16 bits
- pshufd xmm2, xmm2, 044h // repeat for the second pixel
- pshuflw xmm3, xmm3, 040h
- pshufd xmm3, xmm3, 044h
- pshuflw xmm4, xmm4, 040h
- pshufd xmm4, xmm4, 044h
- pxor xmm5, xmm5 // constant 0
- pcmpeqb xmm6, xmm6 // generate mask 0xff000000
- pslld xmm6, 24
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // read 4 pixels
- punpcklbw xmm0, xmm5 // first 2 pixels
- pmulhuw xmm0, xmm2 // pixel * scale >> 16
- movdqa xmm1, [eax] // read 4 pixels
- punpckhbw xmm1, xmm5 // next 2 pixels
- pmulhuw xmm1, xmm2
- pmullw xmm0, xmm3 // * interval_size
- movdqa xmm7, [eax] // read 4 pixels
- pmullw xmm1, xmm3
- pand xmm7, xmm6 // mask alpha
- paddw xmm0, xmm4 // + interval_offset
- paddw xmm1, xmm4
- packuswb xmm0, xmm1
- por xmm0, xmm7 // OR the original alpha back in
- sub ecx, 4 // sets the flags tested by jg below
- movdqa [eax], xmm0 // write back over the source pixels
- lea eax, [eax + 16]
- jg convertloop // loop while pixels remain
- ret
- }
-}
-#endif // HAS_ARGBQUANTIZEROW_SSE2
-
-#ifdef HAS_ARGBSHADEROW_SSE2
-// Shade 4 pixels at a time by specified value: every channel (alpha included) is multiplied by the matching byte of value.
-// Uses movdqa loads and stores, which require src_argb and dst_argb to be 16-byte aligned.
-__declspec(naked) __declspec(align(16))
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- movd xmm2, [esp + 16] // value
- punpcklbw xmm2, xmm2 // duplicate each byte of value into a word
- punpcklqdq xmm2, xmm2 // repeat the 4 words for the second pixel
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // read 4 pixels
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm0 // first 2
- punpckhbw xmm1, xmm1 // next 2
- pmulhuw xmm0, xmm2 // argb * value
- pmulhuw xmm1, xmm2 // argb * value
- psrlw xmm0, 8
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- sub ecx, 4 // sets the flags tested by jg below
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // loop while pixels remain
-
- ret
- }
-}
-#endif // HAS_ARGBSHADEROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_SSE2
-// Multiply 2 rows of ARGB pixels together, 4 pixels at a time, using unaligned loads and stores.
-__declspec(naked) __declspec(align(16))
-void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb0 (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- pxor xmm5, xmm5 // constant 0
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels from src_argb0
- movdqu xmm2, [esi] // read 4 pixels from src_argb1
- movdqu xmm1, xmm0
- movdqu xmm3, xmm2
- punpcklbw xmm0, xmm0 // first 2 (bytes duplicated into words)
- punpckhbw xmm1, xmm1 // next 2
- punpcklbw xmm2, xmm5 // first 2 (bytes zero-extended to words)
- punpckhbw xmm3, xmm5 // next 2
- pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2
- pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2
- lea eax, [eax + 16]
- lea esi, [esi + 16]
- packuswb xmm0, xmm1
- sub ecx, 4 // sets the flags tested by jg below
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // loop while pixels remain
-
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBMULTIPLYROW_SSE2
-
-#ifdef HAS_ARGBADDROW_SSE2
-// Add 2 rows of ARGB pixels together with unsigned saturation, 4 pixels at a time, then 1 pixel at a time for the remainder.
-// TODO(fbarchard): Port this to posix, neon and other math functions.
-__declspec(naked) __declspec(align(16))
-void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb0 (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
-
- sub ecx, 4
- jl convertloop49 // fewer than 4 pixels: skip the 4-pixel loop
-
- align 4
- convertloop4:
- movdqu xmm0, [eax] // read 4 pixels from src_argb0
- lea eax, [eax + 16]
- movdqu xmm1, [esi] // read 4 pixels from src_argb1
- lea esi, [esi + 16]
- paddusb xmm0, xmm1 // src_argb0 + src_argb1
- sub ecx, 4 // sets the flags tested by jge below
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jge convertloop4 // loop while at least 4 pixels remain
-
- convertloop49:
- add ecx, 4 - 1 // ecx = remaining pixels - 1
- jl convertloop19 // nothing left
-
- convertloop1:
- movd xmm0, [eax] // read 1 pixels from src_argb0
- lea eax, [eax + 4]
- movd xmm1, [esi] // read 1 pixels from src_argb1
- lea esi, [esi + 4]
- paddusb xmm0, xmm1 // src_argb0 + src_argb1
- sub ecx, 1 // sets the flags tested by jge below
- movd [edx], xmm0
- lea edx, [edx + 4]
- jge convertloop1
-
- convertloop19:
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBADDROW_SSE2
-
-#ifdef HAS_ARGBSUBTRACTROW_SSE2
-// Subtract 2 rows of ARGB pixels (src_argb0 - src_argb1, unsigned saturation), 4 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb0 (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
-
- align 4
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels from src_argb0
- lea eax, [eax + 16]
- movdqu xmm1, [esi] // read 4 pixels from src_argb1
- lea esi, [esi + 16]
- psubusb xmm0, xmm1 // src_argb0 - src_argb1
- sub ecx, 4 // sets the flags tested by jg below
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // loop while pixels remain
-
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBSUBTRACTROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_AVX2
-// Multiply 2 rows of ARGB pixels together, 8 pixels at a time, using unaligned loads and stores.
-__declspec(naked) __declspec(align(16))
-void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb0 (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- vpxor ymm5, ymm5, ymm5 // constant 0
-
- align 4
- convertloop:
- vmovdqu ymm1, [eax] // read 8 pixels from src_argb0
- lea eax, [eax + 32]
- vmovdqu ymm3, [esi] // read 8 pixels from src_argb1
- lea esi, [esi + 32]
- vpunpcklbw ymm0, ymm1, ymm1 // low 4 (bytes duplicated into words)
- vpunpckhbw ymm1, ymm1, ymm1 // high 4
- vpunpcklbw ymm2, ymm3, ymm5 // low 4 (bytes zero-extended to words)
- vpunpckhbw ymm3, ymm3, ymm5 // high 4
- vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4
- vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4
- vpackuswb ymm0, ymm0, ymm1
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBMULTIPLYROW_AVX2
-
-#ifdef HAS_ARGBADDROW_AVX2
-// Add 2 rows of ARGB pixels together with unsigned saturation, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb0 (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
- lea eax, [eax + 32]
- vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1
- lea esi, [esi + 32]
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBADDROW_AVX2
-
-#ifdef HAS_ARGBSUBTRACTROW_AVX2
-// Subtract 2 rows of ARGB pixels (src_argb0 - src_argb1, unsigned saturation), 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb0 (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
- lea eax, [eax + 32]
- vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1
- lea esi, [esi + 32]
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBSUBTRACTROW_AVX2
-
-#ifdef HAS_SOBELXROW_SSE2
-// SobelX as a matrix is (applied to rows src_y0, src_y1, src_y2; the absolute value of the sum is stored, 8 pixels per iteration)
-// -1 0 1
-// -2 0 2
-// -1 0 1
-__declspec(naked) __declspec(align(16))
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_y0 (+8 skips the two pushed registers)
- mov esi, [esp + 8 + 8] // src_y1
- mov edi, [esp + 8 + 12] // src_y2
- mov edx, [esp + 8 + 16] // dst_sobelx
- mov ecx, [esp + 8 + 20] // width
- sub esi, eax // turn the other pointers into offsets from src_y0 so only eax is advanced
- sub edi, eax
- sub edx, eax
- pxor xmm5, xmm5 // constant 0
-
- align 4
- convertloop:
- movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
- movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
- psubw xmm0, xmm1 // src_y0[0] - src_y0[2]
- movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
- movq xmm2, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2]
- punpcklbw xmm1, xmm5
- punpcklbw xmm2, xmm5
- psubw xmm1, xmm2 // src_y1[0] - src_y1[2]
- movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0]
- movq xmm3, qword ptr [eax + edi + 2] // read 8 pixels from src_y2[2]
- punpcklbw xmm2, xmm5
- punpcklbw xmm3, xmm5
- psubw xmm2, xmm3 // src_y2[0] - src_y2[2]
- paddw xmm0, xmm2
- paddw xmm0, xmm1 // the middle row is added twice (weight 2)
- paddw xmm0, xmm1
- pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
- psubw xmm1, xmm0
- pmaxsw xmm0, xmm1
- packuswb xmm0, xmm0
- sub ecx, 8 // sets the flags tested by jg below
- movq qword ptr [eax + edx], xmm0
- lea eax, [eax + 8]
- jg convertloop // loop while pixels remain
-
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELXROW_SSE2
-
-#ifdef HAS_SOBELYROW_SSE2
-// SobelY as a matrix is (applied to rows src_y0 and src_y1 at columns 0, 1, 2; the absolute value of the sum is stored, 8 pixels per iteration)
-// -1 -2 -1
-// 0 0 0
-// 1 2 1
-__declspec(naked) __declspec(align(16))
-void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_y0 (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_y1
- mov edx, [esp + 4 + 12] // dst_sobely
- mov ecx, [esp + 4 + 16] // width
- sub esi, eax // turn the other pointers into offsets from src_y0 so only eax is advanced
- sub edx, eax
- pxor xmm5, xmm5 // constant 0
-
- align 4
- convertloop:
- movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
- movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
- psubw xmm0, xmm1 // src_y0[0] - src_y1[0]
- movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1]
- movq xmm2, qword ptr [eax + esi + 1] // read 8 pixels from src_y1[1]
- punpcklbw xmm1, xmm5
- punpcklbw xmm2, xmm5
- psubw xmm1, xmm2 // src_y0[1] - src_y1[1]
- movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
- movq xmm3, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2]
- punpcklbw xmm2, xmm5
- punpcklbw xmm3, xmm5
- psubw xmm2, xmm3 // src_y0[2] - src_y1[2]
- paddw xmm0, xmm2
- paddw xmm0, xmm1 // the middle column is added twice (weight 2)
- paddw xmm0, xmm1
- pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
- psubw xmm1, xmm0
- pmaxsw xmm0, xmm1
- packuswb xmm0, xmm0
- sub ecx, 8 // sets the flags tested by jg below
- movq qword ptr [eax + edx], xmm0
- lea eax, [eax + 8]
- jg convertloop // loop while pixels remain
-
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELYROW_SSE2
-
-#ifdef HAS_SOBELROW_SSE2
-// Adds Sobel X and Sobel Y (saturating) and stores Sobel into ARGB, 16 pixels per iteration; uses movdqa, so all pointers must be 16-byte aligned.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-__declspec(naked) __declspec(align(16))
-void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_sobelx (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_sobely
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- sub esi, eax // esi = src_sobely - src_sobelx, so [eax + esi] addresses src_sobely
- pcmpeqb xmm5, xmm5 // alpha 255
- pslld xmm5, 24 // 0xff000000
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // read 16 pixels src_sobelx
- movdqa xmm1, [eax + esi] // read 16 pixels src_sobely
- lea eax, [eax + 16]
- paddusb xmm0, xmm1 // sobel = sobelx + sobely
- movdqa xmm2, xmm0 // GG
- punpcklbw xmm2, xmm0 // First 8
- punpckhbw xmm0, xmm0 // Next 8
- movdqa xmm1, xmm2 // GGGG
- punpcklwd xmm1, xmm2 // First 4
- punpckhwd xmm2, xmm2 // Next 4
- por xmm1, xmm5 // GGGA
- por xmm2, xmm5
- movdqa xmm3, xmm0 // GGGG
- punpcklwd xmm3, xmm0 // Next 4
- punpckhwd xmm0, xmm0 // Last 4
- por xmm3, xmm5 // GGGA
- por xmm0, xmm5
- sub ecx, 16 // sets the flags tested by jg below
- movdqa [edx], xmm1
- movdqa [edx + 16], xmm2
- movdqa [edx + 32], xmm3
- movdqa [edx + 48], xmm0
- lea edx, [edx + 64]
- jg convertloop // loop while pixels remain
-
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELROW_SSE2
-
-#ifdef HAS_SOBELTOPLANEROW_SSE2
-// Adds Sobel X and Sobel Y (saturating) and stores Sobel into a plane, 16 pixels per iteration; uses movdqa, so all pointers must be 16-byte aligned.
-__declspec(naked) __declspec(align(16))
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_sobelx (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_sobely
- mov edx, [esp + 4 + 12] // dst_y
- mov ecx, [esp + 4 + 16] // width
- sub esi, eax // esi = src_sobely - src_sobelx, so [eax + esi] addresses src_sobely
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // read 16 pixels src_sobelx
- movdqa xmm1, [eax + esi] // read 16 pixels src_sobely
- lea eax, [eax + 16]
- paddusb xmm0, xmm1 // sobel = sobelx + sobely
- sub ecx, 16 // sets the flags tested by jg below
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg convertloop // loop while pixels remain
-
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELTOPLANEROW_SSE2
-
-#ifdef HAS_SOBELXYROW_SSE2
-// Mixes Sobel X, Sobel Y and Sobel into ARGB, 16 pixels per iteration; uses movdqa, so all pointers must be 16-byte aligned.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-__declspec(naked) __declspec(align(16))
-void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_sobelx (+4 skips the pushed esi)
- mov esi, [esp + 4 + 8] // src_sobely
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- sub esi, eax // esi = src_sobely - src_sobelx, so [eax + esi] addresses src_sobely
- pcmpeqb xmm5, xmm5 // alpha 255
-
- align 4
- convertloop:
- movdqa xmm0, [eax] // read 16 pixels src_sobelx
- movdqa xmm1, [eax + esi] // read 16 pixels src_sobely
- lea eax, [eax + 16]
- movdqa xmm2, xmm0
- paddusb xmm2, xmm1 // sobel = sobelx + sobely
- movdqa xmm3, xmm0 // XA
- punpcklbw xmm3, xmm5
- punpckhbw xmm0, xmm5
- movdqa xmm4, xmm1 // YS
- punpcklbw xmm4, xmm2
- punpckhbw xmm1, xmm2
- movdqa xmm6, xmm4 // YSXA
- punpcklwd xmm6, xmm3 // First 4
- punpckhwd xmm4, xmm3 // Next 4
- movdqa xmm7, xmm1 // YSXA
- punpcklwd xmm7, xmm0 // Next 4
- punpckhwd xmm1, xmm0 // Last 4
- sub ecx, 16 // sets the flags tested by jg below
- movdqa [edx], xmm6
- movdqa [edx + 16], xmm4
- movdqa [edx + 32], xmm7
- movdqa [edx + 48], xmm1
- lea edx, [edx + 64]
- jg convertloop // loop while pixels remain
-
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELXYROW_SSE2
-
-#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-// Consider float CumulativeSum.
-// Consider calling CumulativeSum one row at time as needed.
-// Consider circular CumulativeSum buffer of radius * 2 + 1 height.
-// Convert cumulative sum for an area to an average for 1 pixel.
-// topleft is pointer to top left of CumulativeSum buffer for area.
-// botleft is pointer to bottom left of CumulativeSum buffer.
-// width is offset from left to right of area in CumulativeSum buffer measured
-// in number of ints.
-// area is the number of pixels in the area being averaged.
-// dst points to pixel to store result to.
-// count is number of averaged pixels to produce.
-// Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte
-// aligned. Remaining pixels (count not a multiple of 4) are handled one at a time.
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst,
- int count) {
- __asm {
- mov eax, topleft // eax topleft
- mov esi, botleft // esi botleft
- mov edx, width
- movd xmm5, area
- mov edi, dst
- mov ecx, count
- cvtdq2ps xmm5, xmm5
- rcpss xmm4, xmm5 // 1.0f / area (rcpss is an approximate reciprocal)
- pshufd xmm4, xmm4, 0
- sub ecx, 4
- jl l4b // fewer than 4 pixels: go straight to the 1 pixel loop
-
- cmp area, 128 // 128 pixels will not overflow 15 bits.
- ja l4 // larger areas use the float loop
-
- pshufd xmm5, xmm5, 0 // area
- pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0
- psrld xmm6, 16
- cvtdq2ps xmm6, xmm6
- addps xmm5, xmm6 // (65536.0 + area - 1)
- mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area
- cvtps2dq xmm5, xmm5 // 0.16 fixed point
- packssdw xmm5, xmm5 // 16 bit shorts
-
- // 4 pixel loop small blocks (area <= 128): 16 bit fixed point multiply instead of float.
- align 4
- s4:
- // top left
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
-
- // - top right
- psubd xmm0, [eax + edx * 4]
- psubd xmm1, [eax + edx * 4 + 16]
- psubd xmm2, [eax + edx * 4 + 32]
- psubd xmm3, [eax + edx * 4 + 48]
- lea eax, [eax + 64]
-
- // - bottom left
- psubd xmm0, [esi]
- psubd xmm1, [esi + 16]
- psubd xmm2, [esi + 32]
- psubd xmm3, [esi + 48]
-
- // + bottom right
- paddd xmm0, [esi + edx * 4]
- paddd xmm1, [esi + edx * 4 + 16]
- paddd xmm2, [esi + edx * 4 + 32]
- paddd xmm3, [esi + edx * 4 + 48]
- lea esi, [esi + 64]
-
- packssdw xmm0, xmm1 // pack 4 pixels into 2 registers
- packssdw xmm2, xmm3
-
- pmulhuw xmm0, xmm5 // sum * (0.16 fixed point reciprocal of area)
- pmulhuw xmm2, xmm5
-
- packuswb xmm0, xmm2
- movdqu [edi], xmm0
- lea edi, [edi + 16]
- sub ecx, 4
- jge s4 // loop while at least 4 pixels remain
-
- jmp l4b
-
- // 4 pixel loop (area > 128): float multiply by 1 / area.
- align 4
- l4:
- // top left
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + 32]
- movdqa xmm3, [eax + 48]
-
- // - top right
- psubd xmm0, [eax + edx * 4]
- psubd xmm1, [eax + edx * 4 + 16]
- psubd xmm2, [eax + edx * 4 + 32]
- psubd xmm3, [eax + edx * 4 + 48]
- lea eax, [eax + 64]
-
- // - bottom left
- psubd xmm0, [esi]
- psubd xmm1, [esi + 16]
- psubd xmm2, [esi + 32]
- psubd xmm3, [esi + 48]
-
- // + bottom right
- paddd xmm0, [esi + edx * 4]
- paddd xmm1, [esi + edx * 4 + 16]
- paddd xmm2, [esi + edx * 4 + 32]
- paddd xmm3, [esi + edx * 4 + 48]
- lea esi, [esi + 64]
-
- cvtdq2ps xmm0, xmm0 // Average = Sum * 1 / Area
- cvtdq2ps xmm1, xmm1
- mulps xmm0, xmm4
- mulps xmm1, xmm4
- cvtdq2ps xmm2, xmm2
- cvtdq2ps xmm3, xmm3
- mulps xmm2, xmm4
- mulps xmm3, xmm4
- cvtps2dq xmm0, xmm0
- cvtps2dq xmm1, xmm1
- cvtps2dq xmm2, xmm2
- cvtps2dq xmm3, xmm3
- packssdw xmm0, xmm1
- packssdw xmm2, xmm3
- packuswb xmm0, xmm2
- movdqu [edi], xmm0
- lea edi, [edi + 16]
- sub ecx, 4
- jge l4 // loop while at least 4 pixels remain
-
- l4b:
- add ecx, 4 - 1 // ecx = remaining pixels - 1
- jl l1b // nothing left
-
- // 1 pixel loop
- align 4
- l1:
- movdqa xmm0, [eax]
- psubd xmm0, [eax + edx * 4]
- lea eax, [eax + 16]
- psubd xmm0, [esi]
- paddd xmm0, [esi + edx * 4]
- lea esi, [esi + 16]
- cvtdq2ps xmm0, xmm0
- mulps xmm0, xmm4
- cvtps2dq xmm0, xmm0
- packssdw xmm0, xmm0
- packuswb xmm0, xmm0
- movd dword ptr [edi], xmm0
- lea edi, [edi + 4]
- sub ecx, 1
- jge l1
- l1b:
- }
-}
-#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-
-#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
-// Creates a table of cumulative sums where each value is a sum of all values
-// above and to the left of the value. Each pixel contributes four int32 sums (one per byte of the ARGB pixel).
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) {
- __asm {
- mov eax, row
- mov edx, cumsum
- mov esi, previous_cumsum
- mov ecx, width
- pxor xmm0, xmm0 // running sum of this row, 4 x int32
- pxor xmm1, xmm1 // constant 0 for unpacking
-
- sub ecx, 4
- jl l4b // fewer than 4 pixels: use the 1 pixel loop
- test edx, 15
- jne l4b // cumsum not 16-byte aligned: use the 1 pixel loop (movdqu) for everything
-
- // 4 pixel loop. NOTE(review): only cumsum alignment is tested above; the movdqa loads from previous_cumsum assume it is aligned too - confirm with callers.
- align 4
- l4:
- movdqu xmm2, [eax] // 4 argb pixels 16 bytes.
- lea eax, [eax + 16]
- movdqa xmm4, xmm2
-
- punpcklbw xmm2, xmm1 // widen bytes to words, then to dwords: one register per pixel
- movdqa xmm3, xmm2
- punpcklwd xmm2, xmm1
- punpckhwd xmm3, xmm1
-
- punpckhbw xmm4, xmm1
- movdqa xmm5, xmm4
- punpcklwd xmm4, xmm1
- punpckhwd xmm5, xmm1
-
- paddd xmm0, xmm2 // add pixel 0 to the running sum
- movdqa xmm2, [esi] // previous row above.
- paddd xmm2, xmm0 // output = previous row + running sum
-
- paddd xmm0, xmm3
- movdqa xmm3, [esi + 16]
- paddd xmm3, xmm0
-
- paddd xmm0, xmm4
- movdqa xmm4, [esi + 32]
- paddd xmm4, xmm0
-
- paddd xmm0, xmm5
- movdqa xmm5, [esi + 48]
- lea esi, [esi + 64]
- paddd xmm5, xmm0
-
- movdqa [edx], xmm2
- movdqa [edx + 16], xmm3
- movdqa [edx + 32], xmm4
- movdqa [edx + 48], xmm5
-
- lea edx, [edx + 64]
- sub ecx, 4
- jge l4 // loop while at least 4 pixels remain
-
- l4b:
- add ecx, 4 - 1 // ecx = remaining pixels - 1
- jl l1b // nothing left
-
- // 1 pixel loop
- align 4
- l1:
- movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes.
- lea eax, [eax + 4]
- punpcklbw xmm2, xmm1
- punpcklwd xmm2, xmm1
- paddd xmm0, xmm2
- movdqu xmm2, [esi]
- lea esi, [esi + 16]
- paddd xmm2, xmm0
- movdqu [edx], xmm2
- lea edx, [edx + 16]
- sub ecx, 1
- jge l1
-
- l1b:
- }
-}
-#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
-
-#ifdef HAS_ARGBAFFINEROW_SSE2
-// Copy ARGB pixels from source image with slope to a row of destination: starting at (u, v), step by (du, dv) per output pixel.
-__declspec(naked) __declspec(align(16))
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 12] // src_argb (offsets include the 8 bytes pushed above)
- mov esi, [esp + 16] // stride
- mov edx, [esp + 20] // dst_argb
- mov ecx, [esp + 24] // pointer to uv_dudv
- movq xmm2, qword ptr [ecx] // uv
- movq xmm7, qword ptr [ecx + 8] // dudv
- mov ecx, [esp + 28] // width
- shl esi, 16 // 4, stride: build the 16 bit pair (4, stride) used by pmaddwd below
- add esi, 4
- movd xmm5, esi
- sub ecx, 4
- jl l4b // fewer than 4 pixels: use the 1 pixel loop
-
- // setup for 4 pixel loop
- pshufd xmm7, xmm7, 0x44 // dup dudv
- pshufd xmm5, xmm5, 0 // dup 4, stride
- movdqa xmm0, xmm2 // x0, y0, x1, y1
- addps xmm0, xmm7
- movlhps xmm2, xmm0
- movdqa xmm4, xmm7
- addps xmm4, xmm4 // dudv *= 2
- movdqa xmm3, xmm2 // x2, y2, x3, y3
- addps xmm3, xmm4
- addps xmm4, xmm4 // dudv *= 4
-
- // 4 pixel loop
- align 4
- l4:
- cvttps2dq xmm0, xmm2 // x, y float to int first 2
- cvttps2dq xmm1, xmm3 // x, y float to int next 2
- packssdw xmm0, xmm1 // x, y as 8 shorts
- pmaddwd xmm0, xmm5 // offsets = x * 4 + y * stride.
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // shift right
- movd edi, xmm0
- pshufd xmm0, xmm0, 0x39 // shift right
- movd xmm1, [eax + esi] // read pixel 0
- movd xmm6, [eax + edi] // read pixel 1
- punpckldq xmm1, xmm6 // combine pixel 0 and 1
- addps xmm2, xmm4 // x, y += dx, dy first 2
- movq qword ptr [edx], xmm1
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // shift right
- movd edi, xmm0
- movd xmm6, [eax + esi] // read pixel 2
- movd xmm0, [eax + edi] // read pixel 3
- punpckldq xmm6, xmm0 // combine pixel 2 and 3
- addps xmm3, xmm4 // x, y += dx, dy next 2
- sub ecx, 4 // sets the flags tested by jge below
- movq qword ptr 8[edx], xmm6
- lea edx, [edx + 16]
- jge l4 // loop while at least 4 pixels remain
-
- l4b:
- add ecx, 4 - 1 // ecx = remaining pixels - 1
- jl l1b // nothing left
-
- // 1 pixel loop
- align 4
- l1:
- cvttps2dq xmm0, xmm2 // x, y float to int
- packssdw xmm0, xmm0 // x, y as shorts
- pmaddwd xmm0, xmm5 // offset = x * 4 + y * stride
- addps xmm2, xmm7 // x, y += dx, dy
- movd esi, xmm0
- movd xmm0, [eax + esi] // copy a pixel
- sub ecx, 1 // sets the flags tested by jge below
- movd [edx], xmm0
- lea edx, [edx + 4]
- jge l1
- l1b:
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBAFFINEROW_SSE2
-
-#ifdef HAS_INTERPOLATEROW_AVX2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- shr eax, 1
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 128. Blend 100 / 0.
- sub edi, esi
- cmp eax, 32
- je xloop75 // 32 / 128 is 0.25. Blend 75 / 25.
- cmp eax, 64
- je xloop50 // 64 / 128 is 0.50. Blend 50 / 50.
- cmp eax, 96
- je xloop25 // 96 / 128 is 0.75. Blend 25 / 75.
-
- vmovd xmm0, eax // high fraction 0..127
- neg eax
- add eax, 128
- vmovd xmm5, eax // low fraction 128..1
- vpunpcklbw xmm5, xmm5, xmm0
- vpunpcklwd xmm5, xmm5, xmm5
- vpxor ymm0, ymm0, ymm0
- vpermd ymm5, ymm0, ymm5
-
- align 4
- xloop:
- vmovdqu ymm0, [esi]
- vmovdqu ymm2, [esi + edx]
- vpunpckhbw ymm1, ymm0, ymm2 // mutates
- vpunpcklbw ymm0, ymm0, ymm2 // mutates
- vpmaddubsw ymm0, ymm0, ymm5
- vpmaddubsw ymm1, ymm1, ymm5
- vpsrlw ymm0, ymm0, 7
- vpsrlw ymm1, ymm1, 7
- vpackuswb ymm0, ymm0, ymm1 // unmutates
- sub ecx, 32
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- align 4
- xloop25:
- vmovdqu ymm0, [esi]
- vpavgb ymm0, ymm0, [esi + edx]
- vpavgb ymm0, ymm0, [esi + edx]
- sub ecx, 32
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- align 4
- xloop50:
- vmovdqu ymm0, [esi]
- vpavgb ymm0, ymm0, [esi + edx]
- sub ecx, 32
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- align 4
- xloop75:
- vmovdqu ymm0, [esi + edx]
- vpavgb ymm0, ymm0, [esi]
- vpavgb ymm0, ymm0, [esi]
- sub ecx, 32
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- align 4
- xloop100:
- rep movsb
-
- xloop99:
- pop edi
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_INTERPOLATEROW_AVX2
-
-#ifdef HAS_INTERPOLATEROW_SSSE3
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- sub edi, esi
- shr eax, 1
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 128. Blend 100 / 0.
- cmp eax, 32
- je xloop75 // 32 / 128 is 0.25. Blend 75 / 25.
- cmp eax, 64
- je xloop50 // 64 / 128 is 0.50. Blend 50 / 50.
- cmp eax, 96
- je xloop25 // 96 / 128 is 0.75. Blend 25 / 75.
-
- movd xmm0, eax // high fraction 0..127
- neg eax
- add eax, 128
- movd xmm5, eax // low fraction 128..1
- punpcklbw xmm5, xmm0
- punpcklwd xmm5, xmm5
- pshufd xmm5, xmm5, 0
-
- align 4
- xloop:
- movdqa xmm0, [esi]
- movdqa xmm2, [esi + edx]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm2
- punpckhbw xmm1, xmm2
- pmaddubsw xmm0, xmm5
- pmaddubsw xmm1, xmm5
- psrlw xmm0, 7
- psrlw xmm1, 7
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- align 4
- xloop25:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- align 4
- xloop50:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi + edx]
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- align 4
- xloop75:
- movdqa xmm1, [esi]
- movdqa xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- align 4
- xloop100:
- movdqa xmm0, [esi]
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop100
-
- xloop99:
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_INTERPOLATEROW_SSSE3
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- sub edi, esi
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 256. Blend 100 / 0.
- cmp eax, 64
- je xloop75 // 64 / 256 is 0.25. Blend 75 / 25.
- cmp eax, 128
- je xloop50 // 128 / 256 is 0.50. Blend 50 / 50.
- cmp eax, 192
- je xloop25 // 192 / 256 is 0.75. Blend 25 / 75.
-
- movd xmm5, eax // xmm5 = y fraction
- punpcklbw xmm5, xmm5
- psrlw xmm5, 1
- punpcklwd xmm5, xmm5
- punpckldq xmm5, xmm5
- punpcklqdq xmm5, xmm5
- pxor xmm4, xmm4
-
- align 4
- xloop:
- movdqa xmm0, [esi] // row0
- movdqa xmm2, [esi + edx] // row1
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- punpcklbw xmm2, xmm4
- punpckhbw xmm3, xmm4
- punpcklbw xmm0, xmm4
- punpckhbw xmm1, xmm4
- psubw xmm2, xmm0 // row1 - row0
- psubw xmm3, xmm1
- paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16
- paddw xmm3, xmm3
- pmulhw xmm2, xmm5 // scale diff
- pmulhw xmm3, xmm5
- paddw xmm0, xmm2 // sum rows
- paddw xmm1, xmm3
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- align 4
- xloop25:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- align 4
- xloop50:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi + edx]
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- align 4
- xloop75:
- movdqa xmm1, [esi]
- movdqa xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- align 4
- xloop100:
- movdqa xmm0, [esi]
- sub ecx, 16
- movdqa [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop100
-
- xloop99:
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_INTERPOLATEROW_SSE2
-
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- sub edi, esi
- shr eax, 1
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 128. Blend 100 / 0.
- cmp eax, 32
- je xloop75 // 32 / 128 is 0.25. Blend 75 / 25.
- cmp eax, 64
- je xloop50 // 64 / 128 is 0.50. Blend 50 / 50.
- cmp eax, 96
- je xloop25 // 96 / 128 is 0.75. Blend 25 / 75.
-
- movd xmm0, eax // high fraction 0..127
- neg eax
- add eax, 128
- movd xmm5, eax // low fraction 128..1
- punpcklbw xmm5, xmm0
- punpcklwd xmm5, xmm5
- pshufd xmm5, xmm5, 0
-
- align 4
- xloop:
- movdqu xmm0, [esi]
- movdqu xmm2, [esi + edx]
- movdqu xmm1, xmm0
- punpcklbw xmm0, xmm2
- punpckhbw xmm1, xmm2
- pmaddubsw xmm0, xmm5
- pmaddubsw xmm1, xmm5
- psrlw xmm0, 7
- psrlw xmm1, 7
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- align 4
- xloop25:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- align 4
- xloop50:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- align 4
- xloop75:
- movdqu xmm1, [esi]
- movdqu xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- align 4
- xloop100:
- movdqu xmm0, [esi]
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop100
-
- xloop99:
- pop edi
- pop esi
- ret
- }
-}
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- sub edi, esi
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 256. Blend 100 / 0.
- cmp eax, 64
- je xloop75 // 64 / 256 is 0.25. Blend 75 / 25.
- cmp eax, 128
- je xloop50 // 128 / 256 is 0.50. Blend 50 / 50.
- cmp eax, 192
- je xloop25 // 192 / 256 is 0.75. Blend 25 / 75.
-
- movd xmm5, eax // xmm5 = y fraction
- punpcklbw xmm5, xmm5
- psrlw xmm5, 1
- punpcklwd xmm5, xmm5
- punpckldq xmm5, xmm5
- punpcklqdq xmm5, xmm5
- pxor xmm4, xmm4
-
- align 4
- xloop:
- movdqu xmm0, [esi] // row0
- movdqu xmm2, [esi + edx] // row1
- movdqu xmm1, xmm0
- movdqu xmm3, xmm2
- punpcklbw xmm2, xmm4
- punpckhbw xmm3, xmm4
- punpcklbw xmm0, xmm4
- punpckhbw xmm1, xmm4
- psubw xmm2, xmm0 // row1 - row0
- psubw xmm3, xmm1
- paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16
- paddw xmm3, xmm3
- pmulhw xmm2, xmm5 // scale diff
- pmulhw xmm3, xmm5
- paddw xmm0, xmm2 // sum rows
- paddw xmm1, xmm3
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- align 4
- xloop25:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- align 4
- xloop50:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- align 4
- xloop75:
- movdqu xmm1, [esi]
- movdqu xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- align 4
- xloop100:
- movdqu xmm0, [esi]
- sub ecx, 16
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- jg xloop100
-
- xloop99:
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_INTERPOLATEROW_SSE2
-
-__declspec(naked) __declspec(align(16))
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uv
- mov edx, [esp + 4 + 8] // src_uv_stride
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- sub edi, eax
-
- align 4
- convertloop:
- movdqa xmm0, [eax]
- pavgb xmm0, [eax + edx]
- sub ecx, 16
- movdqa [eax + edi], xmm0
- lea eax, [eax + 16]
- jg convertloop
- pop edi
- ret
- }
-}
-
-#ifdef HAS_HALFROW_AVX2
-__declspec(naked) __declspec(align(16))
-void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uv
- mov edx, [esp + 4 + 8] // src_uv_stride
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- sub edi, eax
-
- align 4
- convertloop:
- vmovdqu ymm0, [eax]
- vpavgb ymm0, ymm0, [eax + edx]
- sub ecx, 32
- vmovdqu [eax + edi], ymm0
- lea eax, [eax + 32]
- jg convertloop
-
- pop edi
- vzeroupper
- ret
- }
-}
-#endif // HAS_HALFROW_AVX2
-
-__declspec(naked) __declspec(align(16))
-void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_bayer
- movd xmm5, [esp + 12] // selector
- mov ecx, [esp + 16] // pix
- pshufd xmm5, xmm5, 0
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- pshufb xmm0, xmm5
- pshufb xmm1, xmm5
- punpckldq xmm0, xmm1
- sub ecx, 8
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- jg wloop
- ret
- }
-}
-
-// Specialized ARGB to Bayer that just isolates G channel.
-__declspec(naked) __declspec(align(16))
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
- uint32 selector, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_bayer
- // selector
- mov ecx, [esp + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x000000ff
- psrld xmm5, 24
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrld xmm0, 8 // Move green to bottom.
- psrld xmm1, 8
- pand xmm0, xmm5
- pand xmm1, xmm5
- packssdw xmm0, xmm1
- packuswb xmm0, xmm1
- sub ecx, 8
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- jg wloop
- ret
- }
-}
-
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-__declspec(naked) __declspec(align(16))
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // shuffler
- movdqa xmm5, [ecx]
- mov ecx, [esp + 16] // pix
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- pshufb xmm0, xmm5
- pshufb xmm1, xmm5
- sub ecx, 8
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- jg wloop
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // shuffler
- movdqa xmm5, [ecx]
- mov ecx, [esp + 16] // pix
-
- align 4
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- pshufb xmm0, xmm5
- pshufb xmm1, xmm5
- sub ecx, 8
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- jg wloop
- ret
- }
-}
-
-#ifdef HAS_ARGBSHUFFLEROW_AVX2
-__declspec(naked) __declspec(align(16))
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // shuffler
- vbroadcastf128 ymm5, [ecx] // same shuffle in high as low.
- mov ecx, [esp + 16] // pix
-
- align 4
- wloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpshufb ymm0, ymm0, ymm5
- vpshufb ymm1, ymm1, ymm5
- sub ecx, 16
- vmovdqu [edx], ymm0
- vmovdqu [edx + 32], ymm1
- lea edx, [edx + 64]
- jg wloop
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBSHUFFLEROW_AVX2
-
-__declspec(naked) __declspec(align(16))
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- __asm {
- push ebx
- push esi
- mov eax, [esp + 8 + 4] // src_argb
- mov edx, [esp + 8 + 8] // dst_argb
- mov esi, [esp + 8 + 12] // shuffler
- mov ecx, [esp + 8 + 16] // pix
- pxor xmm5, xmm5
-
- mov ebx, [esi] // shuffler
- cmp ebx, 0x03000102
- je shuf_3012
- cmp ebx, 0x00010203
- je shuf_0123
- cmp ebx, 0x00030201
- je shuf_0321
- cmp ebx, 0x02010003
- je shuf_2103
-
- // TODO(fbarchard): Use one source pointer and 3 offsets.
- shuf_any1:
- movzx ebx, byte ptr [esi]
- movzx ebx, byte ptr [eax + ebx]
- mov [edx], bl
- movzx ebx, byte ptr [esi + 1]
- movzx ebx, byte ptr [eax + ebx]
- mov [edx + 1], bl
- movzx ebx, byte ptr [esi + 2]
- movzx ebx, byte ptr [eax + ebx]
- mov [edx + 2], bl
- movzx ebx, byte ptr [esi + 3]
- movzx ebx, byte ptr [eax + ebx]
- mov [edx + 3], bl
- lea eax, [eax + 4]
- lea edx, [edx + 4]
- sub ecx, 1
- jg shuf_any1
- jmp shuf99
-
- align 4
- shuf_0123:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm5
- punpckhbw xmm1, xmm5
- pshufhw xmm0, xmm0, 01Bh // 1B = 00011011 = 0x0123 = BGRAToARGB
- pshuflw xmm0, xmm0, 01Bh
- pshufhw xmm1, xmm1, 01Bh
- pshuflw xmm1, xmm1, 01Bh
- packuswb xmm0, xmm1
- sub ecx, 4
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg shuf_0123
- jmp shuf99
-
- align 4
- shuf_0321:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm5
- punpckhbw xmm1, xmm5
- pshufhw xmm0, xmm0, 039h // 39 = 00111001 = 0x0321 = RGBAToARGB
- pshuflw xmm0, xmm0, 039h
- pshufhw xmm1, xmm1, 039h
- pshuflw xmm1, xmm1, 039h
- packuswb xmm0, xmm1
- sub ecx, 4
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg shuf_0321
- jmp shuf99
-
- align 4
- shuf_2103:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm5
- punpckhbw xmm1, xmm5
- pshufhw xmm0, xmm0, 093h // 93 = 10010011 = 0x2103 = ARGBToRGBA
- pshuflw xmm0, xmm0, 093h
- pshufhw xmm1, xmm1, 093h
- pshuflw xmm1, xmm1, 093h
- packuswb xmm0, xmm1
- sub ecx, 4
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg shuf_2103
- jmp shuf99
-
- align 4
- shuf_3012:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm5
- punpckhbw xmm1, xmm5
- pshufhw xmm0, xmm0, 0C6h // C6 = 11000110 = 0x3012 = ABGRToARGB
- pshuflw xmm0, xmm0, 0C6h
- pshufhw xmm1, xmm1, 0C6h
- pshuflw xmm1, xmm1, 0C6h
- packuswb xmm0, xmm1
- sub ecx, 4
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg shuf_3012
-
- shuf99:
- pop esi
- pop ebx
- ret
- }
-}
-
-// YUY2 - Macro-pixel = 2 image pixels
-// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
-
-// UYVY - Macro-pixel = 2 image pixels
-// U0Y0V0Y1
-
-__declspec(naked) __declspec(align(16))
-void I422ToYUY2Row_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_y
- mov esi, [esp + 8 + 8] // src_u
- mov edx, [esp + 8 + 12] // src_v
- mov edi, [esp + 8 + 16] // dst_frame
- mov ecx, [esp + 8 + 20] // width
- sub edx, esi
-
- align 4
- convertloop:
- movq xmm2, qword ptr [esi] // U
- movq xmm3, qword ptr [esi + edx] // V
- lea esi, [esi + 8]
- punpcklbw xmm2, xmm3 // UV
- movdqu xmm0, [eax] // Y
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm2 // YUYV
- punpckhbw xmm1, xmm2
- movdqu [edi], xmm0
- movdqu [edi + 16], xmm1
- lea edi, [edi + 32]
- sub ecx, 16
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToUYVYRow_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_y
- mov esi, [esp + 8 + 8] // src_u
- mov edx, [esp + 8 + 12] // src_v
- mov edi, [esp + 8 + 16] // dst_frame
- mov ecx, [esp + 8 + 20] // width
- sub edx, esi
-
- align 4
- convertloop:
- movq xmm2, qword ptr [esi] // U
- movq xmm3, qword ptr [esi + edx] // V
- lea esi, [esi + 8]
- punpcklbw xmm2, xmm3 // UV
- movdqu xmm0, [eax] // Y
- movdqa xmm1, xmm2
- lea eax, [eax + 16]
- punpcklbw xmm1, xmm0 // UYVY
- punpckhbw xmm2, xmm0
- movdqu [edi], xmm1
- movdqu [edi + 16], xmm2
- lea edi, [edi + 32]
- sub ecx, 16
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-__declspec(naked) __declspec(align(16))
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] /* src_argb */
- mov edx, [esp + 4 + 8] /* dst_argb */
- mov esi, [esp + 4 + 12] /* poly */
- mov ecx, [esp + 4 + 16] /* width */
- pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints.
-
- // 2 pixel loop.
- align 4
- convertloop:
-// pmovzxbd xmm0, dword ptr [eax] // BGRA pixel
-// pmovzxbd xmm4, dword ptr [eax + 4] // BGRA pixel
- movq xmm0, qword ptr [eax] // BGRABGRA
- lea eax, [eax + 8]
- punpcklbw xmm0, xmm3
- movdqa xmm4, xmm0
- punpcklwd xmm0, xmm3 // pixel 0
- punpckhwd xmm4, xmm3 // pixel 1
- cvtdq2ps xmm0, xmm0 // 4 floats
- cvtdq2ps xmm4, xmm4
- movdqa xmm1, xmm0 // X
- movdqa xmm5, xmm4
- mulps xmm0, [esi + 16] // C1 * X
- mulps xmm4, [esi + 16]
- addps xmm0, [esi] // result = C0 + C1 * X
- addps xmm4, [esi]
- movdqa xmm2, xmm1
- movdqa xmm6, xmm5
- mulps xmm2, xmm1 // X * X
- mulps xmm6, xmm5
- mulps xmm1, xmm2 // X * X * X
- mulps xmm5, xmm6
- mulps xmm2, [esi + 32] // C2 * X * X
- mulps xmm6, [esi + 32]
- mulps xmm1, [esi + 48] // C3 * X * X * X
- mulps xmm5, [esi + 48]
- addps xmm0, xmm2 // result += C2 * X * X
- addps xmm4, xmm6
- addps xmm0, xmm1 // result += C3 * X * X * X
- addps xmm4, xmm5
- cvttps2dq xmm0, xmm0
- cvttps2dq xmm4, xmm4
- packuswb xmm0, xmm4
- packuswb xmm0, xmm0
- sub ecx, 2
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- jg convertloop
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBPOLYNOMIALROW_SSE2
-
-#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-__declspec(naked) __declspec(align(16))
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_argb */
- mov ecx, [esp + 12] /* poly */
- vbroadcastf128 ymm4, [ecx] // C0
- vbroadcastf128 ymm5, [ecx + 16] // C1
- vbroadcastf128 ymm6, [ecx + 32] // C2
- vbroadcastf128 ymm7, [ecx + 48] // C3
- mov ecx, [esp + 16] /* width */
-
- // 2 pixel loop.
- align 4
- convertloop:
- vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels
- lea eax, [eax + 8]
- vcvtdq2ps ymm0, ymm0 // X 8 floats
- vmulps ymm2, ymm0, ymm0 // X * X
- vmulps ymm3, ymm0, ymm7 // C3 * X
- vfmadd132ps ymm0, ymm4, ymm5 // result = C0 + C1 * X
- vfmadd231ps ymm0, ymm2, ymm6 // result += C2 * X * X
- vfmadd231ps ymm0, ymm2, ymm3 // result += C3 * X * X * X
- vcvttps2dq ymm0, ymm0
- vpackusdw ymm0, ymm0, ymm0 // b0g0r0a0_00000000_b0g0r0a0_00000000
- vpermq ymm0, ymm0, 0xd8 // b0g0r0a0_b0g0r0a0_00000000_00000000
- vpackuswb xmm0, xmm0, xmm0 // bgrabgra_00000000_00000000_00000000
- sub ecx, 2
- vmovq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBPOLYNOMIALROW_AVX2
-
-#ifdef HAS_ARGBCOLORTABLEROW_X86
-// Tranform ARGB pixels with color table.
-__declspec(naked) __declspec(align(16))
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] /* dst_argb */
- mov esi, [esp + 4 + 8] /* table_argb */
- mov ecx, [esp + 4 + 12] /* width */
-
- // 1 pixel loop.
- align 4
- convertloop:
- movzx edx, byte ptr [eax]
- lea eax, [eax + 4]
- movzx edx, byte ptr [esi + edx * 4]
- mov byte ptr [eax - 4], dl
- movzx edx, byte ptr [eax - 4 + 1]
- movzx edx, byte ptr [esi + edx * 4 + 1]
- mov byte ptr [eax - 4 + 1], dl
- movzx edx, byte ptr [eax - 4 + 2]
- movzx edx, byte ptr [esi + edx * 4 + 2]
- mov byte ptr [eax - 4 + 2], dl
- movzx edx, byte ptr [eax - 4 + 3]
- movzx edx, byte ptr [esi + edx * 4 + 3]
- mov byte ptr [eax - 4 + 3], dl
- dec ecx
- jg convertloop
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBCOLORTABLEROW_X86
-
-#ifdef HAS_RGBCOLORTABLEROW_X86
-// Tranform RGB pixels with color table.
-__declspec(naked) __declspec(align(16))
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] /* dst_argb */
- mov esi, [esp + 4 + 8] /* table_argb */
- mov ecx, [esp + 4 + 12] /* width */
-
- // 1 pixel loop.
- align 4
- convertloop:
- movzx edx, byte ptr [eax]
- lea eax, [eax + 4]
- movzx edx, byte ptr [esi + edx * 4]
- mov byte ptr [eax - 4], dl
- movzx edx, byte ptr [eax - 4 + 1]
- movzx edx, byte ptr [esi + edx * 4 + 1]
- mov byte ptr [eax - 4 + 1], dl
- movzx edx, byte ptr [eax - 4 + 2]
- movzx edx, byte ptr [esi + edx * 4 + 2]
- mov byte ptr [eax - 4 + 2], dl
- dec ecx
- jg convertloop
-
- pop esi
- ret
- }
-}
-#endif // HAS_RGBCOLORTABLEROW_X86
-
-#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
-// Tranform RGB pixels with luma table.
-__declspec(naked) __declspec(align(16))
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- int width,
- const uint8* luma, uint32 lumacoeff) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] /* src_argb */
- mov edi, [esp + 8 + 8] /* dst_argb */
- mov ecx, [esp + 8 + 12] /* width */
- movd xmm2, dword ptr [esp + 8 + 16] // luma table
- movd xmm3, dword ptr [esp + 8 + 20] // lumacoeff
- pshufd xmm2, xmm2, 0
- pshufd xmm3, xmm3, 0
- pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00
- psllw xmm4, 8
- pxor xmm5, xmm5
-
- // 4 pixel loop.
- align 4
- convertloop:
- movdqu xmm0, qword ptr [eax] // generate luma ptr
- pmaddubsw xmm0, xmm3
- phaddw xmm0, xmm0
- pand xmm0, xmm4 // mask out low bits
- punpcklwd xmm0, xmm5
- paddd xmm0, xmm2 // add table base
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32
-
- movzx edx, byte ptr [eax]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi], dl
- movzx edx, byte ptr [eax + 1]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 1], dl
- movzx edx, byte ptr [eax + 2]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 2], dl
- movzx edx, byte ptr [eax + 3] // copy alpha.
- mov byte ptr [edi + 3], dl
-
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32
-
- movzx edx, byte ptr [eax + 4]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 4], dl
- movzx edx, byte ptr [eax + 5]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 5], dl
- movzx edx, byte ptr [eax + 6]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 6], dl
- movzx edx, byte ptr [eax + 7] // copy alpha.
- mov byte ptr [edi + 7], dl
-
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32
-
- movzx edx, byte ptr [eax + 8]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 8], dl
- movzx edx, byte ptr [eax + 9]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 9], dl
- movzx edx, byte ptr [eax + 10]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 10], dl
- movzx edx, byte ptr [eax + 11] // copy alpha.
- mov byte ptr [edi + 11], dl
-
- movd esi, xmm0
-
- movzx edx, byte ptr [eax + 12]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 12], dl
- movzx edx, byte ptr [eax + 13]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 13], dl
- movzx edx, byte ptr [eax + 14]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 14], dl
- movzx edx, byte ptr [eax + 15] // copy alpha.
- mov byte ptr [edi + 15], dl
-
- sub ecx, 4
- lea eax, [eax + 16]
- lea edi, [edi + 16]
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
-
-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm b/drivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm
deleted file mode 100755
index 0cb326f8e5..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm
+++ /dev/null
@@ -1,146 +0,0 @@
-;
-; Copyright 2012 The LibYuv Project Authors. All rights reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%ifdef __YASM_VERSION_ID__
-%if __YASM_VERSION_ID__ < 01020000h
-%error AVX2 is supported only by yasm 1.2.0 or later.
-%endif
-%endif
-%include "x86inc.asm"
-
-SECTION .text
-
-; cglobal numeric constants are parameters, gpr regs, mm regs
-
-; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
-
-%macro YUY2TOYROW 2-3
-cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
-%ifidn %1,YUY2
- pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff
- psrlw m2, m2, 8
-%endif
-
- ALIGN 4
-.convertloop:
- mov%2 m0, [src_yuy2q]
- mov%2 m1, [src_yuy2q + mmsize]
- lea src_yuy2q, [src_yuy2q + mmsize * 2]
-%ifidn %1,YUY2
- pand m0, m0, m2 ; YUY2 even bytes are Y
- pand m1, m1, m2
-%else
- psrlw m0, m0, 8 ; UYVY odd bytes are Y
- psrlw m1, m1, 8
-%endif
- packuswb m0, m0, m1
-%if cpuflag(AVX2)
- vpermq m0, m0, 0xd8
-%endif
- sub pixd, mmsize
- mov%2 [dst_yq], m0
- lea dst_yq, [dst_yq + mmsize]
- jg .convertloop
- REP_RET
-%endmacro
-
-; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
-INIT_MMX MMX
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_XMM SSE2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_YMM AVX2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW UYVY,a,
-
-; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
-
-%macro SplitUVRow 1-2
-cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
- pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
- psrlw m4, m4, 8
- sub dst_vq, dst_uq
-
- ALIGN 4
-.convertloop:
- mov%1 m0, [src_uvq]
- mov%1 m1, [src_uvq + mmsize]
- lea src_uvq, [src_uvq + mmsize * 2]
- psrlw m2, m0, 8 ; odd bytes
- psrlw m3, m1, 8
- pand m0, m0, m4 ; even bytes
- pand m1, m1, m4
- packuswb m0, m0, m1
- packuswb m2, m2, m3
-%if cpuflag(AVX2)
- vpermq m0, m0, 0xd8
- vpermq m2, m2, 0xd8
-%endif
- mov%1 [dst_uq], m0
- mov%1 [dst_uq + dst_vq], m2
- lea dst_uq, [dst_uq + mmsize]
- sub pixd, mmsize
- jg .convertloop
- REP_RET
-%endmacro
-
-INIT_MMX MMX
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_XMM SSE2
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_YMM AVX2
-SplitUVRow a,
-
-; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-; int width);
-
-%macro MergeUVRow_ 1-2
-cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
- sub src_vq, src_uq
-
- ALIGN 4
-.convertloop:
- mov%1 m0, [src_uq]
- mov%1 m1, [src_vq]
- lea src_uq, [src_uq + mmsize]
- punpcklbw m2, m0, m1 // first 8 UV pairs
- punpckhbw m0, m0, m1 // next 8 UV pairs
-%if cpuflag(AVX2)
- vperm2i128 m1, m2, m0, 0x20 // low 128 of ymm2 and low 128 of ymm0
- vperm2i128 m2, m2, m0, 0x31 // high 128 of ymm2 and high 128 of ymm0
- mov%1 [dst_uvq], m1
- mov%1 [dst_uvq + mmsize], m2
-%else
- mov%1 [dst_uvq], m2
- mov%1 [dst_uvq + mmsize], m0
-%endif
- lea dst_uvq, [dst_uvq + mmsize * 2]
- sub pixd, mmsize
- jg .convertloop
- REP_RET
-%endmacro
-
-INIT_MMX MMX
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_XMM SSE2
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_YMM AVX2
-MergeUVRow_ a,
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale.cc
deleted file mode 100755
index b3893cc00c..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale.cc
+++ /dev/null
@@ -1,926 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h" // For CopyPlane
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Remove this macro if OVERREAD is safe.
-#define AVOID_OVERREAD 1
-
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
-
-// Scale plane, 1/2
-// This is an optimized version for scaling down a plane to 1/2 of
-// its original size.
-
-static void ScalePlaneDown2(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int y;
- void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) =
- filtering == kFilterNone ? ScaleRowDown2_C :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
- ScaleRowDown2Box_C);
- int row_stride = src_stride << 1;
- if (!filtering) {
- src_ptr += src_stride; // Point to odd rows.
- src_stride = 0;
- }
-
-#if defined(HAS_SCALEROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
- }
-#elif defined(HAS_SCALEROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
- ScaleRowDown2Box_Unaligned_SSE2);
- if (IS_ALIGNED(src_ptr, 16) &&
- IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
- ScaleRowDown2Box_SSE2);
- }
- }
-#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
- IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown2 = filtering ?
- ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
- }
-#endif
-
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- // TODO(fbarchard): Loop through source height to allow odd height.
- for (y = 0; y < dst_height; ++y) {
- ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
- src_ptr += row_stride;
- dst_ptr += dst_stride;
- }
-}
-
-// Scale plane, 1/4
-// This is an optimized version for scaling down a plane to 1/4 of
-// its original size.
-
-static void ScalePlaneDown4(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int y;
- void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) =
- filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
- int row_stride = src_stride << 2;
- if (!filtering) {
- src_ptr += src_stride * 2; // Point to row 2.
- src_stride = 0;
- }
-#if defined(HAS_SCALEROWDOWN4_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
- }
-#elif defined(HAS_SCALEROWDOWN4_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
- }
-#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown4 = filtering ?
- ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
- }
-#endif
-
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- for (y = 0; y < dst_height; ++y) {
- ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
- src_ptr += row_stride;
- dst_ptr += dst_stride;
- }
-}
-
-// Scale plane down, 3/4
-
-static void ScalePlaneDown34(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int y;
- void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
- assert(dst_width % 3 == 0);
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_C;
- ScaleRowDown34_1 = ScaleRowDown34_C;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
- }
-#if defined(HAS_SCALEROWDOWN34_NEON)
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_NEON;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN34_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
- }
- }
-#endif
-
- for (y = 0; y < dst_height - 2; y += 3) {
- ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
- dst_ptr, dst_width);
- src_ptr += src_stride * 2;
- dst_ptr += dst_stride;
- }
-
- // Remainder 1 or 2 rows with last row vertically unfiltered
- if ((dst_height % 3) == 2) {
- ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
- } else if ((dst_height % 3) == 1) {
- ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
- }
-}
-
-
-// Scale plane, 3/8
-// This is an optimized version for scaling down a plane to 3/8
-// of its original size.
-//
-// Uses box filter arranges like this
-// aaabbbcc -> abc
-// aaabbbcc def
-// aaabbbcc ghi
-// dddeeeff
-// dddeeeff
-// dddeeeff
-// ggghhhii
-// ggghhhii
-// Boxes are 3x3, 2x3, 3x2 and 2x2
-
-static void ScalePlaneDown38(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int y;
- void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
- assert(dst_width % 3 == 0);
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_C;
- ScaleRowDown38_2 = ScaleRowDown38_C;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
- }
-#if defined(HAS_SCALEROWDOWN38_NEON)
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_NEON;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
- }
- }
-#elif defined(HAS_SCALEROWDOWN38_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
- ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
- }
- }
-#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
- }
- }
-#endif
-
- for (y = 0; y < dst_height - 2; y += 3) {
- ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 3;
- dst_ptr += dst_stride;
- ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 3;
- dst_ptr += dst_stride;
- ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 2;
- dst_ptr += dst_stride;
- }
-
- // Remainder 1 or 2 rows with last row vertically unfiltered
- if ((dst_height % 3) == 2) {
- ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 3;
- dst_ptr += dst_stride;
- ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
- } else if ((dst_height % 3) == 1) {
- ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
- }
-}
-
-static __inline uint32 SumBox(int iboxwidth, int iboxheight,
- ptrdiff_t src_stride, const uint8* src_ptr) {
- uint32 sum = 0u;
- int y;
- assert(iboxwidth > 0);
- assert(iboxheight > 0);
- for (y = 0; y < iboxheight; ++y) {
- int x;
- for (x = 0; x < iboxwidth; ++x) {
- sum += src_ptr[x];
- }
- src_ptr += src_stride;
- }
- return sum;
-}
-
-static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
- int x, int dx, ptrdiff_t src_stride,
- const uint8* src_ptr, uint8* dst_ptr) {
- int i;
- int boxwidth;
- for (i = 0; i < dst_width; ++i) {
- int ix = x >> 16;
- x += dx;
- boxwidth = (x >> 16) - ix;
- *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
- (boxwidth * boxheight);
- }
-}
-
-static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
- uint32 sum = 0u;
- int x;
- assert(iboxwidth > 0);
- for (x = 0; x < iboxwidth; ++x) {
- sum += src_ptr[x];
- }
- return sum;
-}
-
-static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
- const uint16* src_ptr, uint8* dst_ptr) {
- int i;
- int scaletbl[2];
- int minboxwidth = (dx >> 16);
- int* scaleptr = scaletbl - minboxwidth;
- int boxwidth;
- scaletbl[0] = 65536 / (minboxwidth * boxheight);
- scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
- for (i = 0; i < dst_width; ++i) {
- int ix = x >> 16;
- x += dx;
- boxwidth = (x >> 16) - ix;
- *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
- }
-}
-
-static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
- const uint16* src_ptr, uint8* dst_ptr) {
- int boxwidth = (dx >> 16);
- int scaleval = 65536 / (boxwidth * boxheight);
- int i;
- for (i = 0; i < dst_width; ++i) {
- *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
- x += boxwidth;
- }
-}
-
-// Scale plane down to any dimensions, with interpolation.
-// (boxfilter).
-//
-// Same method as SimpleScale, which is fixed point, outputting
-// one pixel of destination using fixed point (16.16) to step
-// through source, sampling a box of pixel with simple
-// averaging.
-static void ScalePlaneBox(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr) {
- int j;
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- const int max_y = (src_height << 16);
- ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
- // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
- if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
- uint8* dst = dst_ptr;
- int j;
- for (j = 0; j < dst_height; ++j) {
- int boxheight;
- int iy = y >> 16;
- const uint8* src = src_ptr + iy * src_stride;
- y += dy;
- if (y > max_y) {
- y = max_y;
- }
- boxheight = (y >> 16) - iy;
- ScalePlaneBoxRow_C(dst_width, boxheight,
- x, dx, src_stride,
- src, dst);
- dst += dst_stride;
- }
- return;
- }
- {
- // Allocate a row buffer of uint16.
- align_buffer_64(row16, src_width * 2);
- void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
- const uint16* src_ptr, uint8* dst_ptr) =
- (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
- void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
-
-#if defined(HAS_SCALEADDROWS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
-#ifdef AVOID_OVERREAD
- IS_ALIGNED(src_width, 16) &&
-#endif
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- ScaleAddRows = ScaleAddRows_SSE2;
- }
-#endif
-
- for (j = 0; j < dst_height; ++j) {
- int boxheight;
- int iy = y >> 16;
- const uint8* src = src_ptr + iy * src_stride;
- y += dy;
- if (y > (src_height << 16)) {
- y = (src_height << 16);
- }
- boxheight = (y >> 16) - iy;
- ScaleAddRows(src, src_stride, (uint16*)(row16),
- src_width, boxheight);
- ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
- dst_ptr);
- dst_ptr += dst_stride;
- }
- free_aligned_buffer_64(row16);
- }
-}
-
-// Scale plane down with bilinear interpolation.
-void ScalePlaneBilinearDown(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
- // Allocate a row buffer.
- align_buffer_64(row, src_width);
-
- const int max_y = (src_height - 1) << 16;
- int j;
- void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) =
- (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
- void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
-
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(src_width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(src_width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
- InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
- if (IS_ALIGNED(src_width, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
- }
-#endif
-
-
-#if defined(HAS_SCALEFILTERCOLS_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleFilterCols = ScaleFilterCols_SSSE3;
- }
-#endif
- if (y > max_y) {
- y = max_y;
- }
-
- for (j = 0; j < dst_height; ++j) {
- int yi = y >> 16;
- const uint8* src = src_ptr + yi * src_stride;
- if (filtering == kFilterLinear) {
- ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(row, src, src_stride, src_width, yf);
- ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
- }
- dst_ptr += dst_stride;
- y += dy;
- if (y > max_y) {
- y = max_y;
- }
- }
- free_aligned_buffer_64(row);
-}
-
-// Scale up down with bilinear interpolation.
-void ScalePlaneBilinearUp(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int j;
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- const int max_y = (src_height - 1) << 16;
- void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) =
- filtering ? ScaleFilterCols_C : ScaleCols_C;
- ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
-
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(dst_width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
- InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
- }
-#endif
-
- if (filtering && src_width >= 32768) {
- ScaleFilterCols = ScaleFilterCols64_C;
- }
-#if defined(HAS_SCALEFILTERCOLS_SSSE3)
- if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleFilterCols = ScaleFilterCols_SSSE3;
- }
-#endif
- if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
- ScaleFilterCols = ScaleColsUp2_C;
-#if defined(HAS_SCALECOLS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleFilterCols = ScaleColsUp2_SSE2;
- }
-#endif
- }
-
- if (y > max_y) {
- y = max_y;
- }
- {
- int yi = y >> 16;
- const uint8* src = src_ptr + yi * src_stride;
-
- // Allocate 2 row buffers.
- const int kRowSize = (dst_width + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
-
- uint8* rowptr = row;
- int rowstride = kRowSize;
- int lasty = yi;
-
- ScaleFilterCols(rowptr, src, dst_width, x, dx);
- if (src_height > 1) {
- src += src_stride;
- }
- ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
- src += src_stride;
-
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lasty) {
- if (y > max_y) {
- y = max_y;
- yi = y >> 16;
- src = src_ptr + yi * src_stride;
- }
- if (yi != lasty) {
- ScaleFilterCols(rowptr, src, dst_width, x, dx);
- rowptr += rowstride;
- rowstride = -rowstride;
- lasty = yi;
- src += src_stride;
- }
- }
- if (filtering == kFilterLinear) {
- InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
- }
- dst_ptr += dst_stride;
- y += dy;
- }
- free_aligned_buffer_64(row);
- }
-}
-
-// Scale Plane to/from any dimensions, without interpolation.
-// Fixed point math is used for performance: The upper 16 bits
-// of x and dx is the integer part of the source position and
-// the lower 16 bits are the fixed decimal part.
-
-static void ScalePlaneSimple(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr) {
- int i;
- void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) = ScaleCols_C;
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
-
- if (src_width * 2 == dst_width && x < 0x8000) {
- ScaleCols = ScaleColsUp2_C;
-#if defined(HAS_SCALECOLS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleCols = ScaleColsUp2_SSE2;
- }
-#endif
- }
-
- for (i = 0; i < dst_height; ++i) {
- ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
- dst_width, x, dx);
- dst_ptr += dst_stride;
- y += dy;
- }
-}
-
-// Scale a plane.
-// This function dispatches to a specialized scaler based on scale factor.
-
-LIBYUV_API
-void ScalePlane(const uint8* src, int src_stride,
- int src_width, int src_height,
- uint8* dst, int dst_stride,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- // Simplify filtering when possible.
- filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height,
- filtering);
-
- // Negative height means invert the image.
- if (src_height < 0) {
- src_height = -src_height;
- src = src + (src_height - 1) * src_stride;
- src_stride = -src_stride;
- }
-
- // Use specialized scales to improve performance for common resolutions.
- // For example, all the 1/2 scalings will use ScalePlaneDown2()
- if (dst_width == src_width && dst_height == src_height) {
- // Straight copy.
- CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
- return;
- }
- if (dst_width == src_width) {
- int dy = FixedDiv(src_height, dst_height);
- // Arbitrary scale vertically, but unscaled vertically.
- ScalePlaneVertical(src_height,
- dst_width, dst_height,
- src_stride, dst_stride, src, dst,
- 0, 0, dy, 1, filtering);
- return;
- }
- if (dst_width <= Abs(src_width) && dst_height <= src_height) {
- // Scale down.
- if (4 * dst_width == 3 * src_width &&
- 4 * dst_height == 3 * src_height) {
- // optimized, 3/4
- ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- if (2 * dst_width == src_width && 2 * dst_height == src_height) {
- // optimized, 1/2
- ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- // 3/8 rounded up for odd sized chroma height.
- if (8 * dst_width == 3 * src_width &&
- dst_height == ((src_height * 3 + 7) / 8)) {
- // optimized, 3/8
- ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- if (4 * dst_width == src_width && 4 * dst_height == src_height &&
- filtering != kFilterBilinear) {
- // optimized, 1/4
- ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- }
- if (filtering == kFilterBox && dst_height * 2 < src_height) {
- ScalePlaneBox(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst);
- return;
- }
- if (filtering && dst_height > src_height) {
- ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- if (filtering) {
- ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst);
-}
-
-// Scale an I420 image.
-// This function in turn calls a scaling function for each plane.
-
-LIBYUV_API
-int I420Scale(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
- int src_halfheight = SUBSAMPLE(src_height, 1, 1);
- int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
- int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
- if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
- !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
- return -1;
- }
-
- ScalePlane(src_y, src_stride_y, src_width, src_height,
- dst_y, dst_stride_y, dst_width, dst_height,
- filtering);
- ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
- dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
- filtering);
- ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
- dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
- filtering);
- return 0;
-}
-
-// Deprecated api
-LIBYUV_API
-int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
- int src_stride_y, int src_stride_u, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_y, uint8* dst_u, uint8* dst_v,
- int dst_stride_y, int dst_stride_u, int dst_stride_v,
- int dst_width, int dst_height,
- LIBYUV_BOOL interpolate) {
- return I420Scale(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- src_width, src_height,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- dst_width, dst_height,
- interpolate ? kFilterBox : kFilterNone);
-}
-
-// Deprecated api
-LIBYUV_API
-int ScaleOffset(const uint8* src, int src_width, int src_height,
- uint8* dst, int dst_width, int dst_height, int dst_yoffset,
- LIBYUV_BOOL interpolate) {
- // Chroma requires offset to multiple of 2.
- int dst_yoffset_even = dst_yoffset & ~1;
- int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
- int src_halfheight = SUBSAMPLE(src_height, 1, 1);
- int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
- int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
- int aheight = dst_height - dst_yoffset_even * 2; // actual output height
- const uint8* src_y = src;
- const uint8* src_u = src + src_width * src_height;
- const uint8* src_v = src + src_width * src_height +
- src_halfwidth * src_halfheight;
- uint8* dst_y = dst + dst_yoffset_even * dst_width;
- uint8* dst_u = dst + dst_width * dst_height +
- (dst_yoffset_even >> 1) * dst_halfwidth;
- uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
- (dst_yoffset_even >> 1) * dst_halfwidth;
- if (!src || src_width <= 0 || src_height <= 0 ||
- !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
- dst_yoffset_even >= dst_height) {
- return -1;
- }
- return I420Scale(src_y, src_width,
- src_u, src_halfwidth,
- src_v, src_halfwidth,
- src_width, src_height,
- dst_y, dst_width,
- dst_u, dst_halfwidth,
- dst_v, dst_halfwidth,
- dst_width, aheight,
- interpolate ? kFilterBox : kFilterNone);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc
deleted file mode 100755
index e339cd7c79..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc
+++ /dev/null
@@ -1,809 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h" // For CopyARGB
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-// ScaleARGB ARGB, 1/2
-// This is an optimized version for scaling down a ARGB to 1/2 of
-// its original size.
-static void ScaleARGBDown2(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- int row_stride = src_stride * (dy >> 16);
- void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) =
- filtering == kFilterNone ? ScaleARGBRowDown2_C :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
- ScaleARGBRowDown2Box_C);
- assert(dx == 65536 * 2); // Test scale factor of 2.
- assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
- // Advance to odd row, even column.
- if (filtering == kFilterBilinear) {
- src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
- } else {
- src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
- }
-
-#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
- ScaleARGBRowDown2Box_SSE2);
- }
-#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
- ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
- ScaleARGBRowDown2_NEON;
- }
-#endif
-
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- for (j = 0; j < dst_height; ++j) {
- ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
- src_argb += row_stride;
- dst_argb += dst_stride;
- }
-}
-
-// ScaleARGB ARGB, 1/4
-// This is an optimized version for scaling down a ARGB to 1/4 of
-// its original size.
-static void ScaleARGBDown4Box(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy) {
- int j;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
- int row_stride = src_stride * (dy >> 16);
- void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
- // Advance to odd row, even column.
- src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
- assert(dx == 65536 * 4); // Test scale factor of 4.
- assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
-#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
- }
-#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
- ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
- }
-#endif
- for (j = 0; j < dst_height; ++j) {
- ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
- ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
- row + kRowSize, dst_width * 2);
- ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
- src_argb += row_stride;
- dst_argb += dst_stride;
- }
- free_aligned_buffer_64(row);
-}
-
-// ScaleARGB ARGB Even
-// This is an optimized version for scaling down a ARGB to even
-// multiple of its original size.
-static void ScaleARGBDownEven(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- int col_step = dx >> 16;
- int row_stride = (dy >> 16) * src_stride;
- void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
- int src_step, uint8* dst_argb, int dst_width) =
- filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
- assert(IS_ALIGNED(src_width, 2));
- assert(IS_ALIGNED(src_height, 2));
- src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
-#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
- ScaleARGBRowDownEven_SSE2;
- }
-#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
- IS_ALIGNED(src_argb, 4)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
- ScaleARGBRowDownEven_NEON;
- }
-#endif
-
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- for (j = 0; j < dst_height; ++j) {
- ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
- src_argb += row_stride;
- dst_argb += dst_stride;
- }
-}
-
-// Scale ARGB down with bilinear interpolation.
-static void ScaleARGBBilinearDown(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
- int64 xlast = x + (int64)(dst_width - 1) * dx;
- int64 xl = (dx >= 0) ? x : xlast;
- int64 xr = (dx >= 0) ? xlast : x;
- int clip_src_width;
- xl = (xl >> 16) & ~3; // Left edge aligned.
- xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
- xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
- if (xr > src_width) {
- xr = src_width;
- }
- clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
- src_argb += xl * 4;
- x -= (int)(xl << 16);
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(clip_src_width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
- InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
- if (IS_ALIGNED(clip_src_width, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
- }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
- }
-#endif
- // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
- // Allocate a row of ARGB.
- {
- align_buffer_64(row, clip_src_width * 4);
-
- const int max_y = (src_height - 1) << 16;
- if (y > max_y) {
- y = max_y;
- }
- for (j = 0; j < dst_height; ++j) {
- int yi = y >> 16;
- const uint8* src = src_argb + yi * src_stride;
- if (filtering == kFilterLinear) {
- ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(row, src, src_stride, clip_src_width, yf);
- ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
- }
- dst_argb += dst_stride;
- y += dy;
- if (y > max_y) {
- y = max_y;
- }
- }
- free_aligned_buffer_64(row);
- }
-}
-
-// Scale ARGB up with bilinear interpolation.
-static void ScaleARGBBilinearUp(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
- const int max_y = (src_height - 1) << 16;
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(dst_width, 8)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
-#endif
- if (src_width >= 32768) {
- ScaleARGBFilterCols = filtering ?
- ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
- }
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
- if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_SSE2)
- if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBCols_SSE2;
- }
-#endif
- if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
- }
-#endif
- }
-
- if (y > max_y) {
- y = max_y;
- }
-
- {
- int yi = y >> 16;
- const uint8* src = src_argb + yi * src_stride;
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
-
- uint8* rowptr = row;
- int rowstride = kRowSize;
- int lasty = yi;
-
- ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
- if (src_height > 1) {
- src += src_stride;
- }
- ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
- src += src_stride;
-
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lasty) {
- if (y > max_y) {
- y = max_y;
- yi = y >> 16;
- src = src_argb + yi * src_stride;
- }
- if (yi != lasty) {
- ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
- rowptr += rowstride;
- rowstride = -rowstride;
- lasty = yi;
- src += src_stride;
- }
- }
- if (filtering == kFilterLinear) {
- InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
- }
- dst_argb += dst_stride;
- y += dy;
- }
- free_aligned_buffer_64(row);
- }
-}
-
-#ifdef YUVSCALEUP
-// Scale YUV to ARGB up with bilinear interpolation.
-static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride_y,
- int src_stride_u,
- int src_stride_v,
- int dst_stride_argb,
- const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- void (*I422ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(src_width, 8)) {
- I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
- I422ToARGBRow = I422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(src_width, 16)) {
- I422ToARGBRow = I422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
- I422ToARGBRow = I422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(src_width, 8)) {
- I422ToARGBRow = I422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
- }
-#endif
-
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(dst_width, 8)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
-#endif
-
- void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
- if (src_width >= 32768) {
- ScaleARGBFilterCols = filtering ?
- ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
- }
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
- if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_SSE2)
- if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBCols_SSE2;
- }
-#endif
- if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
- }
-#endif
- }
-
- const int max_y = (src_height - 1) << 16;
- if (y > max_y) {
- y = max_y;
- }
- const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
- int yi = y >> 16;
- int uv_yi = yi >> kYShift;
- const uint8* src_row_y = src_y + yi * src_stride_y;
- const uint8* src_row_u = src_u + uv_yi * src_stride_u;
- const uint8* src_row_v = src_v + uv_yi * src_stride_v;
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
-
- // Allocate 1 row of ARGB for source conversion.
- align_buffer_64(argb_row, src_width * 4);
-
- uint8* rowptr = row;
- int rowstride = kRowSize;
- int lasty = yi;
-
- // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
- ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
- if (src_height > 1) {
- src_row_y += src_stride_y;
- if (yi & 1) {
- src_row_u += src_stride_u;
- src_row_v += src_stride_v;
- }
- }
- ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
- if (src_height > 2) {
- src_row_y += src_stride_y;
- if (!(yi & 1)) {
- src_row_u += src_stride_u;
- src_row_v += src_stride_v;
- }
- }
-
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lasty) {
- if (y > max_y) {
- y = max_y;
- yi = y >> 16;
- uv_yi = yi >> kYShift;
- src_row_y = src_y + yi * src_stride_y;
- src_row_u = src_u + uv_yi * src_stride_u;
- src_row_v = src_v + uv_yi * src_stride_v;
- }
- if (yi != lasty) {
- // TODO(fbarchard): Convert the clipped region of row.
- I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
- ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
- rowptr += rowstride;
- rowstride = -rowstride;
- lasty = yi;
- src_row_y += src_stride_y;
- if (yi & 1) {
- src_row_u += src_stride_u;
- src_row_v += src_stride_v;
- }
- }
- }
- if (filtering == kFilterLinear) {
- InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
- }
- dst_argb += dst_stride_argb;
- y += dy;
- }
- free_aligned_buffer_64(row);
- free_aligned_buffer_64(row_argb);
-}
-#endif
-
-// Scale ARGB to/from any dimensions, without interpolation.
-// Fixed point math is used for performance: The upper 16 bits
-// of x and dx is the integer part of the source position and
-// the lower 16 bits are the fixed decimal part.
-
-static void ScaleARGBSimple(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy) {
- int j;
- void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
-#if defined(HAS_SCALEARGBCOLS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
- ScaleARGBCols = ScaleARGBCols_SSE2;
- }
-#endif
- if (src_width * 2 == dst_width && x < 0x8000) {
- ScaleARGBCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
- IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- ScaleARGBCols = ScaleARGBColsUp2_SSE2;
- }
-#endif
- }
-
- for (j = 0; j < dst_height; ++j) {
- ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
- dst_width, x, dx);
- dst_argb += dst_stride;
- y += dy;
- }
-}
-
-// ScaleARGB a ARGB.
-// This function in turn calls a scaling function
-// suitable for handling the desired resolutions.
-static void ScaleARGB(const uint8* src, int src_stride,
- int src_width, int src_height,
- uint8* dst, int dst_stride,
- int dst_width, int dst_height,
- int clip_x, int clip_y, int clip_width, int clip_height,
- enum FilterMode filtering) {
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- // ARGB does not support box filter yet, but allow the user to pass it.
- // Simplify filtering when possible.
- filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height,
- filtering);
-
- // Negative src_height means invert the image.
- if (src_height < 0) {
- src_height = -src_height;
- src = src + (src_height - 1) * src_stride;
- src_stride = -src_stride;
- }
- ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
- if (clip_x) {
- int64 clipf = (int64)(clip_x) * dx;
- x += (clipf & 0xffff);
- src += (clipf >> 16) * 4;
- dst += clip_x * 4;
- }
- if (clip_y) {
- int64 clipf = (int64)(clip_y) * dy;
- y += (clipf & 0xffff);
- src += (clipf >> 16) * src_stride;
- dst += clip_y * dst_stride;
- }
-
- // Special case for integer step values.
- if (((dx | dy) & 0xffff) == 0) {
- if (!dx || !dy) { // 1 pixel wide and/or tall.
- filtering = kFilterNone;
- } else {
- // Optimized even scale down. ie 2, 4, 6, 8, 10x.
- if (!(dx & 0x10000) && !(dy & 0x10000)) {
- if (dx == 0x20000) {
- // Optimized 1/2 downsample.
- ScaleARGBDown2(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy, filtering);
- return;
- }
- if (dx == 0x40000 && filtering == kFilterBox) {
- // Optimized 1/4 box downsample.
- ScaleARGBDown4Box(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy);
- return;
- }
- ScaleARGBDownEven(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy, filtering);
- return;
- }
- // Optimized odd scale down. ie 3, 5, 7, 9x.
- if ((dx & 0x10000) && (dy & 0x10000)) {
- filtering = kFilterNone;
- if (dx == 0x10000 && dy == 0x10000) {
- // Straight copy.
- ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
- dst, dst_stride, clip_width, clip_height);
- return;
- }
- }
- }
- }
- if (dx == 0x10000 && (x & 0xffff) == 0) {
- // Arbitrary scale vertically, but unscaled vertically.
- ScalePlaneVertical(src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, y, dy, 4, filtering);
- return;
- }
- if (filtering && dy < 65536) {
- ScaleARGBBilinearUp(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy, filtering);
- return;
- }
- if (filtering) {
- ScaleARGBBilinearDown(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy, filtering);
- return;
- }
- ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy);
-}
-
-LIBYUV_API
-int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- int clip_x, int clip_y, int clip_width, int clip_height,
- enum FilterMode filtering) {
- if (!src_argb || src_width == 0 || src_height == 0 ||
- !dst_argb || dst_width <= 0 || dst_height <= 0 ||
- clip_x < 0 || clip_y < 0 ||
- (clip_x + clip_width) > dst_width ||
- (clip_y + clip_height) > dst_height) {
- return -1;
- }
- ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
- dst_argb, dst_stride_argb, dst_width, dst_height,
- clip_x, clip_y, clip_width, clip_height, filtering);
- return 0;
-}
-
-// Scale an ARGB image.
-LIBYUV_API
-int ARGBScale(const uint8* src_argb, int src_stride_argb,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- if (!src_argb || src_width == 0 || src_height == 0 ||
- !dst_argb || dst_width <= 0 || dst_height <= 0) {
- return -1;
- }
- ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
- dst_argb, dst_stride_argb, dst_width, dst_height,
- 0, 0, dst_width, dst_height, filtering);
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc
deleted file mode 100755
index c0b5433239..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
- uint8* dst, int dst_width) {
- asm volatile (
-#ifdef _ANDROID
- ".fpu neon\n"
-#endif
- "1: \n"
- // load even pixels into q0, odd into q1
- "vld2.32 {q0, q1}, [%0]! \n"
- "vld2.32 {q2, q3}, [%0]! \n"
- "subs %2, %2, #8 \n" // 8 processed per loop
- "vst1.8 {q1}, [%1]! \n" // store odd pixels
- "vst1.8 {q3}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- // change the stride to row 2 pointer
- "add %1, %1, %0 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
- "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels.
- "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels.
- "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts.
- "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts.
- "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
- "vrshrn.u16 d1, q1, #2 \n"
- "vrshrn.u16 d2, q2, #2 \n"
- "vrshrn.u16 d3, q3, #2 \n"
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(src_stride), // %1
- "+r"(dst), // %2
- "+r"(dst_width) // %3
- :
- : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
- );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t, int src_stepx,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- "mov r12, %3, lsl #2 \n"
- ".p2align 2 \n"
- "1: \n"
- "vld1.32 {d0[0]}, [%0], r12 \n"
- "vld1.32 {d0[1]}, [%0], r12 \n"
- "vld1.32 {d1[0]}, [%0], r12 \n"
- "vld1.32 {d1[1]}, [%0], r12 \n"
- "subs %2, %2, #4 \n" // 4 pixels per loop.
- "vst1.8 {q0}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- : "r"(src_stepx) // %3
- : "memory", "cc", "r12", "q0"
- );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- "mov r12, %4, lsl #2 \n"
- "add %1, %1, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
- "vld1.8 {d1}, [%1], r12 \n"
- "vld1.8 {d2}, [%0], r12 \n"
- "vld1.8 {d3}, [%1], r12 \n"
- "vld1.8 {d4}, [%0], r12 \n"
- "vld1.8 {d5}, [%1], r12 \n"
- "vld1.8 {d6}, [%0], r12 \n"
- "vld1.8 {d7}, [%1], r12 \n"
- "vaddl.u8 q0, d0, d1 \n"
- "vaddl.u8 q1, d2, d3 \n"
- "vaddl.u8 q2, d4, d5 \n"
- "vaddl.u8 q3, d6, d7 \n"
- "vswp.8 d1, d2 \n" // ab_cd -> ac_bd
- "vswp.8 d5, d6 \n" // ef_gh -> eg_fh
- "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d)
- "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h)
- "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels.
- "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels.
- "subs %3, %3, #4 \n" // 4 pixels per loop.
- "vst1.8 {q0}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stride), // %1
- "+r"(dst_argb), // %2
- "+r"(dst_width) // %3
- : "r"(src_stepx) // %4
- : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
- );
-}
-#endif // __ARM_NEON__
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc
deleted file mode 100644
index 6ed8bfaf97..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc
+++ /dev/null
@@ -1,772 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h" // For CopyARGB
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-// CPU agnostic row functions
-void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src_ptr[1];
- dst[1] = src_ptr[3];
- dst += 2;
- src_ptr += 4;
- }
- if (dst_width & 1) {
- dst[0] = src_ptr[1];
- }
-}
-
-void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- const uint8* s = src_ptr;
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = (s[0] + s[1] + 1) >> 1;
- dst[1] = (s[2] + s[3] + 1) >> 1;
- dst += 2;
- s += 4;
- }
- if (dst_width & 1) {
- dst[0] = (s[0] + s[1] + 1) >> 1;
- }
-}
-
-void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
- dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
- dst += 2;
- s += 4;
- t += 4;
- }
- if (dst_width & 1) {
- dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
- }
-}
-
-void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src_ptr[2];
- dst[1] = src_ptr[6];
- dst += 2;
- src_ptr += 8;
- }
- if (dst_width & 1) {
- dst[0] = src_ptr[2];
- }
-}
-
-void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- intptr_t stride = src_stride;
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2] + src_ptr[stride + 3] +
- src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
- src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
- src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
- src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
- 8) >> 4;
- dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
- src_ptr[stride + 4] + src_ptr[stride + 5] +
- src_ptr[stride + 6] + src_ptr[stride + 7] +
- src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
- src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
- src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
- src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
- 8) >> 4;
- dst += 2;
- src_ptr += 8;
- }
- if (dst_width & 1) {
- dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2] + src_ptr[stride + 3] +
- src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
- src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
- src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
- src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
- 8) >> 4;
- }
-}
-
-void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- int x;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (x = 0; x < dst_width; x += 3) {
- dst[0] = src_ptr[0];
- dst[1] = src_ptr[1];
- dst[2] = src_ptr[3];
- dst += 3;
- src_ptr += 4;
- }
-}
-
-// Filter rows 0 and 1 together, 3 : 1
-void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
- int x;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (x = 0; x < dst_width; x += 3) {
- uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
- d[0] = (a0 * 3 + b0 + 2) >> 2;
- d[1] = (a1 * 3 + b1 + 2) >> 2;
- d[2] = (a2 * 3 + b2 + 2) >> 2;
- d += 3;
- s += 4;
- t += 4;
- }
-}
-
-// Filter rows 1 and 2 together, 1 : 1
-void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
- int x;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (x = 0; x < dst_width; x += 3) {
- uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
- d[0] = (a0 + b0 + 1) >> 1;
- d[1] = (a1 + b1 + 1) >> 1;
- d[2] = (a2 + b2 + 1) >> 1;
- d += 3;
- s += 4;
- t += 4;
- }
-}
-
-// Scales a single row of pixels using point sampling.
-void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst_ptr[0] = src_ptr[x >> 16];
- x += dx;
- dst_ptr[1] = src_ptr[x >> 16];
- x += dx;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- dst_ptr[0] = src_ptr[x >> 16];
- }
-}
-
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst_ptr[1] = dst_ptr[0] = src_ptr[0];
- src_ptr += 1;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- dst_ptr[0] = src_ptr[0];
- }
-}
-
-// (1-f)a + fb can be replaced with a + f(b-a)
-#define BLENDER(a, b, f) (uint8)((int)(a) + \
- ((int)(f) * ((int)(b) - (int)(a)) >> 16))
-
-void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- x += dx;
- xi = x >> 16;
- a = src_ptr[xi];
- b = src_ptr[xi + 1];
- dst_ptr[1] = BLENDER(a, b, x & 0xffff);
- x += dx;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- int xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- }
-}
-
-void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x32, int dx) {
- int64 x = (int64)(x32);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int64 xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- x += dx;
- xi = x >> 16;
- a = src_ptr[xi];
- b = src_ptr[xi + 1];
- dst_ptr[1] = BLENDER(a, b, x & 0xffff);
- x += dx;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- int64 xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- }
-}
-#undef BLENDER
-
-void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- int x;
- assert(dst_width % 3 == 0);
- for (x = 0; x < dst_width; x += 3) {
- dst[0] = src_ptr[0];
- dst[1] = src_ptr[3];
- dst[2] = src_ptr[6];
- dst += 3;
- src_ptr += 8;
- }
-}
-
-// 8x3 -> 3x1
-void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- int i;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (i = 0; i < dst_width; i += 3) {
- dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
- src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
- (65536 / 9) >> 16;
- dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
- src_ptr[stride + 3] + src_ptr[stride + 4] +
- src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
- src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
- (65536 / 9) >> 16;
- dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
- src_ptr[stride + 6] + src_ptr[stride + 7] +
- src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
- (65536 / 6) >> 16;
- src_ptr += 8;
- dst_ptr += 3;
- }
-}
-
-// 8x2 -> 3x1
-void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- int i;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (i = 0; i < dst_width; i += 3) {
- dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2]) * (65536 / 6) >> 16;
- dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
- src_ptr[stride + 3] + src_ptr[stride + 4] +
- src_ptr[stride + 5]) * (65536 / 6) >> 16;
- dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
- src_ptr[stride + 6] + src_ptr[stride + 7]) *
- (65536 / 4) >> 16;
- src_ptr += 8;
- dst_ptr += 3;
- }
-}
-
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
- int x;
- assert(src_width > 0);
- assert(src_height > 0);
- for (x = 0; x < src_width; ++x) {
- const uint8* s = src_ptr + x;
- unsigned int sum = 0u;
- int y;
- for (y = 0; y < src_height; ++y) {
- sum += s[0];
- s += src_stride;
- }
- // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
- dst_ptr[x] = sum < 65535u ? sum : 65535u;
- }
-}
-
-void ScaleARGBRowDown2_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
-
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src[1];
- dst[1] = src[3];
- src += 4;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[1];
- }
-}
-
-void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- int x;
- for (x = 0; x < dst_width; ++x) {
- dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
- dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
- dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
- dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
- src_argb += 8;
- dst_argb += 4;
- }
-}
-
-void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- int x;
- for (x = 0; x < dst_width; ++x) {
- dst_argb[0] = (src_argb[0] + src_argb[4] +
- src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
- dst_argb[1] = (src_argb[1] + src_argb[5] +
- src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
- dst_argb[2] = (src_argb[2] + src_argb[6] +
- src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
- dst_argb[3] = (src_argb[3] + src_argb[7] +
- src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
- src_argb += 8;
- dst_argb += 4;
- }
-}
-
-void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
-
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src[0];
- dst[1] = src[src_stepx];
- src += src_stepx * 2;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[0];
- }
-}
-
-void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- int x;
- for (x = 0; x < dst_width; ++x) {
- dst_argb[0] = (src_argb[0] + src_argb[4] +
- src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
- dst_argb[1] = (src_argb[1] + src_argb[5] +
- src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
- dst_argb[2] = (src_argb[2] + src_argb[6] +
- src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
- dst_argb[3] = (src_argb[3] + src_argb[7] +
- src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
- src_argb += src_stepx * 4;
- dst_argb += 4;
- }
-}
-
-// Scales a single row of pixels using point sampling.
-void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst[0] = src[x >> 16];
- x += dx;
- dst[1] = src[x >> 16];
- x += dx;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[x >> 16];
- }
-}
-
-void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x32, int dx) {
- int64 x = (int64)(x32);
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst[0] = src[x >> 16];
- x += dx;
- dst[1] = src[x >> 16];
- x += dx;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[x >> 16];
- }
-}
-
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst[1] = dst[0] = src[0];
- src += 1;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[0];
- }
-}
-
-// Mimics SSSE3 blender
-#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
-#define BLENDERC(a, b, f, s) (uint32)( \
- BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
-#define BLENDER(a, b, f) \
- BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
- BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
-
-void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int xi = x >> 16;
- int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
- dst[0] = BLENDER(a, b, xf);
- x += dx;
- xi = x >> 16;
- xf = (x >> 9) & 0x7f;
- a = src[xi];
- b = src[xi + 1];
- dst[1] = BLENDER(a, b, xf);
- x += dx;
- dst += 2;
- }
- if (dst_width & 1) {
- int xi = x >> 16;
- int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
- dst[0] = BLENDER(a, b, xf);
- }
-}
-
-void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x32, int dx) {
- int64 x = (int64)(x32);
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int64 xi = x >> 16;
- int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
- dst[0] = BLENDER(a, b, xf);
- x += dx;
- xi = x >> 16;
- xf = (x >> 9) & 0x7f;
- a = src[xi];
- b = src[xi + 1];
- dst[1] = BLENDER(a, b, xf);
- x += dx;
- dst += 2;
- }
- if (dst_width & 1) {
- int64 xi = x >> 16;
- int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
- dst[0] = BLENDER(a, b, xf);
- }
-}
-#undef BLENDER1
-#undef BLENDERC
-#undef BLENDER
-
-// Scale plane vertically with bilinear interpolation.
-void ScalePlaneVertical(int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int y, int dy,
- int bpp, enum FilterMode filtering) {
- // TODO(fbarchard): Allow higher bpp.
- int dst_width_bytes = dst_width * bpp;
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
- int j;
- assert(bpp >= 1 && bpp <= 4);
- assert(src_height != 0);
- assert(dst_width > 0);
- assert(dst_height > 0);
- src_argb += (x >> 16) * bpp;
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSE2;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_Unaligned_SSSE3;
- if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(dst_width_bytes, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
- InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
- if (IS_ALIGNED(dst_width_bytes, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
- }
-#endif
- for (j = 0; j < dst_height; ++j) {
- int yi;
- int yf;
- if (y > max_y) {
- y = max_y;
- }
- yi = y >> 16;
- yf = filtering ? ((y >> 8) & 255) : 0;
- InterpolateRow(dst_argb, src_argb + yi * src_stride,
- src_stride, dst_width_bytes, yf);
- dst_argb += dst_stride;
- y += dy;
- }
-}
-
-// Simplify the filtering based on scale factors.
-enum FilterMode ScaleFilterReduce(int src_width, int src_height,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- if (src_width < 0) {
- src_width = -src_width;
- }
- if (src_height < 0) {
- src_height = -src_height;
- }
- if (filtering == kFilterBox) {
- // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
- if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
- filtering = kFilterBilinear;
- }
- // If scaling to larger, switch from Box to Bilinear.
- if (dst_width >= src_width || dst_height >= src_height) {
- filtering = kFilterBilinear;
- }
- }
- if (filtering == kFilterBilinear) {
- if (src_height == 1) {
- filtering = kFilterLinear;
- }
- // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
- if (dst_height == src_height || dst_height * 3 == src_height) {
- filtering = kFilterLinear;
- }
- // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
- // avoid reading 2 pixels horizontally that causes memory exception.
- if (src_width == 1) {
- filtering = kFilterNone;
- }
- }
- if (filtering == kFilterLinear) {
- if (src_width == 1) {
- filtering = kFilterNone;
- }
- // TODO(fbarchard): Detect any odd scale factor and reduce to None.
- if (dst_width == src_width || dst_width * 3 == src_width) {
- filtering = kFilterNone;
- }
- }
- return filtering;
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv_C(int num, int div) {
- return (int)(((int64)(num) << 16) / div);
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv1_C(int num, int div) {
- return (int)((((int64)(num) << 16) - 0x00010001) /
- (div - 1));
-}
-
-#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
-
-// Compute slope values for stepping.
-void ScaleSlope(int src_width, int src_height,
- int dst_width, int dst_height,
- enum FilterMode filtering,
- int* x, int* y, int* dx, int* dy) {
- assert(x != NULL);
- assert(y != NULL);
- assert(dx != NULL);
- assert(dy != NULL);
- assert(src_width != 0);
- assert(src_height != 0);
- assert(dst_width > 0);
- assert(dst_height > 0);
- // Check for 1 pixel and avoid FixedDiv overflow.
- if (dst_width == 1 && src_width >= 32768) {
- dst_width = src_width;
- }
- if (dst_height == 1 && src_height >= 32768) {
- dst_height = src_height;
- }
- if (filtering == kFilterBox) {
- // Scale step for point sampling duplicates all pixels equally.
- *dx = FixedDiv(Abs(src_width), dst_width);
- *dy = FixedDiv(src_height, dst_height);
- *x = 0;
- *y = 0;
- } else if (filtering == kFilterBilinear) {
- // Scale step for bilinear sampling renders last pixel once for upsample.
- if (dst_width <= Abs(src_width)) {
- *dx = FixedDiv(Abs(src_width), dst_width);
- *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
- } else if (dst_width > 1) {
- *dx = FixedDiv1(Abs(src_width), dst_width);
- *x = 0;
- }
- if (dst_height <= src_height) {
- *dy = FixedDiv(src_height, dst_height);
- *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
- } else if (dst_height > 1) {
- *dy = FixedDiv1(src_height, dst_height);
- *y = 0;
- }
- } else if (filtering == kFilterLinear) {
- // Scale step for bilinear sampling renders last pixel once for upsample.
- if (dst_width <= Abs(src_width)) {
- *dx = FixedDiv(Abs(src_width), dst_width);
- *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
- } else if (dst_width > 1) {
- *dx = FixedDiv1(Abs(src_width), dst_width);
- *x = 0;
- }
- *dy = FixedDiv(src_height, dst_height);
- *y = *dy >> 1;
- } else {
- // Scale step for point sampling duplicates all pixels equally.
- *dx = FixedDiv(Abs(src_width), dst_width);
- *dy = FixedDiv(src_height, dst_height);
- *x = CENTERSTART(*dx, 0);
- *y = CENTERSTART(*dy, 0);
- }
- // Negative src_width means horizontally mirror.
- if (src_width < 0) {
- *x += (dst_width - 1) * *dx;
- *dx = -*dx;
- // src_width = -src_width; // Caller must do this.
- }
-}
-#undef CENTERSTART
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc
deleted file mode 100755
index 4572f4504e..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc
+++ /dev/null
@@ -1,653 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC MIPS DSPR2
-#if !defined(LIBYUV_DISABLE_MIPS) && \
- defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 4 \n" // iterations -> by 16
- "beqz $t9, 2f \n"
- " nop \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
- // TODO(fbarchard): Use odd pixels instead of even.
- "precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
- "precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
- "precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
- "precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "addiu $t9, $t9, -1 \n"
- "sw $t8, 0(%[dst]) \n"
- "sw $t0, 4(%[dst]) \n"
- "sw $t1, 8(%[dst]) \n"
- "sw $t2, 12(%[dst]) \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 16 \n"
-
- "2: \n"
- "andi $t9, %[dst_width], 0xf \n" // residue
- "beqz $t9, 3f \n"
- " nop \n"
-
- "21: \n"
- "lbu $t0, 0(%[src_ptr]) \n"
- "addiu %[src_ptr], %[src_ptr], 2 \n"
- "addiu $t9, $t9, -1 \n"
- "sb $t0, 0(%[dst]) \n"
- "bgtz $t9, 21b \n"
- " addiu %[dst], %[dst], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst)
- : [dst_width] "r" (dst_width)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- const uint8* t = src_ptr + src_stride;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 3 \n" // iterations -> step 8
- "bltz $t9, 2f \n"
- " nop \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t4, 0(%[t]) \n" // |19|18|17|16|
- "lw $t5, 4(%[t]) \n" // |23|22|21|20|
- "lw $t6, 8(%[t]) \n" // |27|26|25|24|
- "lw $t7, 12(%[t]) \n" // |31|30|29|28|
- "addiu $t9, $t9, -1 \n"
- "srl $t8, $t0, 16 \n" // |X|X|3|2|
- "ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
- "ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
- "raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
- "raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
- "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
- "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
- "srl $t8, $t1, 16 \n" // |X|X|7|6|
- "ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
- "ins $t5, $t8, 0, 16 \n" // |22|23|7|6|
- "raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
- "raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
- "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
- "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
- "srl $t8, $t2, 16 \n" // |X|X|11|10|
- "ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
- "ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
- "raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
- "raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
- "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
- "shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2
- "srl $t8, $t3, 16 \n" // |X|X|15|14|
- "ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
- "ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
- "raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
- "raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
- "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
- "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
- "addiu %[src_ptr], %[src_ptr], 16 \n"
- "addiu %[t], %[t], 16 \n"
- "sb $t0, 0(%[dst]) \n"
- "sb $t4, 1(%[dst]) \n"
- "sb $t1, 2(%[dst]) \n"
- "sb $t5, 3(%[dst]) \n"
- "sb $t2, 4(%[dst]) \n"
- "sb $t6, 5(%[dst]) \n"
- "sb $t3, 6(%[dst]) \n"
- "sb $t7, 7(%[dst]) \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 8 \n"
-
- "2: \n"
- "andi $t9, %[dst_width], 0x7 \n" // x = residue
- "beqz $t9, 3f \n"
- " nop \n"
-
- "21: \n"
- "lwr $t1, 0(%[src_ptr]) \n"
- "lwl $t1, 3(%[src_ptr]) \n"
- "lwr $t2, 0(%[t]) \n"
- "lwl $t2, 3(%[t]) \n"
- "srl $t8, $t1, 16 \n"
- "ins $t1, $t2, 16, 16 \n"
- "ins $t2, $t8, 0, 16 \n"
- "raddu.w.qb $t1, $t1 \n"
- "raddu.w.qb $t2, $t2 \n"
- "shra_r.w $t1, $t1, 2 \n"
- "shra_r.w $t2, $t2, 2 \n"
- "sb $t1, 0(%[dst]) \n"
- "sb $t2, 1(%[dst]) \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "addiu $t9, $t9, -2 \n"
- "addiu %[t], %[t], 4 \n"
- "bgtz $t9, 21b \n"
- " addiu %[dst], %[dst], 2 \n"
-
- "3: \n"
- ".set pop \n"
-
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst), [t] "+r" (t)
- : [dst_width] "r" (dst_width)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 3 \n"
- "beqz $t9, 2f \n"
- " nop \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
- "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0|
- "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
- "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
- "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
- "precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0|
- "precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "addiu $t9, $t9, -1 \n"
- "sw $t1, 0(%[dst]) \n"
- "sw $t5, 4(%[dst]) \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 8 \n"
-
- "2: \n"
- "andi $t9, %[dst_width], 7 \n" // residue
- "beqz $t9, 3f \n"
- " nop \n"
-
- "21: \n"
- "lbu $t1, 0(%[src_ptr]) \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "addiu $t9, $t9, -1 \n"
- "sb $t1, 0(%[dst]) \n"
- "bgtz $t9, 21b \n"
- " addiu %[dst], %[dst], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst)
- : [dst_width] "r" (dst_width)
- : "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- intptr_t stride = src_stride;
- const uint8* s1 = src_ptr + stride;
- const uint8* s2 = s1 + stride;
- const uint8* s3 = s2 + stride;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 1 \n"
- "andi $t8, %[dst_width], 1 \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 0(%[s1]) \n" // |7|6|5|4|
- "lw $t2, 0(%[s2]) \n" // |11|10|9|8|
- "lw $t3, 0(%[s3]) \n" // |15|14|13|12|
- "lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t5, 4(%[s1]) \n" // |23|22|21|20|
- "lw $t6, 4(%[s2]) \n" // |27|26|25|24|
- "lw $t7, 4(%[s3]) \n" // |31|30|29|28|
- "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
- "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
- "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
- "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
- "raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16|
- "raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20|
- "raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24|
- "raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28|
- "add $t0, $t0, $t1 \n"
- "add $t1, $t2, $t3 \n"
- "add $t0, $t0, $t1 \n"
- "add $t4, $t4, $t5 \n"
- "add $t6, $t6, $t7 \n"
- "add $t4, $t4, $t6 \n"
- "shra_r.w $t0, $t0, 4 \n"
- "shra_r.w $t4, $t4, 4 \n"
- "sb $t0, 0(%[dst]) \n"
- "sb $t4, 1(%[dst]) \n"
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[s1], %[s1], 8 \n"
- "addiu %[s2], %[s2], 8 \n"
- "addiu %[s3], %[s3], 8 \n"
- "addiu $t9, $t9, -1 \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 2 \n"
- "beqz $t8, 2f \n"
- " nop \n"
-
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 0(%[s1]) \n" // |7|6|5|4|
- "lw $t2, 0(%[s2]) \n" // |11|10|9|8|
- "lw $t3, 0(%[s3]) \n" // |15|14|13|12|
- "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
- "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
- "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
- "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
- "add $t0, $t0, $t1 \n"
- "add $t1, $t2, $t3 \n"
- "add $t0, $t0, $t1 \n"
- "shra_r.w $t0, $t0, 4 \n"
- "sb $t0, 0(%[dst]) \n"
-
- "2: \n"
- ".set pop \n"
-
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst),
- [s1] "+r" (s1),
- [s2] "+r" (s2),
- [s3] "+r" (s3)
- : [dst_width] "r" (dst_width)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6","t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- ".p2align 2 \n"
- "1: \n"
- "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
- "precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13|
- "precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|30|
- "addiu %[dst_width], %[dst_width], -24 \n"
- "ins $t1, $t1, 8, 16 \n" // |3|1|0|X|
- "ins $t4, $t0, 8, 16 \n" // |X|15|13|12|
- "ins $t5, $t5, 8, 16 \n" // |19|17|16|X|
- "ins $t8, $t9, 8, 16 \n" // |X|31|29|28|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5|
- "packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21|
- "prepend $t1, $t2, 8 \n" // |4|3|1|0|
- "prepend $t3, $t4, 24 \n" // |15|13|12|11|
- "prepend $t5, $t6, 8 \n" // |20|19|17|16|
- "prepend $t7, $t8, 24 \n" // |31|29|28|27|
- "sw $t1, 0(%[dst]) \n"
- "sw $t0, 4(%[dst]) \n"
- "sw $t3, 8(%[dst]) \n"
- "sw $t5, 12(%[dst]) \n"
- "sw $t9, 16(%[dst]) \n"
- "sw $t7, 20(%[dst]) \n"
- "bnez %[dst_width], 1b \n"
- " addiu %[dst], %[dst], 24 \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst),
- [dst_width] "+r" (dst_width)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6","t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "repl.ph $t3, 3 \n" // 0x00030003
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
- "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1|
- "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
- "muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3|
- "muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3|
- "andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1|
- "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
- "raddu.w.qb $t0, $t0 \n"
- "raddu.w.qb $t1, $t1 \n"
- "shra_r.w $t0, $t0, 1 \n"
- "shra_r.w $t1, $t1, 1 \n"
- "preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1|
- "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
- "rotr $t2, $t2, 16 \n" // |0|S1|0|S2|
- "rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
- "addu.ph $t2, $t2, $t4 \n"
- "addu.ph $t6, $t6, $t5 \n"
- "sll $t5, $t0, 1 \n"
- "add $t0, $t5, $t0 \n"
- "shra_r.ph $t2, $t2, 2 \n"
- "shra_r.ph $t6, $t6, 2 \n"
- "shll.ph $t4, $t2, 1 \n"
- "addq.ph $t4, $t4, $t2 \n"
- "addu $t0, $t0, $t1 \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "shra_r.w $t0, $t0, 2 \n"
- "addu.ph $t6, $t6, $t4 \n"
- "shra_r.ph $t6, $t6, 2 \n"
- "srl $t1, $t6, 16 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "sb $t1, 0(%[d]) \n"
- "sb $t0, 1(%[d]) \n"
- "sb $t6, 2(%[d]) \n"
- "bgtz %[dst_width], 1b \n"
- " addiu %[d], %[d], 3 \n"
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [src_stride] "+r" (src_stride),
- [d] "+r" (d),
- [dst_width] "+r" (dst_width)
- :
- : "t0", "t1", "t2", "t3",
- "t4", "t5", "t6"
- );
-}
-
-void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "repl.ph $t2, 3 \n" // 0x00030003
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
- "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1|
- "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
- "muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3|
- "muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3|
- "andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1|
- "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
- "raddu.w.qb $t0, $t0 \n"
- "raddu.w.qb $t1, $t1 \n"
- "shra_r.w $t0, $t0, 1 \n"
- "shra_r.w $t1, $t1, 1 \n"
- "preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1|
- "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
- "rotr $t4, $t4, 16 \n" // |0|S1|0|S2|
- "rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
- "addu.ph $t4, $t4, $t3 \n"
- "addu.ph $t6, $t6, $t5 \n"
- "shra_r.ph $t6, $t6, 2 \n"
- "shra_r.ph $t4, $t4, 2 \n"
- "addu.ph $t6, $t6, $t4 \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "shra_r.ph $t6, $t6, 1 \n"
- "addu $t0, $t0, $t1 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "shra_r.w $t0, $t0, 1 \n"
- "srl $t1, $t6, 16 \n"
- "sb $t1, 0(%[d]) \n"
- "sb $t0, 1(%[d]) \n"
- "sb $t6, 2(%[d]) \n"
- "bgtz %[dst_width], 1b \n"
- " addiu %[d], %[d], 3 \n"
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [src_stride] "+r" (src_stride),
- [d] "+r" (d),
- [dst_width] "+r" (dst_width)
- :
- : "t0", "t1", "t2", "t3",
- "t4", "t5", "t6"
- );
-}
-
-void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
- "wsbh $t0, $t0 \n" // |2|3|0|1|
- "wsbh $t6, $t6 \n" // |26|27|24|25|
- "srl $t0, $t0, 8 \n" // |X|2|3|0|
- "srl $t3, $t3, 16 \n" // |X|X|15|14|
- "srl $t5, $t5, 16 \n" // |X|X|23|22|
- "srl $t7, $t7, 16 \n" // |X|X|31|30|
- "ins $t1, $t2, 24, 8 \n" // |8|6|5|4|
- "ins $t6, $t5, 0, 8 \n" // |26|27|24|22|
- "ins $t1, $t0, 0, 16 \n" // |8|6|3|0|
- "ins $t6, $t7, 24, 8 \n" // |30|27|24|22|
- "prepend $t2, $t3, 24 \n" // |X|15|14|11|
- "ins $t4, $t4, 16, 8 \n" // |19|16|17|X|
- "ins $t4, $t2, 0, 16 \n" // |19|16|14|11|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "addiu %[dst_width], %[dst_width], -12 \n"
- "addiu $t8,%[dst_width], -12 \n"
- "sw $t1, 0(%[dst]) \n"
- "sw $t4, 4(%[dst]) \n"
- "sw $t6, 8(%[dst]) \n"
- "bgez $t8, 1b \n"
- " addiu %[dst], %[dst], 12 \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst),
- [dst_width] "+r" (dst_width)
- :
- : "t0", "t1", "t2", "t3", "t4",
- "t5", "t6", "t7", "t8"
- );
-}
-
-void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- const uint8* t = src_ptr + stride;
- const int c = 0x2AAA;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
- "lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0|
- "lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4|
- "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
- "packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6|
- "packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4|
- "raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6
- "raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4
- "precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1|
- "precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3|
- "srl $t4, $t4, 2 \n" // t4 / 4
- "srl $t6, $t6, 16 \n" // |0|0|S3|T3|
- "raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3
- "addu $t6, $t5, $t6 \n"
- "mul $t6, $t6, %[c] \n" // t6 * 0x2AAA
- "sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
- "sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
- "raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0
- "raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0
- "addu $t0, $t0, $t2 \n"
- "mul $t0, $t0, %[c] \n" // t0 * 0x2AAA
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[t], %[t], 8 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "addiu %[dst_ptr], %[dst_ptr], 3 \n"
- "srl $t6, $t6, 16 \n"
- "srl $t0, $t0, 16 \n"
- "sb $t4, -1(%[dst_ptr]) \n"
- "sb $t6, -2(%[dst_ptr]) \n"
- "bgtz %[dst_width], 1b \n"
- " sb $t0, -3(%[dst_ptr]) \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst_ptr] "+r" (dst_ptr),
- [t] "+r" (t),
- [dst_width] "+r" (dst_width)
- : [c] "r" (c)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6"
- );
-}
-
-void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- const uint8* s1 = src_ptr + stride;
- stride += stride;
- const uint8* s2 = src_ptr + stride;
- const int c1 = 0x1C71;
- const int c2 = 0x2AAA;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
- "lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0|
- "lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4|
- "lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0|
- "lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4|
- "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
- "packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6|
- "raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6
- "packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4|
- "raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4
- "sll $t8, $t5, 16 \n" // |R5|R4|0|0|
- "raddu.w.qb $t8, $t8 \n" // R5+R4
- "addu $t7, $t7, $t8 \n"
- "srl $t8, $t5, 16 \n" // |0|0|R7|R6|
- "raddu.w.qb $t8, $t8 \n" // R7 + R6
- "addu $t6, $t6, $t8 \n"
- "mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA
- "precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1|
- "precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1|
- "srl $t8, $t8, 8 \n" // |0|S3|T3|R3|
- "raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3
- "addu $t7, $t7, $t8 \n"
- "mul $t7, $t7, %[c1] \n" // t7 * 0x1C71
- "sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
- "sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
- "sll $t4, $t4, 8 \n" // |R2|R1|R0|0|
- "raddu.w.qb $t0, $t0 \n"
- "raddu.w.qb $t2, $t2 \n"
- "raddu.w.qb $t4, $t4 \n"
- "addu $t0, $t0, $t2 \n"
- "addu $t0, $t0, $t4 \n"
- "mul $t0, $t0, %[c1] \n" // t0 * 0x1C71
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[s1], %[s1], 8 \n"
- "addiu %[s2], %[s2], 8 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "addiu %[dst_ptr], %[dst_ptr], 3 \n"
- "srl $t6, $t6, 16 \n"
- "srl $t7, $t7, 16 \n"
- "srl $t0, $t0, 16 \n"
- "sb $t6, -1(%[dst_ptr]) \n"
- "sb $t7, -2(%[dst_ptr]) \n"
- "bgtz %[dst_width], 1b \n"
- " sb $t0, -3(%[dst_ptr]) \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst_ptr] "+r" (dst_ptr),
- [s1] "+r" (s1),
- [s2] "+r" (s2),
- [dst_width] "+r" (dst_width)
- : [c1] "r" (c1), [c2] "r" (c2)
- : "t0", "t1", "t2", "t3", "t4",
- "t5", "t6", "t7", "t8"
- );
-}
-
-#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc
deleted file mode 100755
index a9df93c055..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc
+++ /dev/null
@@ -1,699 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon.
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-
-// NEON downscalers with interpolation.
-// Provided by Fritz Koenig
-
-// Read 32x1 throw away even pixels, and write 16x1.
-void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
-#ifdef _ANDROID
- ".fpu neon\n"
-#endif
- ".p2align 2 \n"
- "1: \n"
- // load even pixels into q0, odd into q1
- "vld2.8 {q0, q1}, [%0]! \n"
- "subs %2, %2, #16 \n" // 16 processed per loop
- "vst1.8 {q1}, [%1]! \n" // store odd pixels
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst), // %1
- "+r"(dst_width) // %2
- :
- : "q0", "q1" // Clobber List
- );
-}
-
-// Read 32x2 average down and write 16x1.
-void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
-#ifdef _ANDROID
- ".fpu neon\n"
-#endif
- // change the stride to row 2 pointer
- "add %1, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
- "vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc
- "subs %3, %3, #16 \n" // 16 processed per loop
- "vpaddl.u8 q0, q0 \n" // row 1 add adjacent
- "vpaddl.u8 q1, q1 \n"
- "vpadal.u8 q0, q2 \n" // row 2 add adjacent + row1
- "vpadal.u8 q1, q3 \n"
- "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
- "vrshrn.u16 d1, q1, #2 \n"
- "vst1.8 {q0}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(src_stride), // %1
- "+r"(dst), // %2
- "+r"(dst_width) // %3
- :
- : "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
-#ifdef _ANDROID
- ".fpu neon\n"
-#endif
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
- "subs %2, %2, #8 \n" // 8 processed per loop
- "vst1.8 {d2}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "q0", "q1", "memory", "cc"
- );
-}
-
-void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
-#ifdef _ANDROID
- ".fpu neon\n"
-#endif
- "add r4, %0, %3 \n"
- "add r5, r4, %3 \n"
- "add %3, r5, %3 \n"
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {q0}, [%0]! \n" // load up 16x4
- "vld1.8 {q1}, [r4]! \n"
- "vld1.8 {q2}, [r5]! \n"
- "vld1.8 {q3}, [%3]! \n"
- "subs %2, %2, #4 \n"
- "vpaddl.u8 q0, q0 \n"
- "vpadal.u8 q0, q1 \n"
- "vpadal.u8 q0, q2 \n"
- "vpadal.u8 q0, q3 \n"
- "vpaddl.u16 q0, q0 \n"
- "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding
- "vmovn.u16 d0, q0 \n"
- "vst1.32 {d0[0]}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"(src_stride) // %3
- : "r4", "r5", "q0", "q1", "q2", "q3", "memory", "cc"
- );
-}
-
-// Down scale from 4 to 3 pixels. Use the neon multilane read/write
-// to load up the every 4th pixel into a 4 different registers.
-// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
-#ifdef _ANDROID
- ".fpu neon\n"
-#endif
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
- "subs %2, %2, #24 \n"
- "vmov d2, d3 \n" // order d0, d1, d2
- "vst3.8 {d0, d1, d2}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "d0", "d1", "d2", "d3", "memory", "cc"
- );
-}
-
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "vmov.u8 d24, #3 \n"
- "add %3, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
- "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
- "subs %2, %2, #24 \n"
-
- // filter src line 0 with src line 1
- // expand chars to shorts to allow for room
- // when adding lines together
- "vmovl.u8 q8, d4 \n"
- "vmovl.u8 q9, d5 \n"
- "vmovl.u8 q10, d6 \n"
- "vmovl.u8 q11, d7 \n"
-
- // 3 * line_0 + line_1
- "vmlal.u8 q8, d0, d24 \n"
- "vmlal.u8 q9, d1, d24 \n"
- "vmlal.u8 q10, d2, d24 \n"
- "vmlal.u8 q11, d3, d24 \n"
-
- // (3 * line_0 + line_1) >> 2
- "vqrshrn.u16 d0, q8, #2 \n"
- "vqrshrn.u16 d1, q9, #2 \n"
- "vqrshrn.u16 d2, q10, #2 \n"
- "vqrshrn.u16 d3, q11, #2 \n"
-
- // a0 = (src[0] * 3 + s[1] * 1) >> 2
- "vmovl.u8 q8, d1 \n"
- "vmlal.u8 q8, d0, d24 \n"
- "vqrshrn.u16 d0, q8, #2 \n"
-
- // a1 = (src[1] * 1 + s[2] * 1) >> 1
- "vrhadd.u8 d1, d1, d2 \n"
-
- // a2 = (src[2] * 1 + s[3] * 3) >> 2
- "vmovl.u8 q8, d2 \n"
- "vmlal.u8 q8, d3, d24 \n"
- "vqrshrn.u16 d2, q8, #2 \n"
-
- "vst3.8 {d0, d1, d2}, [%1]! \n"
-
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- :
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc"
- );
-}
-
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "vmov.u8 d24, #3 \n"
- "add %3, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
- "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
- "subs %2, %2, #24 \n"
- // average src line 0 with src line 1
- "vrhadd.u8 q0, q0, q2 \n"
- "vrhadd.u8 q1, q1, q3 \n"
-
- // a0 = (src[0] * 3 + s[1] * 1) >> 2
- "vmovl.u8 q3, d1 \n"
- "vmlal.u8 q3, d0, d24 \n"
- "vqrshrn.u16 d0, q3, #2 \n"
-
- // a1 = (src[1] * 1 + s[2] * 1) >> 1
- "vrhadd.u8 d1, d1, d2 \n"
-
- // a2 = (src[2] * 1 + s[3] * 3) >> 2
- "vmovl.u8 q3, d2 \n"
- "vmlal.u8 q3, d3, d24 \n"
- "vqrshrn.u16 d2, q3, #2 \n"
-
- "vst3.8 {d0, d1, d2}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- :
- : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc"
- );
-}
-
-#define HAS_SCALEROWDOWN38_NEON
-static uvec8 kShuf38 =
- { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
-static uvec8 kShuf38_2 =
- { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
-static vec16 kMult38_Div6 =
- { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
- 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
-static vec16 kMult38_Div9 =
- { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
- 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
-
-// 32 -> 12
-void ScaleRowDown38_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "vld1.8 {q3}, [%3] \n"
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {d0, d1, d2, d3}, [%0]! \n"
- "subs %2, %2, #12 \n"
- "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n"
- "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n"
- "vst1.8 {d4}, [%1]! \n"
- "vst1.32 {d5[0]}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"(&kShuf38) // %3
- : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"
- );
-}
-
-// 32x3 -> 12x1
-void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "vld1.16 {q13}, [%4] \n"
- "vld1.8 {q14}, [%5] \n"
- "vld1.8 {q15}, [%6] \n"
- "add r4, %0, %3, lsl #1 \n"
- "add %3, %0 \n"
- ".p2align 2 \n"
- "1: \n"
-
- // d0 = 00 40 01 41 02 42 03 43
- // d1 = 10 50 11 51 12 52 13 53
- // d2 = 20 60 21 61 22 62 23 63
- // d3 = 30 70 31 71 32 72 33 73
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
- "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
- "vld4.8 {d16, d17, d18, d19}, [r4]! \n"
- "subs %2, %2, #12 \n"
-
- // Shuffle the input data around to get align the data
- // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
- // d0 = 00 10 01 11 02 12 03 13
- // d1 = 40 50 41 51 42 52 43 53
- "vtrn.u8 d0, d1 \n"
- "vtrn.u8 d4, d5 \n"
- "vtrn.u8 d16, d17 \n"
-
- // d2 = 20 30 21 31 22 32 23 33
- // d3 = 60 70 61 71 62 72 63 73
- "vtrn.u8 d2, d3 \n"
- "vtrn.u8 d6, d7 \n"
- "vtrn.u8 d18, d19 \n"
-
- // d0 = 00+10 01+11 02+12 03+13
- // d2 = 40+50 41+51 42+52 43+53
- "vpaddl.u8 q0, q0 \n"
- "vpaddl.u8 q2, q2 \n"
- "vpaddl.u8 q8, q8 \n"
-
- // d3 = 60+70 61+71 62+72 63+73
- "vpaddl.u8 d3, d3 \n"
- "vpaddl.u8 d7, d7 \n"
- "vpaddl.u8 d19, d19 \n"
-
- // combine source lines
- "vadd.u16 q0, q2 \n"
- "vadd.u16 q0, q8 \n"
- "vadd.u16 d4, d3, d7 \n"
- "vadd.u16 d4, d19 \n"
-
- // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
- // + s[6 + st * 1] + s[7 + st * 1]
- // + s[6 + st * 2] + s[7 + st * 2]) / 6
- "vqrdmulh.s16 q2, q2, q13 \n"
- "vmovn.u16 d4, q2 \n"
-
- // Shuffle 2,3 reg around so that 2 can be added to the
- // 0,1 reg and 3 can be added to the 4,5 reg. This
- // requires expanding from u8 to u16 as the 0,1 and 4,5
- // registers are already expanded. Then do transposes
- // to get aligned.
- // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
- "vmovl.u8 q1, d2 \n"
- "vmovl.u8 q3, d6 \n"
- "vmovl.u8 q9, d18 \n"
-
- // combine source lines
- "vadd.u16 q1, q3 \n"
- "vadd.u16 q1, q9 \n"
-
- // d4 = xx 20 xx 30 xx 22 xx 32
- // d5 = xx 21 xx 31 xx 23 xx 33
- "vtrn.u32 d2, d3 \n"
-
- // d4 = xx 20 xx 21 xx 22 xx 23
- // d5 = xx 30 xx 31 xx 32 xx 33
- "vtrn.u16 d2, d3 \n"
-
- // 0+1+2, 3+4+5
- "vadd.u16 q0, q1 \n"
-
- // Need to divide, but can't downshift as the the value
- // isn't a power of 2. So multiply by 65536 / n
- // and take the upper 16 bits.
- "vqrdmulh.s16 q0, q0, q15 \n"
-
- // Align for table lookup, vtbl requires registers to
- // be adjacent
- "vmov.u8 d2, d4 \n"
-
- "vtbl.u8 d3, {d0, d1, d2}, d28 \n"
- "vtbl.u8 d4, {d0, d1, d2}, d29 \n"
-
- "vst1.8 {d3}, [%1]! \n"
- "vst1.32 {d4[0]}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- : "r"(&kMult38_Div6), // %4
- "r"(&kShuf38_2), // %5
- "r"(&kMult38_Div9) // %6
- : "r4", "q0", "q1", "q2", "q3", "q8", "q9",
- "q13", "q14", "q15", "memory", "cc"
- );
-}
-
-// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "vld1.16 {q13}, [%4] \n"
- "vld1.8 {q14}, [%5] \n"
- "add %3, %0 \n"
- ".p2align 2 \n"
- "1: \n"
-
- // d0 = 00 40 01 41 02 42 03 43
- // d1 = 10 50 11 51 12 52 13 53
- // d2 = 20 60 21 61 22 62 23 63
- // d3 = 30 70 31 71 32 72 33 73
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
- "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
- "subs %2, %2, #12 \n"
-
- // Shuffle the input data around to get align the data
- // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
- // d0 = 00 10 01 11 02 12 03 13
- // d1 = 40 50 41 51 42 52 43 53
- "vtrn.u8 d0, d1 \n"
- "vtrn.u8 d4, d5 \n"
-
- // d2 = 20 30 21 31 22 32 23 33
- // d3 = 60 70 61 71 62 72 63 73
- "vtrn.u8 d2, d3 \n"
- "vtrn.u8 d6, d7 \n"
-
- // d0 = 00+10 01+11 02+12 03+13
- // d2 = 40+50 41+51 42+52 43+53
- "vpaddl.u8 q0, q0 \n"
- "vpaddl.u8 q2, q2 \n"
-
- // d3 = 60+70 61+71 62+72 63+73
- "vpaddl.u8 d3, d3 \n"
- "vpaddl.u8 d7, d7 \n"
-
- // combine source lines
- "vadd.u16 q0, q2 \n"
- "vadd.u16 d4, d3, d7 \n"
-
- // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
- "vqrshrn.u16 d4, q2, #2 \n"
-
- // Shuffle 2,3 reg around so that 2 can be added to the
- // 0,1 reg and 3 can be added to the 4,5 reg. This
- // requires expanding from u8 to u16 as the 0,1 and 4,5
- // registers are already expanded. Then do transposes
- // to get aligned.
- // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
- "vmovl.u8 q1, d2 \n"
- "vmovl.u8 q3, d6 \n"
-
- // combine source lines
- "vadd.u16 q1, q3 \n"
-
- // d4 = xx 20 xx 30 xx 22 xx 32
- // d5 = xx 21 xx 31 xx 23 xx 33
- "vtrn.u32 d2, d3 \n"
-
- // d4 = xx 20 xx 21 xx 22 xx 23
- // d5 = xx 30 xx 31 xx 32 xx 33
- "vtrn.u16 d2, d3 \n"
-
- // 0+1+2, 3+4+5
- "vadd.u16 q0, q1 \n"
-
- // Need to divide, but can't downshift as the the value
- // isn't a power of 2. So multiply by 65536 / n
- // and take the upper 16 bits.
- "vqrdmulh.s16 q0, q0, q13 \n"
-
- // Align for table lookup, vtbl requires registers to
- // be adjacent
- "vmov.u8 d2, d4 \n"
-
- "vtbl.u8 d3, {d0, d1, d2}, d28 \n"
- "vtbl.u8 d4, {d0, d1, d2}, d29 \n"
-
- "vst1.8 {d3}, [%1]! \n"
- "vst1.32 {d4[0]}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- : "r"(&kMult38_Div6), // %4
- "r"(&kShuf38_2) // %5
- : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"
- );
-}
-
-// 16x2 -> 16x1
-void ScaleFilterRows_NEON(uint8* dst_ptr,
- const uint8* src_ptr, ptrdiff_t src_stride,
- int dst_width, int source_y_fraction) {
- asm volatile (
- "cmp %4, #0 \n"
- "beq 100f \n"
- "add %2, %1 \n"
- "cmp %4, #64 \n"
- "beq 75f \n"
- "cmp %4, #128 \n"
- "beq 50f \n"
- "cmp %4, #192 \n"
- "beq 25f \n"
-
- "vdup.8 d5, %4 \n"
- "rsb %4, #256 \n"
- "vdup.8 d4, %4 \n"
- // General purpose row blend.
- "1: \n"
- "vld1.8 {q0}, [%1]! \n"
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vmull.u8 q13, d0, d4 \n"
- "vmull.u8 q14, d1, d4 \n"
- "vmlal.u8 q13, d2, d5 \n"
- "vmlal.u8 q14, d3, d5 \n"
- "vrshrn.u16 d0, q13, #8 \n"
- "vrshrn.u16 d1, q14, #8 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 1b \n"
- "b 99f \n"
-
- // Blend 25 / 75.
- "25: \n"
- "vld1.8 {q0}, [%1]! \n"
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 25b \n"
- "b 99f \n"
-
- // Blend 50 / 50.
- "50: \n"
- "vld1.8 {q0}, [%1]! \n"
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 50b \n"
- "b 99f \n"
-
- // Blend 75 / 25.
- "75: \n"
- "vld1.8 {q1}, [%1]! \n"
- "vld1.8 {q0}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 75b \n"
- "b 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- "100: \n"
- "vld1.8 {q0}, [%1]! \n"
- "subs %3, %3, #16 \n"
- "vst1.8 {q0}, [%0]! \n"
- "bgt 100b \n"
-
- "99: \n"
- "vst1.8 {d1[7]}, [%0] \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(src_stride), // %2
- "+r"(dst_width), // %3
- "+r"(source_y_fraction) // %4
- :
- : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"
- );
-}
-
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- // load even pixels into q0, odd into q1
- "vld2.32 {q0, q1}, [%0]! \n"
- "vld2.32 {q2, q3}, [%0]! \n"
- "subs %2, %2, #8 \n" // 8 processed per loop
- "vst1.8 {q1}, [%1]! \n" // store odd pixels
- "vst1.8 {q3}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- // change the stride to row 2 pointer
- "add %1, %1, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
- "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels.
- "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels.
- "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts.
- "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts.
- "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts.
- "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts.
- "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
- "vrshrn.u16 d1, q1, #2 \n"
- "vrshrn.u16 d2, q2, #2 \n"
- "vrshrn.u16 d3, q3, #2 \n"
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(src_stride), // %1
- "+r"(dst), // %2
- "+r"(dst_width) // %3
- :
- : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
- );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx, uint8* dst_argb, int dst_width) {
- asm volatile (
- "mov r12, %3, lsl #2 \n"
- ".p2align 2 \n"
- "1: \n"
- "vld1.32 {d0[0]}, [%0], r12 \n"
- "vld1.32 {d0[1]}, [%0], r12 \n"
- "vld1.32 {d1[0]}, [%0], r12 \n"
- "vld1.32 {d1[1]}, [%0], r12 \n"
- "subs %2, %2, #4 \n" // 4 pixels per loop.
- "vst1.8 {q0}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- : "r"(src_stepx) // %3
- : "memory", "cc", "r12", "q0"
- );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- "mov r12, %4, lsl #2 \n"
- "add %1, %1, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
- "vld1.8 {d1}, [%1], r12 \n"
- "vld1.8 {d2}, [%0], r12 \n"
- "vld1.8 {d3}, [%1], r12 \n"
- "vld1.8 {d4}, [%0], r12 \n"
- "vld1.8 {d5}, [%1], r12 \n"
- "vld1.8 {d6}, [%0], r12 \n"
- "vld1.8 {d7}, [%1], r12 \n"
- "vaddl.u8 q0, d0, d1 \n"
- "vaddl.u8 q1, d2, d3 \n"
- "vaddl.u8 q2, d4, d5 \n"
- "vaddl.u8 q3, d6, d7 \n"
- "vswp.8 d1, d2 \n" // ab_cd -> ac_bd
- "vswp.8 d5, d6 \n" // ef_gh -> eg_fh
- "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d)
- "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h)
- "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels.
- "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels.
- "subs %3, %3, #4 \n" // 4 pixels per loop.
- "vst1.8 {q0}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stride), // %1
- "+r"(dst_argb), // %2
- "+r"(dst_width) // %3
- : "r"(src_stepx) // %4
- : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
- );
-}
-
-#endif // __ARM_NEON__
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc
deleted file mode 100644
index 352e667822..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc
+++ /dev/null
@@ -1,1315 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-// Offsets for source bytes 0 to 9
-static uvec8 kShuf0 =
- { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
-static uvec8 kShuf1 =
- { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf2 =
- { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 0 to 10
-static uvec8 kShuf01 =
- { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
-
-// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
-static uvec8 kShuf11 =
- { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf21 =
- { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
-
-// Coefficients for source bytes 0 to 10
-static uvec8 kMadd01 =
- { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
-
-// Coefficients for source bytes 10 to 21
-static uvec8 kMadd11 =
- { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
-
-// Coefficients for source bytes 21 to 31
-static uvec8 kMadd21 =
- { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
-
-// Coefficients for source bytes 21 to 31
-static vec16 kRound34 =
- { 2, 2, 2, 2, 2, 2, 2, 2 };
-
-static uvec8 kShuf38a =
- { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-static uvec8 kShuf38b =
- { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 0,1,2
-static uvec8 kShufAc =
- { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 3,4,5
-static uvec8 kShufAc3 =
- { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x3 and 2x3
-static uvec16 kScaleAc33 =
- { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
-
-// Arrange first value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb0 =
- { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
-
-// Arrange second value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb1 =
- { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
-
-// Arrange third value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb2 =
- { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x2 and 2x2
-static uvec16 kScaleAb2 =
- { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
-
-// GCC versions of row functions are verbatim conversions from Visual C.
-// Generated using gcc disassembly on Visual C object file:
-// objdump -D yuvscaler.obj >yuvscaler.txt
-
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10, 0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqa,0x00,0,3,1,xmm2) // movdqa (%0,%3,1),%%xmm2
- BUNDLEALIGN
- MEMOPREG(movdqa,0x10,0,3,1,xmm3) // movdqa 0x10(%0,%3,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- BUNDLEALIGN
- MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
- );
-}
-
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrld $0x18,%%xmm5 \n"
- "pslld $0x10,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm5"
-#endif
- );
-}
-
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stridex3 = 0;
- asm volatile (
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $0x8,%%xmm7 \n"
- "lea " MEMLEA4(0x00,4,4,2) ",%3 \n"
-
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqa,0x00,0,4,1,xmm2) // movdqa (%0,%4,1),%%xmm2
- BUNDLEALIGN
- MEMOPREG(movdqa,0x10,0,4,1,xmm3) // movdqa 0x10(%0,%4,1),%%xmm3
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- MEMOPREG(movdqa,0x00,0,4,2,xmm2) // movdqa (%0,%4,2),%%xmm2
- BUNDLEALIGN
- MEMOPREG(movdqa,0x10,0,4,2,xmm3) // movdqa 0x10(%0,%4,2),%%xmm3
- MEMOPREG(movdqa,0x00,0,3,1,xmm4) // movdqa (%0,%3,1),%%xmm4
- MEMOPREG(movdqa,0x10,0,3,1,xmm5) // movdqa 0x10(%0,%3,1),%%xmm5
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm4,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm5,%%xmm3 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm7,%%xmm2 \n"
- "pand %%xmm7,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "pand %%xmm7,%%xmm2 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(stridex3) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
-#endif
- );
-}
-
-void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm3 \n"
- "movdqa %1,%%xmm4 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kShuf0), // %0
- "m"(kShuf1), // %1
- "m"(kShuf2) // %2
- );
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm2 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "palignr $0x8,%%xmm0,%%xmm1 \n"
- "pshufb %%xmm3,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm2 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "movq %%xmm1," MEMACCESS2(0x8,1) " \n"
- "movq %%xmm2," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x18,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
- );
-}
-
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm2 \n" // kShuf01
- "movdqa %1,%%xmm3 \n" // kShuf11
- "movdqa %2,%%xmm4 \n" // kShuf21
- :
- : "m"(kShuf01), // %0
- "m"(kShuf11), // %1
- "m"(kShuf21) // %2
- );
- asm volatile (
- "movdqa %0,%%xmm5 \n" // kMadd01
- "movdqa %1,%%xmm0 \n" // kMadd11
- "movdqa %2,%%xmm1 \n" // kRound34
- :
- : "m"(kMadd01), // %0
- "m"(kMadd11), // %1
- "m"(kRound34) // %2
- );
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- MEMOPREG(movdqa,0x00,0,3,1,xmm7) // movdqa (%0,%3),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm5,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS(1) " \n"
- "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm3,%%xmm6 \n"
- "pmaddubsw %%xmm0,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x8,1) " \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- BUNDLEALIGN
- MEMOPREG(movdqa,0x10,0,3,1,xmm7) // movdqa 0x10(%0,%3),%%xmm7
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm4,%%xmm6 \n"
- "pmaddubsw %4,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x18,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "m"(kMadd21) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm2 \n" // kShuf01
- "movdqa %1,%%xmm3 \n" // kShuf11
- "movdqa %2,%%xmm4 \n" // kShuf21
- :
- : "m"(kShuf01), // %0
- "m"(kShuf11), // %1
- "m"(kShuf21) // %2
- );
- asm volatile (
- "movdqa %0,%%xmm5 \n" // kMadd01
- "movdqa %1,%%xmm0 \n" // kMadd11
- "movdqa %2,%%xmm1 \n" // kRound34
- :
- : "m"(kMadd01), // %0
- "m"(kMadd11), // %1
- "m"(kRound34) // %2
- );
-
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm6 \n"
- MEMOPREG(movdqa,0x00,0,3,1,xmm7) // movdqa (%0,%3,1),%%xmm7
- "pavgb %%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm5,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS(1) " \n"
- "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3,1),%%xmm7
- "pavgb %%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm3,%%xmm6 \n"
- "pmaddubsw %%xmm0,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x8,1) " \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- MEMOPREG(movdqa,0x10,0,3,1,xmm7) // movdqa 0x10(%0,%3,1),%%xmm7
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm4,%%xmm6 \n"
- "pmaddubsw %4,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x18,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "m"(kMadd21) // %4
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "movhlps %%xmm0,%%xmm1 \n"
- "movd %%xmm1," MEMACCESS2(0x8,1) " \n"
- "lea " MEMLEA(0xc,1) ",%1 \n"
- "sub $0xc,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "m"(kShuf38a), // %3
- "m"(kShuf38b) // %4
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm4", "xmm5"
-#endif
- );
-}
-
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm2 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm4 \n"
- "movdqa %3,%%xmm5 \n"
- :
- : "m"(kShufAb0), // %0
- "m"(kShufAb1), // %1
- "m"(kShufAb2), // %2
- "m"(kScaleAb2) // %3
- );
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(pavgb,0x00,0,3,1,xmm0) // pavgb (%0,%3,1),%%xmm0
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pshufb %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm6 \n"
- "pshufb %%xmm3,%%xmm6 \n"
- "paddusw %%xmm6,%%xmm1 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "paddusw %%xmm0,%%xmm1 \n"
- "pmulhuw %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "sub $0x6,%2 \n"
- "movd %%xmm1," MEMACCESS(1) " \n"
- "psrlq $0x10,%%xmm1 \n"
- "movd %%xmm1," MEMACCESS2(0x2,1) " \n"
- "lea " MEMLEA(0x6,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm2 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
- :
- : "m"(kShufAc), // %0
- "m"(kShufAc3), // %1
- "m"(kScaleAc33) // %2
- );
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqa,0x00,0,3,1,xmm6) // movdqa (%0,%3,1),%%xmm6
- "movhlps %%xmm0,%%xmm1 \n"
- "movhlps %%xmm6,%%xmm7 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm6 \n"
- "punpcklbw %%xmm5,%%xmm7 \n"
- "paddusw %%xmm6,%%xmm0 \n"
- "paddusw %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqa,0x00,0,3,2,xmm6) // movdqa (%0,%3,2),%%xmm6
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movhlps %%xmm6,%%xmm7 \n"
- "punpcklbw %%xmm5,%%xmm6 \n"
- "punpcklbw %%xmm5,%%xmm7 \n"
- "paddusw %%xmm6,%%xmm0 \n"
- "paddusw %%xmm7,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm6 \n"
- "psrldq $0x2,%%xmm0 \n"
- "paddusw %%xmm0,%%xmm6 \n"
- "psrldq $0x2,%%xmm0 \n"
- "paddusw %%xmm0,%%xmm6 \n"
- "pshufb %%xmm2,%%xmm6 \n"
- "movdqa %%xmm1,%%xmm7 \n"
- "psrldq $0x2,%%xmm1 \n"
- "paddusw %%xmm1,%%xmm7 \n"
- "psrldq $0x2,%%xmm1 \n"
- "paddusw %%xmm1,%%xmm7 \n"
- "pshufb %%xmm3,%%xmm7 \n"
- "paddusw %%xmm7,%%xmm6 \n"
- "pmulhuw %%xmm4,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "sub $0x6,%2 \n"
- "movd %%xmm6," MEMACCESS(1) " \n"
- "psrlq $0x10,%%xmm6 \n"
- "movd %%xmm6," MEMACCESS2(0x2,1) " \n"
- "lea " MEMLEA(0x6,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
- );
-}
-
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
- int tmp_height = 0;
- intptr_t tmp_src = 0;
- asm volatile (
- "pxor %%xmm4,%%xmm4 \n"
- "sub $0x1,%5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "mov %0,%3 \n"
- "add %6,%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm4,%%xmm0 \n"
- "punpckhbw %%xmm4,%%xmm1 \n"
- "mov %5,%2 \n"
- "test %2,%2 \n"
- "je 3f \n"
-
- LABELALIGN
- "2: \n"
- "movdqa " MEMACCESS(0) ",%%xmm2 \n"
- "add %6,%0 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm2 \n"
- "punpckhbw %%xmm4,%%xmm3 \n"
- "paddusw %%xmm2,%%xmm0 \n"
- "paddusw %%xmm3,%%xmm1 \n"
- "sub $0x1,%2 \n"
- "jg 2b \n"
-
- LABELALIGN
- "3: \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x10,3) ",%0 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(tmp_height), // %2
- "+r"(tmp_src), // %3
- "+r"(src_width), // %4
- "+rm"(src_height) // %5
- : "rm"((intptr_t)(src_stride)) // %6
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
- );
-}
-
-// Bilinear column filtering. SSSE3 version.
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- intptr_t x0 = 0, x1 = 0, temp_pixel = 0;
- asm volatile (
- "movd %6,%%xmm2 \n"
- "movd %7,%%xmm3 \n"
- "movl $0x04040000,%k2 \n"
- "movd %k2,%%xmm5 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x9,%%xmm6 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "subl $0x2,%5 \n"
- "jl 29f \n"
- "movdqa %%xmm2,%%xmm0 \n"
- "paddd %%xmm3,%%xmm0 \n"
- "punpckldq %%xmm0,%%xmm2 \n"
- "punpckldq %%xmm3,%%xmm3 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
-
- LABELALIGN
- "2: \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "paddd %%xmm3,%%xmm2 \n"
- MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
- "movd %k2,%%xmm0 \n"
- "psrlw $0x9,%%xmm1 \n"
- BUNDLEALIGN
- MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2
- "movd %k2,%%xmm4 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "punpcklwd %%xmm4,%%xmm0 \n"
- "pxor %%xmm6,%%xmm1 \n"
- "pmaddubsw %%xmm1,%%xmm0 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k2 \n"
- "mov %w2," MEMACCESS(0) " \n"
- "lea " MEMLEA(0x2,0) ",%0 \n"
- "sub $0x2,%5 \n"
- "jge 2b \n"
-
- LABELALIGN
- "29: \n"
- "addl $0x1,%5 \n"
- "jl 99f \n"
- MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
- "movd %k2,%%xmm0 \n"
- "psrlw $0x9,%%xmm2 \n"
- "pshufb %%xmm5,%%xmm2 \n"
- "pxor %%xmm6,%%xmm2 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k2 \n"
- "mov %b2," MEMACCESS(0) " \n"
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+a"(temp_pixel), // %2
- "+r"(x0), // %3
- "+r"(x1), // %4
- "+rm"(dst_width) // %5
- : "rm"(x), // %6
- "rm"(dx) // %7
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-
-// Reads 4 pixels, duplicates them and writes 8 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- "sub $0x20,%2 \n"
- "movdqa %%xmm0," MEMACCESS(0) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "jg 1b \n"
-
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "shufps $0xdd,%%xmm1,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movdqa,0x00,0,3,1,xmm2) // movdqa (%0,%3,1),%%xmm2
- MEMOPREG(movdqa,0x10,0,3,1,xmm3) // movdqa 0x10(%0,%3,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "sub $0x4,%2 \n"
- "movdqa %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
- );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: dst_argb 16 byte aligned.
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
- intptr_t src_stepx_x12 = 0;
- asm volatile (
- "lea " MEMLEA3(0x00,1,4) ",%1 \n"
- "lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
- LABELALIGN
- "1: \n"
- "movd " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
- "punpckldq %%xmm1,%%xmm0 \n"
- BUNDLEALIGN
- MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
- MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
- "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
- "punpckldq %%xmm3,%%xmm2 \n"
- "punpcklqdq %%xmm2,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stepx_x4), // %1
- "+r"(dst_argb), // %2
- "+r"(dst_width), // %3
- "+r"(src_stepx_x12) // %4
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
- );
-}
-
-// Blends four 2x2 to 4x1.
-// Alignment requirement: dst_argb 16 byte aligned.
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride, int src_stepx,
- uint8* dst_argb, int dst_width) {
- intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
- intptr_t src_stepx_x12 = 0;
- intptr_t row1 = (intptr_t)(src_stride);
- asm volatile (
- "lea " MEMLEA3(0x00,1,4) ",%1 \n"
- "lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
- "lea " MEMLEA4(0x00,0,5,1) ",%5 \n"
-
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
- MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
- BUNDLEALIGN
- MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
- "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
- "movq " MEMACCESS(5) ",%%xmm2 \n"
- BUNDLEALIGN
- MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
- MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
- MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
- "lea " MEMLEA4(0x00,5,1,4) ",%5 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "sub $0x4,%3 \n"
- "movdqa %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stepx_x4), // %1
- "+r"(dst_argb), // %2
- "+rm"(dst_width), // %3
- "+r"(src_stepx_x12), // %4
- "+r"(row1) // %5
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
- );
-}
-
-void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- intptr_t x0 = 0, x1 = 0;
- asm volatile (
- "movd %5,%%xmm2 \n"
- "movd %6,%%xmm3 \n"
- "pshufd $0x0,%%xmm2,%%xmm2 \n"
- "pshufd $0x11,%%xmm3,%%xmm0 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pshufd $0x5,%%xmm3,%%xmm0 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n"
- "pextrw $0x1,%%xmm2,%k0 \n"
- "pextrw $0x3,%%xmm2,%k1 \n"
- "cmp $0x0,%4 \n"
- "jl 99f \n"
- "sub $0x4,%4 \n"
- "jl 49f \n"
-
- LABELALIGN
- "40: \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
- "pextrw $0x5,%%xmm2,%k0 \n"
- "pextrw $0x7,%%xmm2,%k1 \n"
- "paddd %%xmm3,%%xmm2 \n"
- "punpckldq %%xmm1,%%xmm0 \n"
- MEMOPREG(movd,0x00,3,0,4,xmm1) // movd (%3,%0,4),%%xmm1
- MEMOPREG(movd,0x00,3,1,4,xmm4) // movd (%3,%1,4),%%xmm4
- "pextrw $0x1,%%xmm2,%k0 \n"
- "pextrw $0x3,%%xmm2,%k1 \n"
- "punpckldq %%xmm4,%%xmm1 \n"
- "punpcklqdq %%xmm1,%%xmm0 \n"
- "sub $0x4,%4 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "jge 40b \n"
-
- "49: \n"
- "test $0x2,%4 \n"
- "je 29f \n"
- BUNDLEALIGN
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
- "pextrw $0x5,%%xmm2,%k0 \n"
- "punpckldq %%xmm1,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x8,2) ",%2 \n"
- "29: \n"
- "test $0x1,%4 \n"
- "je 99f \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- "movd %%xmm0," MEMACCESS(2) " \n"
- "99: \n"
- : "+a"(x0), // %0
- "+d"(x1), // %1
- "+r"(dst_argb), // %2
- "+r"(src_argb), // %3
- "+r"(dst_width) // %4
- : "rm"(x), // %5
- "rm"(dx) // %6
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
- );
-}
-
-// Reads 4 pixels, duplicates them and writes 8 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqa " MEMACCESS(1) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpckldq %%xmm0,%%xmm0 \n"
- "punpckhdq %%xmm1,%%xmm1 \n"
- "sub $0x8,%2 \n"
- "movdqa %%xmm0," MEMACCESS(0) " \n"
- "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "jg 1b \n"
-
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1"
-#endif
- );
-}
-
-// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
-static uvec8 kShuffleColARGB = {
- 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel
- 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel
-};
-
-// Shuffle table for duplicating 2 fractions into 8 bytes each
-static uvec8 kShuffleFractions = {
- 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
-};
-
-// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- intptr_t x0 = 0, x1 = 0;
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm5 \n"
- :
- : "m"(kShuffleColARGB), // %0
- "m"(kShuffleFractions) // %1
- );
-
- asm volatile (
- "movd %5,%%xmm2 \n"
- "movd %6,%%xmm3 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x9,%%xmm6 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "sub $0x2,%2 \n"
- "jl 29f \n"
- "movdqa %%xmm2,%%xmm0 \n"
- "paddd %%xmm3,%%xmm0 \n"
- "punpckldq %%xmm0,%%xmm2 \n"
- "punpckldq %%xmm3,%%xmm3 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
-
- LABELALIGN
- "2: \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "paddd %%xmm3,%%xmm2 \n"
- MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
- "psrlw $0x9,%%xmm1 \n"
- BUNDLEALIGN
- MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0
- "pshufb %%xmm5,%%xmm1 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "pxor %%xmm6,%%xmm1 \n"
- "pmaddubsw %%xmm1,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(0) " \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "sub $0x2,%2 \n"
- "jge 2b \n"
-
- LABELALIGN
- "29: \n"
- "add $0x1,%2 \n"
- "jl 99f \n"
- "psrlw $0x9,%%xmm2 \n"
- BUNDLEALIGN
- MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
- "pshufb %%xmm5,%%xmm2 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "pxor %%xmm6,%%xmm2 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0," MEMACCESS(0) " \n"
-
- LABELALIGN
- "99: \n"
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+rm"(dst_width), // %2
- "+r"(x0), // %3
- "+r"(x1) // %4
- : "rm"(x), // %5
- "rm"(dx) // %6
- : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
- , "r14"
-#endif
-#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
- );
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv_X86(int num, int div) {
- asm volatile (
- "cdq \n"
- "shld $0x10,%%eax,%%edx \n"
- "shl $0x10,%%eax \n"
- "idiv %1 \n"
- "mov %0, %%eax \n"
- : "+a"(num) // %0
- : "c"(div) // %1
- : "memory", "cc", "edx"
- );
- return num;
-}
-
-// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
-int FixedDiv1_X86(int num, int div) {
- asm volatile (
- "cdq \n"
- "shld $0x10,%%eax,%%edx \n"
- "shl $0x10,%%eax \n"
- "sub $0x10001,%%eax \n"
- "sbb $0x0,%%edx \n"
- "sub $0x1,%1 \n"
- "idiv %1 \n"
- "mov %0, %%eax \n"
- : "+a"(num) // %0
- : "c"(div) // %1
- : "memory", "cc", "edx"
- );
- return num;
-}
-
-#endif // defined(__x86_64__) || defined(__i386__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc
deleted file mode 100644
index 840b9738da..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc
+++ /dev/null
@@ -1,1320 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-// Offsets for source bytes 0 to 9
-static uvec8 kShuf0 =
- { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
-static uvec8 kShuf1 =
- { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf2 =
- { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 0 to 10
-static uvec8 kShuf01 =
- { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
-
-// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
-static uvec8 kShuf11 =
- { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf21 =
- { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
-
-// Coefficients for source bytes 0 to 10
-static uvec8 kMadd01 =
- { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
-
-// Coefficients for source bytes 10 to 21
-static uvec8 kMadd11 =
- { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
-
-// Coefficients for source bytes 21 to 31
-static uvec8 kMadd21 =
- { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
-
-// Coefficients for source bytes 21 to 31
-static vec16 kRound34 =
- { 2, 2, 2, 2, 2, 2, 2, 2 };
-
-static uvec8 kShuf38a =
- { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-static uvec8 kShuf38b =
- { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 0,1,2
-static uvec8 kShufAc =
- { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 3,4,5
-static uvec8 kShufAc3 =
- { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x3 and 2x3
-static uvec16 kScaleAc33 =
- { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
-
-// Arrange first value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb0 =
- { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
-
-// Arrange second value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb1 =
- { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
-
-// Arrange third value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb2 =
- { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x2 and 2x2
-static uvec16 kScaleAb2 =
- { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
-
-// Reads 32 pixels, throws half away and writes 16 pixels.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // isolate odd pixels.
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x1 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x2 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi]
- movdqa xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- sub ecx, 16
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- pop esi
- ret
- }
-}
-
-// Reads 32 pixels, throws half away and writes 16 pixels.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
-
- align 4
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // isolate odd pixels.
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x1 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- align 4
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x2 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- align 4
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi]
- movdqu xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- sub ecx, 16
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- pop esi
- ret
- }
-}
-
-// Point samples 32 pixels to 8 pixels.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000
- psrld xmm5, 24
- pslld xmm5, 16
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- psrlw xmm0, 8
- packuswb xmm0, xmm0
- sub ecx, 8
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x4 rectangle to 8x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_ptr
- mov esi, [esp + 8 + 8] // src_stride
- mov edx, [esp + 8 + 12] // dst_ptr
- mov ecx, [esp + 8 + 16] // dst_width
- lea edi, [esi + esi * 2] // src_stride * 3
- pcmpeqb xmm7, xmm7 // generate mask 0x00ff00ff
- psrlw xmm7, 8
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi]
- movdqa xmm3, [eax + esi + 16]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
- movdqa xmm2, [eax + esi * 2]
- movdqa xmm3, [eax + esi * 2 + 16]
- movdqa xmm4, [eax + edi]
- movdqa xmm5, [eax + edi + 16]
- lea eax, [eax + 32]
- pavgb xmm2, xmm4
- pavgb xmm3, xmm5
- pavgb xmm0, xmm2
- pavgb xmm1, xmm3
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm7
- pand xmm3, xmm7
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- movdqa xmm2, xmm0 // average columns (16 to 8 pixels)
- psrlw xmm0, 8
- pand xmm2, xmm7
- pavgw xmm0, xmm2
- packuswb xmm0, xmm0
-
- sub ecx, 8
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- jg wloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// Point samples 32 pixels to 24 pixels.
-// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
-// Then shuffled to do the scaling.
-
-// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- movdqa xmm3, kShuf0
- movdqa xmm4, kShuf1
- movdqa xmm5, kShuf2
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqa xmm2, xmm1
- palignr xmm1, xmm0, 8
- pshufb xmm0, xmm3
- pshufb xmm1, xmm4
- pshufb xmm2, xmm5
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + 8], xmm1
- movq qword ptr [edx + 16], xmm2
- lea edx, [edx + 24]
- sub ecx, 24
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x2 rectangle to 24x1
-// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
-// Then shuffled to do the scaling.
-
-// Register usage:
-// xmm0 src_row 0
-// xmm1 src_row 1
-// xmm2 shuf 0
-// xmm3 shuf 1
-// xmm4 shuf 2
-// xmm5 madd 0
-// xmm6 madd 1
-// xmm7 kRound34
-
-// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShuf01
- movdqa xmm3, kShuf11
- movdqa xmm4, kShuf21
- movdqa xmm5, kMadd01
- movdqa xmm6, kMadd11
- movdqa xmm7, kRound34
-
- align 4
- wloop:
- movdqa xmm0, [eax] // pixels 0..7
- movdqa xmm1, [eax + esi]
- pavgb xmm0, xmm1
- pshufb xmm0, xmm2
- pmaddubsw xmm0, xmm5
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx], xmm0
- movdqu xmm0, [eax + 8] // pixels 8..15
- movdqu xmm1, [eax + esi + 8]
- pavgb xmm0, xmm1
- pshufb xmm0, xmm3
- pmaddubsw xmm0, xmm6
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx + 8], xmm0
- movdqa xmm0, [eax + 16] // pixels 16..23
- movdqa xmm1, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm1
- pshufb xmm0, xmm4
- movdqa xmm1, kMadd21
- pmaddubsw xmm0, xmm1
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- sub ecx, 24
- movq qword ptr [edx + 16], xmm0
- lea edx, [edx + 24]
- jg wloop
-
- pop esi
- ret
- }
-}
-
-// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShuf01
- movdqa xmm3, kShuf11
- movdqa xmm4, kShuf21
- movdqa xmm5, kMadd01
- movdqa xmm6, kMadd11
- movdqa xmm7, kRound34
-
- align 4
- wloop:
- movdqa xmm0, [eax] // pixels 0..7
- movdqa xmm1, [eax + esi]
- pavgb xmm1, xmm0
- pavgb xmm0, xmm1
- pshufb xmm0, xmm2
- pmaddubsw xmm0, xmm5
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx], xmm0
- movdqu xmm0, [eax + 8] // pixels 8..15
- movdqu xmm1, [eax + esi + 8]
- pavgb xmm1, xmm0
- pavgb xmm0, xmm1
- pshufb xmm0, xmm3
- pmaddubsw xmm0, xmm6
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx + 8], xmm0
- movdqa xmm0, [eax + 16] // pixels 16..23
- movdqa xmm1, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm1, xmm0
- pavgb xmm0, xmm1
- pshufb xmm0, xmm4
- movdqa xmm1, kMadd21
- pmaddubsw xmm0, xmm1
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- sub ecx, 24
- movq qword ptr [edx + 16], xmm0
- lea edx, [edx+24]
- jg wloop
-
- pop esi
- ret
- }
-}
-
-// 3/8 point sampler
-
-// Scale 32 pixels to 12
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- movdqa xmm4, kShuf38a
- movdqa xmm5, kShuf38b
-
- align 4
- xloop:
- movdqa xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
- movdqa xmm1, [eax + 16] // 16 pixels -> 6,7,8,9,10,11
- lea eax, [eax + 32]
- pshufb xmm0, xmm4
- pshufb xmm1, xmm5
- paddusb xmm0, xmm1
-
- sub ecx, 12
- movq qword ptr [edx], xmm0 // write 12 pixels
- movhlps xmm1, xmm0
- movd [edx + 8], xmm1
- lea edx, [edx + 12]
- jg xloop
-
- ret
- }
-}
-
-// Scale 16x3 pixels to 6x1 with interpolation
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShufAc
- movdqa xmm3, kShufAc3
- movdqa xmm4, kScaleAc33
- pxor xmm5, xmm5
-
- align 4
- xloop:
- movdqa xmm0, [eax] // sum up 3 rows into xmm0/1
- movdqa xmm6, [eax + esi]
- movhlps xmm1, xmm0
- movhlps xmm7, xmm6
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
- punpcklbw xmm6, xmm5
- punpcklbw xmm7, xmm5
- paddusw xmm0, xmm6
- paddusw xmm1, xmm7
- movdqa xmm6, [eax + esi * 2]
- lea eax, [eax + 16]
- movhlps xmm7, xmm6
- punpcklbw xmm6, xmm5
- punpcklbw xmm7, xmm5
- paddusw xmm0, xmm6
- paddusw xmm1, xmm7
-
- movdqa xmm6, xmm0 // 8 pixels -> 0,1,2 of xmm6
- psrldq xmm0, 2
- paddusw xmm6, xmm0
- psrldq xmm0, 2
- paddusw xmm6, xmm0
- pshufb xmm6, xmm2
-
- movdqa xmm7, xmm1 // 8 pixels -> 3,4,5 of xmm6
- psrldq xmm1, 2
- paddusw xmm7, xmm1
- psrldq xmm1, 2
- paddusw xmm7, xmm1
- pshufb xmm7, xmm3
- paddusw xmm6, xmm7
-
- pmulhuw xmm6, xmm4 // divide by 9,9,6, 9,9,6
- packuswb xmm6, xmm6
-
- sub ecx, 6
- movd [edx], xmm6 // write 6 pixels
- psrlq xmm6, 16
- movd [edx + 2], xmm6
- lea edx, [edx + 6]
- jg xloop
-
- pop esi
- ret
- }
-}
-
-// Scale 16x2 pixels to 6x1 with interpolation
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShufAb0
- movdqa xmm3, kShufAb1
- movdqa xmm4, kShufAb2
- movdqa xmm5, kScaleAb2
-
- align 4
- xloop:
- movdqa xmm0, [eax] // average 2 rows into xmm0
- pavgb xmm0, [eax + esi]
- lea eax, [eax + 16]
-
- movdqa xmm1, xmm0 // 16 pixels -> 0,1,2,3,4,5 of xmm1
- pshufb xmm1, xmm2
- movdqa xmm6, xmm0
- pshufb xmm6, xmm3
- paddusw xmm1, xmm6
- pshufb xmm0, xmm4
- paddusw xmm1, xmm0
-
- pmulhuw xmm1, xmm5 // divide by 3,3,2, 3,3,2
- packuswb xmm1, xmm1
-
- sub ecx, 6
- movd [edx], xmm1 // write 6 pixels
- psrlq xmm1, 16
- movd [edx + 2], xmm1
- lea edx, [edx + 6]
- jg xloop
-
- pop esi
- ret
- }
-}
-
-// Reads 16xN bytes and produces 16 shorts at a time.
-// TODO(fbarchard): Make this handle 4xN bytes for any width ARGB.
-__declspec(naked) __declspec(align(16))
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width,
- int src_height) {
- __asm {
- push esi
- push edi
- push ebx
- push ebp
- mov esi, [esp + 16 + 4] // src_ptr
- mov edx, [esp + 16 + 8] // src_stride
- mov edi, [esp + 16 + 12] // dst_ptr
- mov ecx, [esp + 16 + 16] // dst_width
- mov ebx, [esp + 16 + 20] // height
- pxor xmm4, xmm4
- dec ebx
-
- align 4
- xloop:
- // first row
- movdqa xmm0, [esi]
- lea eax, [esi + edx]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm4
- punpckhbw xmm1, xmm4
- lea esi, [esi + 16]
- mov ebp, ebx
- test ebp, ebp
- je ydone
-
- // sum remaining rows
- align 4
- yloop:
- movdqa xmm2, [eax] // read 16 pixels
- lea eax, [eax + edx] // advance to next row
- movdqa xmm3, xmm2
- punpcklbw xmm2, xmm4
- punpckhbw xmm3, xmm4
- paddusw xmm0, xmm2 // sum 16 words
- paddusw xmm1, xmm3
- sub ebp, 1
- jg yloop
-
- align 4
- ydone:
- movdqa [edi], xmm0
- movdqa [edi + 16], xmm1
- lea edi, [edi + 32]
-
- sub ecx, 16
- jg xloop
-
- pop ebp
- pop ebx
- pop edi
- pop esi
- ret
- }
-}
-
-// Bilinear column filtering. SSSE3 version.
-// TODO(fbarchard): Port to Neon
-// TODO(fbarchard): Switch the following:
-// xor ebx, ebx
-// mov bx, word ptr [esi + eax] // 2 source x0 pixels
-// To
-// movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
-// when drmemory bug fixed.
-// https://code.google.com/p/drmemory/issues/detail?id=1396
-
-__declspec(naked) __declspec(align(16))
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- __asm {
- push ebx
- push esi
- push edi
- mov edi, [esp + 12 + 4] // dst_ptr
- mov esi, [esp + 12 + 8] // src_ptr
- mov ecx, [esp + 12 + 12] // dst_width
- movd xmm2, [esp + 12 + 16] // x
- movd xmm3, [esp + 12 + 20] // dx
- mov eax, 0x04040000 // shuffle to line up fractions with pixel.
- movd xmm5, eax
- pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
- psrlw xmm6, 9
- pextrw eax, xmm2, 1 // get x0 integer. preroll
- sub ecx, 2
- jl xloop29
-
- movdqa xmm0, xmm2 // x1 = x0 + dx
- paddd xmm0, xmm3
- punpckldq xmm2, xmm0 // x0 x1
- punpckldq xmm3, xmm3 // dx dx
- paddd xmm3, xmm3 // dx * 2, dx * 2
- pextrw edx, xmm2, 3 // get x1 integer. preroll
-
- // 2 Pixel loop.
- align 4
- xloop2:
- movdqa xmm1, xmm2 // x0, x1 fractions.
- paddd xmm2, xmm3 // x += dx
- movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
- movd xmm0, ebx
- psrlw xmm1, 9 // 7 bit fractions.
- movzx ebx, word ptr [esi + edx] // 2 source x1 pixels
- movd xmm4, ebx
- pshufb xmm1, xmm5 // 0011
- punpcklwd xmm0, xmm4
- pxor xmm1, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm1 // 16 bit, 2 pixels.
- pextrw eax, xmm2, 1 // get x0 integer. next iteration.
- pextrw edx, xmm2, 3 // get x1 integer. next iteration.
- psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
- packuswb xmm0, xmm0 // 8 bits, 2 pixels.
- movd ebx, xmm0
- mov [edi], bx
- lea edi, [edi + 2]
- sub ecx, 2 // 2 pixels
- jge xloop2
-
- align 4
- xloop29:
-
- add ecx, 2 - 1
- jl xloop99
-
- // 1 pixel remainder
- movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
- movd xmm0, ebx
- psrlw xmm2, 9 // 7 bit fractions.
- pshufb xmm2, xmm5 // 0011
- pxor xmm2, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm2 // 16 bit
- psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
- packuswb xmm0, xmm0 // 8 bits
- movd ebx, xmm0
- mov [edi], bl
-
- align 4
- xloop99:
-
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-
-// Reads 16 pixels, duplicates them and writes 32 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- __asm {
- mov edx, [esp + 4] // dst_ptr
- mov eax, [esp + 8] // src_ptr
- mov ecx, [esp + 12] // dst_width
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm0
- punpckhbw xmm1, xmm1
- sub ecx, 32
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- jg wloop
-
- ret
- }
-}
-
-// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_argb
- // src_stride ignored
- mov edx, [esp + 12] // dst_argb
- mov ecx, [esp + 16] // dst_width
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- shufps xmm0, xmm1, 0xdd
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- ret
- }
-}
-
-// Blends 8x1 rectangle to 4x1.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_argb
- // src_stride ignored
- mov edx, [esp + 12] // dst_argb
- mov ecx, [esp + 16] // dst_width
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqa xmm2, xmm0
- shufps xmm0, xmm1, 0x88 // even pixels
- shufps xmm2, xmm1, 0xdd // odd pixels
- pavgb xmm0, xmm2
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- ret
- }
-}
-
-// Blends 8x2 rectangle to 4x1.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // dst_width
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + 16]
- movdqa xmm2, [eax + esi]
- movdqa xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
- movdqa xmm2, xmm0 // average columns (8 to 4 pixels)
- shufps xmm0, xmm1, 0x88 // even pixels
- shufps xmm2, xmm1, 0xdd // odd pixels
- pavgb xmm0, xmm2
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- pop esi
- ret
- }
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- __asm {
- push ebx
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- // src_stride ignored
- mov ebx, [esp + 8 + 12] // src_stepx
- mov edx, [esp + 8 + 16] // dst_argb
- mov ecx, [esp + 8 + 20] // dst_width
- lea ebx, [ebx * 4]
- lea edi, [ebx + ebx * 2]
-
- align 4
- wloop:
- movd xmm0, [eax]
- movd xmm1, [eax + ebx]
- punpckldq xmm0, xmm1
- movd xmm2, [eax + ebx * 2]
- movd xmm3, [eax + edi]
- lea eax, [eax + ebx * 4]
- punpckldq xmm2, xmm3
- punpcklqdq xmm0, xmm2
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- pop edi
- pop ebx
- ret
- }
-}
-
-// Blends four 2x2 to 4x1.
-// Alignment requirement: dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- __asm {
- push ebx
- push esi
- push edi
- mov eax, [esp + 12 + 4] // src_argb
- mov esi, [esp + 12 + 8] // src_stride
- mov ebx, [esp + 12 + 12] // src_stepx
- mov edx, [esp + 12 + 16] // dst_argb
- mov ecx, [esp + 12 + 20] // dst_width
- lea esi, [eax + esi] // row1 pointer
- lea ebx, [ebx * 4]
- lea edi, [ebx + ebx * 2]
-
- align 4
- wloop:
- movq xmm0, qword ptr [eax] // row0 4 pairs
- movhps xmm0, qword ptr [eax + ebx]
- movq xmm1, qword ptr [eax + ebx * 2]
- movhps xmm1, qword ptr [eax + edi]
- lea eax, [eax + ebx * 4]
- movq xmm2, qword ptr [esi] // row1 4 pairs
- movhps xmm2, qword ptr [esi + ebx]
- movq xmm3, qword ptr [esi + ebx * 2]
- movhps xmm3, qword ptr [esi + edi]
- lea esi, [esi + ebx * 4]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
- movdqa xmm2, xmm0 // average columns (8 to 4 pixels)
- shufps xmm0, xmm1, 0x88 // even pixels
- shufps xmm2, xmm1, 0xdd // odd pixels
- pavgb xmm0, xmm2
- sub ecx, 4
- movdqa [edx], xmm0
- lea edx, [edx + 16]
- jg wloop
-
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-
-// Column scaling unfiltered. SSE2 version.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- __asm {
- push edi
- push esi
- mov edi, [esp + 8 + 4] // dst_argb
- mov esi, [esp + 8 + 8] // src_argb
- mov ecx, [esp + 8 + 12] // dst_width
- movd xmm2, [esp + 8 + 16] // x
- movd xmm3, [esp + 8 + 20] // dx
-
- pshufd xmm2, xmm2, 0 // x0 x0 x0 x0
- pshufd xmm0, xmm3, 0x11 // dx 0 dx 0
- paddd xmm2, xmm0
- paddd xmm3, xmm3 // 0, 0, 0, dx * 2
- pshufd xmm0, xmm3, 0x05 // dx * 2, dx * 2, 0, 0
- paddd xmm2, xmm0 // x3 x2 x1 x0
- paddd xmm3, xmm3 // 0, 0, 0, dx * 4
- pshufd xmm3, xmm3, 0 // dx * 4, dx * 4, dx * 4, dx * 4
-
- pextrw eax, xmm2, 1 // get x0 integer.
- pextrw edx, xmm2, 3 // get x1 integer.
-
- cmp ecx, 0
- jle xloop99
- sub ecx, 4
- jl xloop49
-
- // 4 Pixel loop.
- align 4
- xloop4:
- movd xmm0, [esi + eax * 4] // 1 source x0 pixels
- movd xmm1, [esi + edx * 4] // 1 source x1 pixels
- pextrw eax, xmm2, 5 // get x2 integer.
- pextrw edx, xmm2, 7 // get x3 integer.
- paddd xmm2, xmm3 // x += dx
- punpckldq xmm0, xmm1 // x0 x1
-
- movd xmm1, [esi + eax * 4] // 1 source x2 pixels
- movd xmm4, [esi + edx * 4] // 1 source x3 pixels
- pextrw eax, xmm2, 1 // get x0 integer. next iteration.
- pextrw edx, xmm2, 3 // get x1 integer. next iteration.
- punpckldq xmm1, xmm4 // x2 x3
- punpcklqdq xmm0, xmm1 // x0 x1 x2 x3
- sub ecx, 4 // 4 pixels
- movdqu [edi], xmm0
- lea edi, [edi + 16]
- jge xloop4
-
- align 4
- xloop49:
- test ecx, 2
- je xloop29
-
- // 2 Pixels.
- movd xmm0, [esi + eax * 4] // 1 source x0 pixels
- movd xmm1, [esi + edx * 4] // 1 source x1 pixels
- pextrw eax, xmm2, 5 // get x2 integer.
- punpckldq xmm0, xmm1 // x0 x1
-
- movq qword ptr [edi], xmm0
- lea edi, [edi + 8]
-
- xloop29:
- test ecx, 1
- je xloop99
-
- // 1 Pixels.
- movd xmm0, [esi + eax * 4] // 1 source x2 pixels
- movd dword ptr [edi], xmm0
- align 4
- xloop99:
-
- pop esi
- pop edi
- ret
- }
-}
-
-// Bilinear row filtering combines 2x1 -> 1x1. SSSE3 version.
-// TODO(fbarchard): Port to Neon
-
-// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
-static uvec8 kShuffleColARGB = {
- 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel
- 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel
-};
-
-// Shuffle table for duplicating 2 fractions into 8 bytes each
-static uvec8 kShuffleFractions = {
- 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
-};
-
-__declspec(naked) __declspec(align(16))
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_argb
- mov esi, [esp + 8 + 8] // src_argb
- mov ecx, [esp + 8 + 12] // dst_width
- movd xmm2, [esp + 8 + 16] // x
- movd xmm3, [esp + 8 + 20] // dx
- movdqa xmm4, kShuffleColARGB
- movdqa xmm5, kShuffleFractions
- pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
- psrlw xmm6, 9
- pextrw eax, xmm2, 1 // get x0 integer. preroll
- sub ecx, 2
- jl xloop29
-
- movdqa xmm0, xmm2 // x1 = x0 + dx
- paddd xmm0, xmm3
- punpckldq xmm2, xmm0 // x0 x1
- punpckldq xmm3, xmm3 // dx dx
- paddd xmm3, xmm3 // dx * 2, dx * 2
- pextrw edx, xmm2, 3 // get x1 integer. preroll
-
- // 2 Pixel loop.
- align 4
- xloop2:
- movdqa xmm1, xmm2 // x0, x1 fractions.
- paddd xmm2, xmm3 // x += dx
- movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels
- psrlw xmm1, 9 // 7 bit fractions.
- movhps xmm0, qword ptr [esi + edx * 4] // 2 source x1 pixels
- pshufb xmm1, xmm5 // 0000000011111111
- pshufb xmm0, xmm4 // arrange pixels into pairs
- pxor xmm1, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm1 // argb_argb 16 bit, 2 pixels.
- pextrw eax, xmm2, 1 // get x0 integer. next iteration.
- pextrw edx, xmm2, 3 // get x1 integer. next iteration.
- psrlw xmm0, 7 // argb 8.7 fixed point to low 8 bits.
- packuswb xmm0, xmm0 // argb_argb 8 bits, 2 pixels.
- movq qword ptr [edi], xmm0
- lea edi, [edi + 8]
- sub ecx, 2 // 2 pixels
- jge xloop2
-
- align 4
- xloop29:
-
- add ecx, 2 - 1
- jl xloop99
-
- // 1 pixel remainder
- psrlw xmm2, 9 // 7 bit fractions.
- movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels
- pshufb xmm2, xmm5 // 00000000
- pshufb xmm0, xmm4 // arrange pixels into pairs
- pxor xmm2, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm2 // argb 16 bit, 1 pixel.
- psrlw xmm0, 7
- packuswb xmm0, xmm0 // argb 8 bits, 1 pixel.
- movd [edi], xmm0
-
- align 4
- xloop99:
-
- pop edi
- pop esi
- ret
- }
-}
-
-// Reads 4 pixels, duplicates them and writes 8 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- __asm {
- mov edx, [esp + 4] // dst_argb
- mov eax, [esp + 8] // src_argb
- mov ecx, [esp + 12] // dst_width
-
- align 4
- wloop:
- movdqa xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpckldq xmm0, xmm0
- punpckhdq xmm1, xmm1
- sub ecx, 8
- movdqa [edx], xmm0
- movdqa [edx + 16], xmm1
- lea edx, [edx + 32]
- jg wloop
-
- ret
- }
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-__declspec(naked) __declspec(align(16))
-int FixedDiv_X86(int num, int div) {
- __asm {
- mov eax, [esp + 4] // num
- cdq // extend num to 64 bits
- shld edx, eax, 16 // 32.16
- shl eax, 16
- idiv dword ptr [esp + 8]
- ret
- }
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-__declspec(naked) __declspec(align(16))
-int FixedDiv1_X86(int num, int div) {
- __asm {
- mov eax, [esp + 4] // num
- mov ecx, [esp + 8] // denom
- cdq // extend num to 64 bits
- shld edx, eax, 16 // 32.16
- shl eax, 16
- sub eax, 0x00010001
- sbb edx, 0
- sub ecx, 1
- idiv ecx
- ret
- }
-}
-
-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/video_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/video_common.cc
deleted file mode 100755
index efbedf46e2..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/video_common.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))
-
-struct FourCCAliasEntry {
- uint32 alias;
- uint32 canonical;
-};
-
-static const struct FourCCAliasEntry kFourCCAliases[] = {
- {FOURCC_IYUV, FOURCC_I420},
- {FOURCC_YU16, FOURCC_I422},
- {FOURCC_YU24, FOURCC_I444},
- {FOURCC_YUYV, FOURCC_YUY2},
- {FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs
- {FOURCC_HDYC, FOURCC_UYVY},
- {FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
- {FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
- {FOURCC_DMB1, FOURCC_MJPG},
- {FOURCC_BA81, FOURCC_BGGR},
- {FOURCC_RGB3, FOURCC_RAW },
- {FOURCC_BGR3, FOURCC_24BG},
- {FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
- {FOURCC_CM24, FOURCC_RAW }, // kCMPixelFormat_24RGB
- {FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555
- {FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565
- {FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551
-};
-// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
-// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA
-
-LIBYUV_API
-uint32 CanonicalFourCC(uint32 fourcc) {
- int i;
- for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) {
- if (kFourCCAliases[i].alias == fourcc) {
- return kFourCCAliases[i].canonical;
- }
- }
- // Not an alias, so return it as-is.
- return fourcc;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm b/drivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm
deleted file mode 100755
index cb5c32df3a..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm
+++ /dev/null
@@ -1,1136 +0,0 @@
-;*****************************************************************************
-;* x86inc.asm: x264asm abstraction layer
-;*****************************************************************************
-;* Copyright (C) 2005-2012 x264 project
-;*
-;* Authors: Loren Merritt <lorenm@u.washington.edu>
-;* Anton Mitrofanov <BugMaster@narod.ru>
-;* Jason Garrett-Glaser <darkshikari@gmail.com>
-;* Henrik Gramner <hengar-6@student.ltu.se>
-;*
-;* Permission to use, copy, modify, and/or distribute this software for any
-;* purpose with or without fee is hereby granted, provided that the above
-;* copyright notice and this permission notice appear in all copies.
-;*
-;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-;*****************************************************************************
-
-; This is a header file for the x264ASM assembly language, which uses
-; NASM/YASM syntax combined with a large number of macros to provide easy
-; abstraction between different calling conventions (x86_32, win64, linux64).
-; It also has various other useful features to simplify writing the kind of
-; DSP functions that are most often used in x264.
-
-; Unlike the rest of x264, this file is available under an ISC license, as it
-; has significant usefulness outside of x264 and we want it to be available
-; to the largest audience possible. Of course, if you modify it for your own
-; purposes to add a new feature, we strongly encourage contributing a patch
-; as this feature might be useful for others as well. Send patches or ideas
-; to x264-devel@videolan.org .
-
-; Local changes for libyuv:
-; remove %define program_name and references in labels
-; rename cpus to uppercase
-
-%define WIN64 0
-%define UNIX64 0
-%if ARCH_X86_64
- %ifidn __OUTPUT_FORMAT__,win32
- %define WIN64 1
- %elifidn __OUTPUT_FORMAT__,win64
- %define WIN64 1
- %else
- %define UNIX64 1
- %endif
-%endif
-
-%ifdef PREFIX
- %define mangle(x) _ %+ x
-%else
- %define mangle(x) x
-%endif
-
-; Name of the .rodata section.
-; Kludge: Something on OS X fails to align .rodata even given an align attribute,
-; so use a different read-only section.
-%macro SECTION_RODATA 0-1 16
- %ifidn __OUTPUT_FORMAT__,macho64
- SECTION .text align=%1
- %elifidn __OUTPUT_FORMAT__,macho
- SECTION .text align=%1
- fakegot:
- %elifidn __OUTPUT_FORMAT__,aout
- section .text
- %else
- SECTION .rodata align=%1
- %endif
-%endmacro
-
-; aout does not support align=
-%macro SECTION_TEXT 0-1 16
- %ifidn __OUTPUT_FORMAT__,aout
- SECTION .text
- %else
- SECTION .text align=%1
- %endif
-%endmacro
-
-%if WIN64
- %define PIC
-%elif ARCH_X86_64 == 0
-; x86_32 doesn't require PIC.
-; Some distros prefer shared objects to be PIC, but nothing breaks if
-; the code contains a few textrels, so we'll skip that complexity.
- %undef PIC
-%endif
-%ifdef PIC
- default rel
-%endif
-
-; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
-CPU amdnop
-
-; Macros to eliminate most code duplication between x86_32 and x86_64:
-; Currently this works only for leaf functions which load all their arguments
-; into registers at the start, and make no other use of the stack. Luckily that
-; covers most of x264's asm.
-
-; PROLOGUE:
-; %1 = number of arguments. loads them from stack if needed.
-; %2 = number of registers used. pushes callee-saved regs if needed.
-; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = list of names to define to registers
-; PROLOGUE can also be invoked by adding the same options to cglobal
-
-; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
-
-; TODO Some functions can use some args directly from the stack. If they're the
-; last args then you can just not declare them, but if they're in the middle
-; we need more flexible macro.
-
-; RET:
-; Pops anything that was pushed by PROLOGUE, and returns.
-
-; REP_RET:
-; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
-; which are slow when a normal ret follows a branch.
-
-; registers:
-; rN and rNq are the native-size register holding function argument N
-; rNd, rNw, rNb are dword, word, and byte size
-; rNh is the high 8 bits of the word size
-; rNm is the original location of arg N (a register or on the stack), dword
-; rNmp is native size
-
-%macro DECLARE_REG 2-3
- %define r%1q %2
- %define r%1d %2d
- %define r%1w %2w
- %define r%1b %2b
- %define r%1h %2h
- %if %0 == 2
- %define r%1m %2d
- %define r%1mp %2
- %elif ARCH_X86_64 ; memory
- %define r%1m [rsp + stack_offset + %3]
- %define r%1mp qword r %+ %1m
- %else
- %define r%1m [esp + stack_offset + %3]
- %define r%1mp dword r %+ %1m
- %endif
- %define r%1 %2
-%endmacro
-
-%macro DECLARE_REG_SIZE 3
- %define r%1q r%1
- %define e%1q r%1
- %define r%1d e%1
- %define e%1d e%1
- %define r%1w %1
- %define e%1w %1
- %define r%1h %3
- %define e%1h %3
- %define r%1b %2
- %define e%1b %2
-%if ARCH_X86_64 == 0
- %define r%1 e%1
-%endif
-%endmacro
-
-DECLARE_REG_SIZE ax, al, ah
-DECLARE_REG_SIZE bx, bl, bh
-DECLARE_REG_SIZE cx, cl, ch
-DECLARE_REG_SIZE dx, dl, dh
-DECLARE_REG_SIZE si, sil, null
-DECLARE_REG_SIZE di, dil, null
-DECLARE_REG_SIZE bp, bpl, null
-
-; t# defines for when per-arch register allocation is more complex than just function arguments
-
-%macro DECLARE_REG_TMP 1-*
- %assign %%i 0
- %rep %0
- CAT_XDEFINE t, %%i, r%1
- %assign %%i %%i+1
- %rotate 1
- %endrep
-%endmacro
-
-%macro DECLARE_REG_TMP_SIZE 0-*
- %rep %0
- %define t%1q t%1 %+ q
- %define t%1d t%1 %+ d
- %define t%1w t%1 %+ w
- %define t%1h t%1 %+ h
- %define t%1b t%1 %+ b
- %rotate 1
- %endrep
-%endmacro
-
-DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
-
-%if ARCH_X86_64
- %define gprsize 8
-%else
- %define gprsize 4
-%endif
-
-%macro PUSH 1
- push %1
- %assign stack_offset stack_offset+gprsize
-%endmacro
-
-%macro POP 1
- pop %1
- %assign stack_offset stack_offset-gprsize
-%endmacro
-
-%macro PUSH_IF_USED 1-*
- %rep %0
- %if %1 < regs_used
- PUSH r%1
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro POP_IF_USED 1-*
- %rep %0
- %if %1 < regs_used
- pop r%1
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro LOAD_IF_USED 1-*
- %rep %0
- %if %1 < num_args
- mov r%1, r %+ %1 %+ mp
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro SUB 2
- sub %1, %2
- %ifidn %1, rsp
- %assign stack_offset stack_offset+(%2)
- %endif
-%endmacro
-
-%macro ADD 2
- add %1, %2
- %ifidn %1, rsp
- %assign stack_offset stack_offset-(%2)
- %endif
-%endmacro
-
-%macro movifnidn 2
- %ifnidn %1, %2
- mov %1, %2
- %endif
-%endmacro
-
-%macro movsxdifnidn 2
- %ifnidn %1, %2
- movsxd %1, %2
- %endif
-%endmacro
-
-%macro ASSERT 1
- %if (%1) == 0
- %error assert failed
- %endif
-%endmacro
-
-%macro DEFINE_ARGS 0-*
- %ifdef n_arg_names
- %assign %%i 0
- %rep n_arg_names
- CAT_UNDEF arg_name %+ %%i, q
- CAT_UNDEF arg_name %+ %%i, d
- CAT_UNDEF arg_name %+ %%i, w
- CAT_UNDEF arg_name %+ %%i, h
- CAT_UNDEF arg_name %+ %%i, b
- CAT_UNDEF arg_name %+ %%i, m
- CAT_UNDEF arg_name %+ %%i, mp
- CAT_UNDEF arg_name, %%i
- %assign %%i %%i+1
- %endrep
- %endif
-
- %xdefine %%stack_offset stack_offset
- %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
- %assign %%i 0
- %rep %0
- %xdefine %1q r %+ %%i %+ q
- %xdefine %1d r %+ %%i %+ d
- %xdefine %1w r %+ %%i %+ w
- %xdefine %1h r %+ %%i %+ h
- %xdefine %1b r %+ %%i %+ b
- %xdefine %1m r %+ %%i %+ m
- %xdefine %1mp r %+ %%i %+ mp
- CAT_XDEFINE arg_name, %%i, %1
- %assign %%i %%i+1
- %rotate 1
- %endrep
- %xdefine stack_offset %%stack_offset
- %assign n_arg_names %0
-%endmacro
-
-%if WIN64 ; Windows x64 ;=================================================
-
-DECLARE_REG 0, rcx
-DECLARE_REG 1, rdx
-DECLARE_REG 2, R8
-DECLARE_REG 3, R9
-DECLARE_REG 4, R10, 40
-DECLARE_REG 5, R11, 48
-DECLARE_REG 6, rax, 56
-DECLARE_REG 7, rdi, 64
-DECLARE_REG 8, rsi, 72
-DECLARE_REG 9, rbx, 80
-DECLARE_REG 10, rbp, 88
-DECLARE_REG 11, R12, 96
-DECLARE_REG 12, R13, 104
-DECLARE_REG 13, R14, 112
-DECLARE_REG 14, R15, 120
-
-%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
- %assign num_args %1
- %assign regs_used %2
- ASSERT regs_used >= num_args
- ASSERT regs_used <= 15
- PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
- %if mmsize == 8
- %assign xmm_regs_used 0
- %else
- WIN64_SPILL_XMM %3
- %endif
- LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
- DEFINE_ARGS %4
-%endmacro
-
-%macro WIN64_SPILL_XMM 1
- %assign xmm_regs_used %1
- ASSERT xmm_regs_used <= 16
- %if xmm_regs_used > 6
- SUB rsp, (xmm_regs_used-6)*16+16
- %assign %%i xmm_regs_used
- %rep (xmm_regs_used-6)
- %assign %%i %%i-1
- movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i
- %endrep
- %endif
-%endmacro
-
-%macro WIN64_RESTORE_XMM_INTERNAL 1
- %if xmm_regs_used > 6
- %assign %%i xmm_regs_used
- %rep (xmm_regs_used-6)
- %assign %%i %%i-1
- movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)]
- %endrep
- add %1, (xmm_regs_used-6)*16+16
- %endif
-%endmacro
-
-%macro WIN64_RESTORE_XMM 1
- WIN64_RESTORE_XMM_INTERNAL %1
- %assign stack_offset stack_offset-(xmm_regs_used-6)*16+16
- %assign xmm_regs_used 0
-%endmacro
-
-%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
-
-%macro RET 0
- WIN64_RESTORE_XMM_INTERNAL rsp
- POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
-%if mmsize == 32
- vzeroupper
-%endif
- ret
-%endmacro
-
-%elif ARCH_X86_64 ; *nix x64 ;=============================================
-
-DECLARE_REG 0, rdi
-DECLARE_REG 1, rsi
-DECLARE_REG 2, rdx
-DECLARE_REG 3, rcx
-DECLARE_REG 4, R8
-DECLARE_REG 5, R9
-DECLARE_REG 6, rax, 8
-DECLARE_REG 7, R10, 16
-DECLARE_REG 8, R11, 24
-DECLARE_REG 9, rbx, 32
-DECLARE_REG 10, rbp, 40
-DECLARE_REG 11, R12, 48
-DECLARE_REG 12, R13, 56
-DECLARE_REG 13, R14, 64
-DECLARE_REG 14, R15, 72
-
-%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
- %assign num_args %1
- %assign regs_used %2
- ASSERT regs_used >= num_args
- ASSERT regs_used <= 15
- PUSH_IF_USED 9, 10, 11, 12, 13, 14
- LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
- DEFINE_ARGS %4
-%endmacro
-
-%define has_epilogue regs_used > 9 || mmsize == 32
-
-%macro RET 0
- POP_IF_USED 14, 13, 12, 11, 10, 9
-%if mmsize == 32
- vzeroupper
-%endif
- ret
-%endmacro
-
-%else ; X86_32 ;==============================================================
-
-DECLARE_REG 0, eax, 4
-DECLARE_REG 1, ecx, 8
-DECLARE_REG 2, edx, 12
-DECLARE_REG 3, ebx, 16
-DECLARE_REG 4, esi, 20
-DECLARE_REG 5, edi, 24
-DECLARE_REG 6, ebp, 28
-%define rsp esp
-
-%macro DECLARE_ARG 1-*
- %rep %0
- %define r%1m [esp + stack_offset + 4*%1 + 4]
- %define r%1mp dword r%1m
- %rotate 1
- %endrep
-%endmacro
-
-DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
-
-%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
- %assign num_args %1
- %assign regs_used %2
- %if regs_used > 7
- %assign regs_used 7
- %endif
- ASSERT regs_used >= num_args
- PUSH_IF_USED 3, 4, 5, 6
- LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
- DEFINE_ARGS %4
-%endmacro
-
-%define has_epilogue regs_used > 3 || mmsize == 32
-
-%macro RET 0
- POP_IF_USED 6, 5, 4, 3
-%if mmsize == 32
- vzeroupper
-%endif
- ret
-%endmacro
-
-%endif ;======================================================================
-
-%if WIN64 == 0
-%macro WIN64_SPILL_XMM 1
-%endmacro
-%macro WIN64_RESTORE_XMM 1
-%endmacro
-%endif
-
-%macro REP_RET 0
- %if has_epilogue
- RET
- %else
- rep ret
- %endif
-%endmacro
-
-%macro TAIL_CALL 2 ; callee, is_nonadjacent
- %if has_epilogue
- call %1
- RET
- %elif %2
- jmp %1
- %endif
-%endmacro
-
-;=============================================================================
-; arch-independent part
-;=============================================================================
-
-%assign function_align 16
-
-; Begin a function.
-; Applies any symbol mangling needed for C linkage, and sets up a define such that
-; subsequent uses of the function name automatically refer to the mangled version.
-; Appends cpuflags to the function name if cpuflags has been specified.
-%macro cglobal 1-2+ ; name, [PROLOGUE args]
-%if %0 == 1
- cglobal_internal %1 %+ SUFFIX
-%else
- cglobal_internal %1 %+ SUFFIX, %2
-%endif
-%endmacro
-%macro cglobal_internal 1-2+
- %ifndef cglobaled_%1
- %xdefine %1 mangle(%1)
- %xdefine %1.skip_prologue %1 %+ .skip_prologue
- CAT_XDEFINE cglobaled_, %1, 1
- %endif
- %xdefine current_function %1
- %ifidn __OUTPUT_FORMAT__,elf
- global %1:function hidden
- %else
- global %1
- %endif
- align function_align
- %1:
- RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
- %assign stack_offset 0
- %if %0 > 1
- PROLOGUE %2
- %endif
-%endmacro
-
-%macro cextern 1
- %xdefine %1 mangle(%1)
- CAT_XDEFINE cglobaled_, %1, 1
- extern %1
-%endmacro
-
-; like cextern, but without the prefix
-%macro cextern_naked 1
- %xdefine %1 mangle(%1)
- CAT_XDEFINE cglobaled_, %1, 1
- extern %1
-%endmacro
-
-%macro const 2+
- %xdefine %1 mangle(%1)
- global %1
- %1: %2
-%endmacro
-
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf
-SECTION .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf32
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf64
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-
-; cpuflags
-
-%assign cpuflags_MMX (1<<0)
-%assign cpuflags_MMX2 (1<<1) | cpuflags_MMX
-%assign cpuflags_3dnow (1<<2) | cpuflags_MMX
-%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow
-%assign cpuflags_SSE (1<<4) | cpuflags_MMX2
-%assign cpuflags_SSE2 (1<<5) | cpuflags_SSE
-%assign cpuflags_SSE2slow (1<<6) | cpuflags_SSE2
-%assign cpuflags_SSE3 (1<<7) | cpuflags_SSE2
-%assign cpuflags_SSSE3 (1<<8) | cpuflags_SSE3
-%assign cpuflags_SSE4 (1<<9) | cpuflags_SSSE3
-%assign cpuflags_SSE42 (1<<10)| cpuflags_SSE4
-%assign cpuflags_AVX (1<<11)| cpuflags_SSE42
-%assign cpuflags_xop (1<<12)| cpuflags_AVX
-%assign cpuflags_fma4 (1<<13)| cpuflags_AVX
-%assign cpuflags_AVX2 (1<<14)| cpuflags_AVX
-%assign cpuflags_fma3 (1<<15)| cpuflags_AVX
-
-%assign cpuflags_cache32 (1<<16)
-%assign cpuflags_cache64 (1<<17)
-%assign cpuflags_slowctz (1<<18)
-%assign cpuflags_lzcnt (1<<19)
-%assign cpuflags_misalign (1<<20)
-%assign cpuflags_aligned (1<<21) ; not a cpu feature, but a function variant
-%assign cpuflags_atom (1<<22)
-%assign cpuflags_bmi1 (1<<23)
-%assign cpuflags_bmi2 (1<<24)|cpuflags_bmi1
-%assign cpuflags_tbm (1<<25)|cpuflags_bmi1
-
-%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
-%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
-
-; Takes up to 2 cpuflags from the above list.
-; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
-; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
-%macro INIT_CPUFLAGS 0-2
- %if %0 >= 1
- %xdefine cpuname %1
- %assign cpuflags cpuflags_%1
- %if %0 >= 2
- %xdefine cpuname %1_%2
- %assign cpuflags cpuflags | cpuflags_%2
- %endif
- %xdefine SUFFIX _ %+ cpuname
- %if cpuflag(AVX)
- %assign AVX_enabled 1
- %endif
- %if mmsize == 16 && notcpuflag(SSE2)
- %define mova movaps
- %define movu movups
- %define movnta movntps
- %endif
- %if cpuflag(aligned)
- %define movu mova
- %elifidn %1, SSE3
- %define movu lddqu
- %endif
- %else
- %xdefine SUFFIX
- %undef cpuname
- %undef cpuflags
- %endif
-%endmacro
-
-; merge MMX and SSE*
-
-%macro CAT_XDEFINE 3
- %xdefine %1%2 %3
-%endmacro
-
-%macro CAT_UNDEF 2
- %undef %1%2
-%endmacro
-
-%macro INIT_MMX 0-1+
- %assign AVX_enabled 0
- %define RESET_MM_PERMUTATION INIT_MMX %1
- %define mmsize 8
- %define num_mmregs 8
- %define mova movq
- %define movu movq
- %define movh movd
- %define movnta movntq
- %assign %%i 0
- %rep 8
- CAT_XDEFINE m, %%i, mm %+ %%i
- CAT_XDEFINE nmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- %rep 8
- CAT_UNDEF m, %%i
- CAT_UNDEF nmm, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_XMM 0-1+
- %assign AVX_enabled 0
- %define RESET_MM_PERMUTATION INIT_XMM %1
- %define mmsize 16
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 16
- %endif
- %define mova movdqa
- %define movu movdqu
- %define movh movq
- %define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, xmm %+ %%i
- CAT_XDEFINE nxmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_YMM 0-1+
- %assign AVX_enabled 1
- %define RESET_MM_PERMUTATION INIT_YMM %1
- %define mmsize 32
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 16
- %endif
- %define mova vmovaps
- %define movu vmovups
- %undef movh
- %define movnta vmovntps
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, ymm %+ %%i
- CAT_XDEFINE nymm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-INIT_XMM
-
-; I often want to use macros that permute their arguments. e.g. there's no
-; efficient way to implement butterfly or transpose or dct without swapping some
-; arguments.
-;
-; I would like to not have to manually keep track of the permutations:
-; If I insert a permutation in the middle of a function, it should automatically
-; change everything that follows. For more complex macros I may also have multiple
-; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
-;
-; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
-; permutes its arguments. It's equivalent to exchanging the contents of the
-; registers, except that this way you exchange the register names instead, so it
-; doesn't cost any cycles.
-
-%macro PERMUTE 2-* ; takes a list of pairs to swap
-%rep %0/2
- %xdefine tmp%2 m%2
- %xdefine ntmp%2 nm%2
- %rotate 2
-%endrep
-%rep %0/2
- %xdefine m%1 tmp%2
- %xdefine nm%1 ntmp%2
- %undef tmp%2
- %undef ntmp%2
- %rotate 2
-%endrep
-%endmacro
-
-%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
-%rep %0-1
-%ifdef m%1
- %xdefine tmp m%1
- %xdefine m%1 m%2
- %xdefine m%2 tmp
- CAT_XDEFINE n, m%1, %1
- CAT_XDEFINE n, m%2, %2
-%else
- ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
- ; Be careful using this mode in nested macros though, as in some cases there may be
- ; other copies of m# that have already been dereferenced and don't get updated correctly.
- %xdefine %%n1 n %+ %1
- %xdefine %%n2 n %+ %2
- %xdefine tmp m %+ %%n1
- CAT_XDEFINE m, %%n1, m %+ %%n2
- CAT_XDEFINE m, %%n2, tmp
- CAT_XDEFINE n, m %+ %%n1, %%n1
- CAT_XDEFINE n, m %+ %%n2, %%n2
-%endif
- %undef tmp
- %rotate 1
-%endrep
-%endmacro
-
-; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
-; calls to that function will automatically load the permutation, so values can
-; be returned in mmregs.
-%macro SAVE_MM_PERMUTATION 0-1
- %if %0
- %xdefine %%f %1_m
- %else
- %xdefine %%f current_function %+ _m
- %endif
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE %%f, %%i, m %+ %%i
- %assign %%i %%i+1
- %endrep
-%endmacro
-
-%macro LOAD_MM_PERMUTATION 1 ; name to load from
- %ifdef %1_m0
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, %1_m %+ %%i
- CAT_XDEFINE n, m %+ %%i, %%i
- %assign %%i %%i+1
- %endrep
- %endif
-%endmacro
-
-; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
-%macro call 1
- call_internal %1, %1 %+ SUFFIX
-%endmacro
-%macro call_internal 2
- %xdefine %%i %1
- %ifndef cglobaled_%1
- %ifdef cglobaled_%2
- %xdefine %%i %2
- %endif
- %endif
- call %%i
- LOAD_MM_PERMUTATION %%i
-%endmacro
-
-; Substitutions that reduce instruction size but are functionally equivalent
-%macro add 2
- %ifnum %2
- %if %2==128
- sub %1, -128
- %else
- add %1, %2
- %endif
- %else
- add %1, %2
- %endif
-%endmacro
-
-%macro sub 2
- %ifnum %2
- %if %2==128
- add %1, -128
- %else
- sub %1, %2
- %endif
- %else
- sub %1, %2
- %endif
-%endmacro
-
-;=============================================================================
-; AVX abstraction layer
-;=============================================================================
-
-%assign i 0
-%rep 16
- %if i < 8
- CAT_XDEFINE sizeofmm, i, 8
- %endif
- CAT_XDEFINE sizeofxmm, i, 16
- CAT_XDEFINE sizeofymm, i, 32
-%assign i i+1
-%endrep
-%undef i
-
-%macro CHECK_AVX_INSTR_EMU 3-*
- %xdefine %%opcode %1
- %xdefine %%dst %2
- %rep %0-2
- %ifidn %%dst, %3
- %error non-AVX emulation of ``%%opcode'' is not supported
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
-;%4 == number of operands given
-;%5+: operands
-%macro RUN_AVX_INSTR 6-7+
- %ifid %6
- %define %%sizeofreg sizeof%6
- %elifid %5
- %define %%sizeofreg sizeof%5
- %else
- %define %%sizeofreg mmsize
- %endif
- %if %%sizeofreg==32
- %if %4>=3
- v%1 %5, %6, %7
- %else
- v%1 %5, %6
- %endif
- %else
- %if %%sizeofreg==8
- %define %%regmov movq
- %elif %2
- %define %%regmov movaps
- %else
- %define %%regmov movdqa
- %endif
-
- %if %4>=3+%3
- %ifnidn %5, %6
- %if AVX_enabled && %%sizeofreg==16
- v%1 %5, %6, %7
- %else
- CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
- %%regmov %5, %6
- %1 %5, %7
- %endif
- %else
- %1 %5, %7
- %endif
- %elif %4>=3
- %1 %5, %6, %7
- %else
- %1 %5, %6
- %endif
- %endif
-%endmacro
-
-; 3arg AVX ops with a memory arg can only have it in src2,
-; whereas SSE emulation of 3arg prefers to have it in src1 (i.e. the mov).
-; So, if the op is symmetric and the wrong one is memory, swap them.
-%macro RUN_AVX_INSTR1 8
- %assign %%swap 0
- %if AVX_enabled
- %ifnid %6
- %assign %%swap 1
- %endif
- %elifnidn %5, %6
- %ifnid %7
- %assign %%swap 1
- %endif
- %endif
- %if %%swap && %3 == 0 && %8 == 1
- RUN_AVX_INSTR %1, %2, %3, %4, %5, %7, %6
- %else
- RUN_AVX_INSTR %1, %2, %3, %4, %5, %6, %7
- %endif
-%endmacro
-
-;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
-;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not
-%macro AVX_INSTR 4
- %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4
- %ifidn %3, fnord
- RUN_AVX_INSTR %6, %7, %8, 2, %1, %2
- %elifidn %4, fnord
- RUN_AVX_INSTR1 %6, %7, %8, 3, %1, %2, %3, %9
- %elifidn %5, fnord
- RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4
- %else
- RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5
- %endif
- %endmacro
-%endmacro
-
-AVX_INSTR addpd, 1, 0, 1
-AVX_INSTR addps, 1, 0, 1
-AVX_INSTR addsd, 1, 0, 1
-AVX_INSTR addss, 1, 0, 1
-AVX_INSTR addsubpd, 1, 0, 0
-AVX_INSTR addsubps, 1, 0, 0
-AVX_INSTR andpd, 1, 0, 1
-AVX_INSTR andps, 1, 0, 1
-AVX_INSTR andnpd, 1, 0, 0
-AVX_INSTR andnps, 1, 0, 0
-AVX_INSTR blendpd, 1, 0, 0
-AVX_INSTR blendps, 1, 0, 0
-AVX_INSTR blendvpd, 1, 0, 0
-AVX_INSTR blendvps, 1, 0, 0
-AVX_INSTR cmppd, 1, 0, 0
-AVX_INSTR cmpps, 1, 0, 0
-AVX_INSTR cmpsd, 1, 0, 0
-AVX_INSTR cmpss, 1, 0, 0
-AVX_INSTR cvtdq2ps, 1, 0, 0
-AVX_INSTR cvtps2dq, 1, 0, 0
-AVX_INSTR divpd, 1, 0, 0
-AVX_INSTR divps, 1, 0, 0
-AVX_INSTR divsd, 1, 0, 0
-AVX_INSTR divss, 1, 0, 0
-AVX_INSTR dppd, 1, 1, 0
-AVX_INSTR dpps, 1, 1, 0
-AVX_INSTR haddpd, 1, 0, 0
-AVX_INSTR haddps, 1, 0, 0
-AVX_INSTR hsubpd, 1, 0, 0
-AVX_INSTR hsubps, 1, 0, 0
-AVX_INSTR maxpd, 1, 0, 1
-AVX_INSTR maxps, 1, 0, 1
-AVX_INSTR maxsd, 1, 0, 1
-AVX_INSTR maxss, 1, 0, 1
-AVX_INSTR minpd, 1, 0, 1
-AVX_INSTR minps, 1, 0, 1
-AVX_INSTR minsd, 1, 0, 1
-AVX_INSTR minss, 1, 0, 1
-AVX_INSTR movhlps, 1, 0, 0
-AVX_INSTR movlhps, 1, 0, 0
-AVX_INSTR movsd, 1, 0, 0
-AVX_INSTR movss, 1, 0, 0
-AVX_INSTR mpsadbw, 0, 1, 0
-AVX_INSTR mulpd, 1, 0, 1
-AVX_INSTR mulps, 1, 0, 1
-AVX_INSTR mulsd, 1, 0, 1
-AVX_INSTR mulss, 1, 0, 1
-AVX_INSTR orpd, 1, 0, 1
-AVX_INSTR orps, 1, 0, 1
-AVX_INSTR pabsb, 0, 0, 0
-AVX_INSTR pabsw, 0, 0, 0
-AVX_INSTR pabsd, 0, 0, 0
-AVX_INSTR packsswb, 0, 0, 0
-AVX_INSTR packssdw, 0, 0, 0
-AVX_INSTR packuswb, 0, 0, 0
-AVX_INSTR packusdw, 0, 0, 0
-AVX_INSTR paddb, 0, 0, 1
-AVX_INSTR paddw, 0, 0, 1
-AVX_INSTR paddd, 0, 0, 1
-AVX_INSTR paddq, 0, 0, 1
-AVX_INSTR paddsb, 0, 0, 1
-AVX_INSTR paddsw, 0, 0, 1
-AVX_INSTR paddusb, 0, 0, 1
-AVX_INSTR paddusw, 0, 0, 1
-AVX_INSTR palignr, 0, 1, 0
-AVX_INSTR pand, 0, 0, 1
-AVX_INSTR pandn, 0, 0, 0
-AVX_INSTR pavgb, 0, 0, 1
-AVX_INSTR pavgw, 0, 0, 1
-AVX_INSTR pblendvb, 0, 0, 0
-AVX_INSTR pblendw, 0, 1, 0
-AVX_INSTR pcmpestri, 0, 0, 0
-AVX_INSTR pcmpestrm, 0, 0, 0
-AVX_INSTR pcmpistri, 0, 0, 0
-AVX_INSTR pcmpistrm, 0, 0, 0
-AVX_INSTR pcmpeqb, 0, 0, 1
-AVX_INSTR pcmpeqw, 0, 0, 1
-AVX_INSTR pcmpeqd, 0, 0, 1
-AVX_INSTR pcmpeqq, 0, 0, 1
-AVX_INSTR pcmpgtb, 0, 0, 0
-AVX_INSTR pcmpgtw, 0, 0, 0
-AVX_INSTR pcmpgtd, 0, 0, 0
-AVX_INSTR pcmpgtq, 0, 0, 0
-AVX_INSTR phaddw, 0, 0, 0
-AVX_INSTR phaddd, 0, 0, 0
-AVX_INSTR phaddsw, 0, 0, 0
-AVX_INSTR phsubw, 0, 0, 0
-AVX_INSTR phsubd, 0, 0, 0
-AVX_INSTR phsubsw, 0, 0, 0
-AVX_INSTR pmaddwd, 0, 0, 1
-AVX_INSTR pmaddubsw, 0, 0, 0
-AVX_INSTR pmaxsb, 0, 0, 1
-AVX_INSTR pmaxsw, 0, 0, 1
-AVX_INSTR pmaxsd, 0, 0, 1
-AVX_INSTR pmaxub, 0, 0, 1
-AVX_INSTR pmaxuw, 0, 0, 1
-AVX_INSTR pmaxud, 0, 0, 1
-AVX_INSTR pminsb, 0, 0, 1
-AVX_INSTR pminsw, 0, 0, 1
-AVX_INSTR pminsd, 0, 0, 1
-AVX_INSTR pminub, 0, 0, 1
-AVX_INSTR pminuw, 0, 0, 1
-AVX_INSTR pminud, 0, 0, 1
-AVX_INSTR pmovmskb, 0, 0, 0
-AVX_INSTR pmulhuw, 0, 0, 1
-AVX_INSTR pmulhrsw, 0, 0, 1
-AVX_INSTR pmulhw, 0, 0, 1
-AVX_INSTR pmullw, 0, 0, 1
-AVX_INSTR pmulld, 0, 0, 1
-AVX_INSTR pmuludq, 0, 0, 1
-AVX_INSTR pmuldq, 0, 0, 1
-AVX_INSTR por, 0, 0, 1
-AVX_INSTR psadbw, 0, 0, 1
-AVX_INSTR pshufb, 0, 0, 0
-AVX_INSTR pshufd, 0, 1, 0
-AVX_INSTR pshufhw, 0, 1, 0
-AVX_INSTR pshuflw, 0, 1, 0
-AVX_INSTR psignb, 0, 0, 0
-AVX_INSTR psignw, 0, 0, 0
-AVX_INSTR psignd, 0, 0, 0
-AVX_INSTR psllw, 0, 0, 0
-AVX_INSTR pslld, 0, 0, 0
-AVX_INSTR psllq, 0, 0, 0
-AVX_INSTR pslldq, 0, 0, 0
-AVX_INSTR psraw, 0, 0, 0
-AVX_INSTR psrad, 0, 0, 0
-AVX_INSTR psrlw, 0, 0, 0
-AVX_INSTR psrld, 0, 0, 0
-AVX_INSTR psrlq, 0, 0, 0
-AVX_INSTR psrldq, 0, 0, 0
-AVX_INSTR psubb, 0, 0, 0
-AVX_INSTR psubw, 0, 0, 0
-AVX_INSTR psubd, 0, 0, 0
-AVX_INSTR psubq, 0, 0, 0
-AVX_INSTR psubsb, 0, 0, 0
-AVX_INSTR psubsw, 0, 0, 0
-AVX_INSTR psubusb, 0, 0, 0
-AVX_INSTR psubusw, 0, 0, 0
-AVX_INSTR ptest, 0, 0, 0
-AVX_INSTR punpckhbw, 0, 0, 0
-AVX_INSTR punpckhwd, 0, 0, 0
-AVX_INSTR punpckhdq, 0, 0, 0
-AVX_INSTR punpckhqdq, 0, 0, 0
-AVX_INSTR punpcklbw, 0, 0, 0
-AVX_INSTR punpcklwd, 0, 0, 0
-AVX_INSTR punpckldq, 0, 0, 0
-AVX_INSTR punpcklqdq, 0, 0, 0
-AVX_INSTR pxor, 0, 0, 1
-AVX_INSTR shufps, 1, 1, 0
-AVX_INSTR subpd, 1, 0, 0
-AVX_INSTR subps, 1, 0, 0
-AVX_INSTR subsd, 1, 0, 0
-AVX_INSTR subss, 1, 0, 0
-AVX_INSTR unpckhpd, 1, 0, 0
-AVX_INSTR unpckhps, 1, 0, 0
-AVX_INSTR unpcklpd, 1, 0, 0
-AVX_INSTR unpcklps, 1, 0, 0
-AVX_INSTR xorpd, 1, 0, 1
-AVX_INSTR xorps, 1, 0, 1
-
-; 3DNow instructions, for sharing code between AVX, SSE and 3DN
-AVX_INSTR pfadd, 1, 0, 1
-AVX_INSTR pfsub, 1, 0, 0
-AVX_INSTR pfmul, 1, 0, 1
-
-; base-4 constants for shuffles
-%assign i 0
-%rep 256
- %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
- %if j < 10
- CAT_XDEFINE q000, j, i
- %elif j < 100
- CAT_XDEFINE q00, j, i
- %elif j < 1000
- CAT_XDEFINE q0, j, i
- %else
- CAT_XDEFINE q, j, i
- %endif
-%assign i i+1
-%endrep
-%undef i
-%undef j
-
-%macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
- %else
- %6 %1, %2, %3
- %7 %1, %4
- %endif
- %endmacro
-%endmacro
-
-FMA_INSTR pmacsdd, pmulld, paddd
-FMA_INSTR pmacsww, pmullw, paddw
-FMA_INSTR pmadcswd, pmaddwd, paddd
-
-; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
-; This lets us use tzcnt without bumping the yasm version requirement yet.
-%define tzcnt rep bsf
diff --git a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c b/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c
deleted file mode 100755
index 3712a3b6d6..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef _YUV_LIBYUV
-#include <libyuv.h>
-#include "yuv_util.h"
-#include "yuv_libyuv.h"
-
-void decodeRGB(struct TheoraPixelTransform* t)
-{
- I420ToRAW(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 3, t->w, t->h);
-}
-
-void decodeRGBA(struct TheoraPixelTransform* t)
-{
- I420ToABGR(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
- _decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeRGBX(struct TheoraPixelTransform* t)
-{
- I420ToABGR(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-}
-
-void decodeARGB(struct TheoraPixelTransform* t)
-{
- I420ToBGRA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
- _decodeAlpha(t, t->w * 4);
-}
-
-void decodeXRGB(struct TheoraPixelTransform* t)
-{
- I420ToBGRA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-}
-
-void decodeBGR(struct TheoraPixelTransform* t)
-{
- I420ToRGB24(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 3, t->w, t->h);
-}
-
-void decodeBGRA(struct TheoraPixelTransform* t)
-{
- I420ToARGB(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
- _decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeBGRX(struct TheoraPixelTransform* t)
-{
- I420ToARGB(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-}
-
-void decodeABGR(struct TheoraPixelTransform* t)
-{
- I420ToRGBA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
- _decodeAlpha(t, t->w * 4);
-}
-
-void decodeXBGR(struct TheoraPixelTransform* t)
-{
- I420ToRGBA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-}
-
-void initYUVConversionModule()
-{
-
-}
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h b/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h
deleted file mode 100755
index f621af0c5f..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _YUV_LIBYUV_h
-#define _YUV_LIBYUV_h
-
-#include "TheoraPixelTransform.h"
-
-#endif
diff --git a/drivers/theoraplayer/src/YUV/yuv_util.c b/drivers/theoraplayer/src/YUV/yuv_util.c
deleted file mode 100644
index f5bf3e5f9e..0000000000
--- a/drivers/theoraplayer/src/YUV/yuv_util.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "yuv_util.h"
-
-struct TheoraPixelTransform* incOut(struct TheoraPixelTransform* t, int n)
-{
- // used for XRGB, XBGR and similar
- t->out += n;
- return t;
-}
-
-void _decodeAlpha(struct TheoraPixelTransform* t, int stride)
-{
- int width = t->w;
- unsigned char *ySrc, *yLineEnd, *out;
- int luma;
- unsigned int y;
- for (y = 0; y < t->h; y++)
- {
- ySrc = t->y + y * t->yStride + width;
- out = t->out + y * stride;
-
- for (yLineEnd = ySrc + width; ySrc != yLineEnd; ++ySrc, out += 4)
- {
- luma = (*ySrc);
- // because in YCbCr specification, luma values are in the range of [16, 235]
- // account for 'footroom' and 'headroom' ranges while using luma values as alpha channel
- if (luma <= 16) *out = 0;
- else if (luma >= 235) *out = 255;
- else *out = (unsigned char) (((luma - 16) * 255) / 219);
- }
- }
-}
diff --git a/drivers/theoraplayer/src/YUV/yuv_util.h b/drivers/theoraplayer/src/YUV/yuv_util.h
deleted file mode 100644
index 1f9d76634a..0000000000
--- a/drivers/theoraplayer/src/YUV/yuv_util.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _YUV_UTIL_h
-#define _YUV_UTIL_h
-
-#include "TheoraPixelTransform.h"
-
-struct TheoraPixelTransform* incOut(struct TheoraPixelTransform* t, int n);
-void _decodeAlpha(struct TheoraPixelTransform* t, int stride);
-
-#endif
diff --git a/drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj b/drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj
deleted file mode 100644
index 23f875fe0c..0000000000
--- a/drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj
+++ /dev/null
@@ -1,2606 +0,0 @@
-// !$*UTF8*$!
-{
- archiveVersion = 1;
- classes = {
- };
- objectVersion = 46;
- objects = {
-
-/* Begin PBXBuildFile section */
- D139462D17C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
- D139462E17C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
- D139462F17C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
- D139463017C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
- D139463117C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
- D139463217C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
- D139463317C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
- D139463417C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
- D139463617C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
- D139463717C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
- D139463817C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
- D139463917C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
- D139463A17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
- D139463B17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
- D139463C17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
- D139463D17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
- D139463E17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
- D13946C617C110670091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
- D13946CC17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
- D13946CD17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
- D13946CE17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
- D13946CF17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
- D13946D017C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
- D13946D117C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
- D13946D217C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
- D13946D317C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
- D13946D417C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
- D13946D517C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
- D13946D617C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
- D13946D717C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
- D13946D817C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
- D13946D917C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
- D13946DA17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
- D13946DB17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
- D13946DC17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
- D13946DD17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
- D159BCB017C227F30030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
- D159BCB117C227F40030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
- D159BCB217C227F40030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
- D159BCB317C227F40030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
- D159BCB417C227F50030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
- D159BCB517C227F50030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
- D159BCB617C227F60030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
- D159BCB717C227F60030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
- D159BCB817C227F70030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
- D159BCB917C228310030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
- D159BCBA17C228320030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
- D159BCBB17C228320030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
- D159BCBC17C228330030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
- D159BCBD17C228330030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
- D159BCBE17C228340030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
- D159BCBF17C228340030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
- D159BCC017C228340030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
- D159BCC117C228350030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
- D159BCC217C2286D0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
- D159BCC317C2286D0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
- D159BCC417C2286D0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
- D159BCC517C2286E0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
- D159BCC617C2286E0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
- D159BCC717C2286F0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
- D159BCC817C2286F0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
- D159BCC917C2286F0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
- D159BCCA17C228700030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
- D15D361017C386A600F40439 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
- D15D361117C386A600F40439 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
- D15D361217C386A700F40439 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
- D15D361317C386B100F40439 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
- D15D361517C386B300F40439 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
- D15D361617C386B400F40439 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
- D16775AB155C501D0050EC64 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
- D16775AC155C501D0050EC64 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
- D16775AD155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
- D16775AE155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
- D16775AF155C501D0050EC64 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
- D16775B0155C501D0050EC64 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
- D16775B1155C501D0050EC64 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
- D16775B2155C501D0050EC64 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
- D16775B3155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
- D16775B4155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
- D16775B5155C501D0050EC64 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
- D16775B6155C501D0050EC64 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
- D16775B7155C501D0050EC64 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
- D16775B8155C501D0050EC64 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
- D16775B9155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
- D16775BA155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
- D16775BB155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
- D16775BC155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
- D16775BD155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
- D16775BE155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
- D16775BF155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
- D16775C0155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
- D16775CE155C50280050EC64 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775CF155C50280050EC64 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
- D16775D0155C50280050EC64 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775D1155C50280050EC64 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
- D16775D2155C50280050EC64 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775D3155C50280050EC64 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
- D16775D4155C50280050EC64 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775D5155C50280050EC64 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
- D16775D6155C50280050EC64 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775D7155C50280050EC64 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
- D16775D8155C50280050EC64 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775D9155C50280050EC64 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
- D16775DA155C50280050EC64 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775DB155C50280050EC64 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
- D16775DC155C50280050EC64 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775DD155C50280050EC64 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
- D16775DE155C50280050EC64 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775DF155C50280050EC64 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
- D16775E0155C50280050EC64 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775E1155C50280050EC64 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
- D16775E2155C50280050EC64 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775E3155C50280050EC64 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
- D16775E4155C50280050EC64 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775E5155C50280050EC64 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
- D16775E6155C50280050EC64 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D16775E7155C50280050EC64 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
- D198F952177A31FC002942E3 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
- D198F953177A31FC002942E3 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
- D198F954177A31FC002942E3 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
- D198F955177A31FC002942E3 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
- D198F956177A31FC002942E3 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
- D198F957177A31FC002942E3 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
- D198F958177A31FC002942E3 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
- D198F959177A31FC002942E3 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
- D198F95A177A31FC002942E3 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
- D198F95B177A31FC002942E3 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
- D198F95C177A31FC002942E3 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
- D198F95D177A31FC002942E3 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
- D198F95F177A31FC002942E3 /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
- D198F960177A31FC002942E3 /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
- D198F961177A31FC002942E3 /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
- D198F962177A31FC002942E3 /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
- D198F965177A31FC002942E3 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
- D198F966177A31FC002942E3 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
- D198F967177A31FC002942E3 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
- D198F968177A31FC002942E3 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
- D198F969177A31FC002942E3 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
- D198F96A177A31FC002942E3 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
- D198F96B177A31FC002942E3 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
- D198F96C177A31FC002942E3 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
- D198F96D177A31FC002942E3 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
- D198F96E177A31FC002942E3 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
- D198F96F177A31FC002942E3 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
- D198F970177A31FC002942E3 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
- D198F971177A31FC002942E3 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
- D198F972177A31FC002942E3 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
- D198F974177A31FC002942E3 /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
- D198F97E177A31FE002942E3 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
- D198F97F177A31FE002942E3 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
- D198F980177A31FE002942E3 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
- D198F981177A31FE002942E3 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
- D198F982177A31FE002942E3 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
- D198F983177A31FE002942E3 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
- D198F984177A31FE002942E3 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
- D198F985177A31FE002942E3 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
- D198F986177A31FE002942E3 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
- D198F987177A31FE002942E3 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
- D198F988177A31FE002942E3 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
- D198F989177A31FE002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
- D198F98B177A31FE002942E3 /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
- D198F98C177A31FE002942E3 /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
- D198F98D177A31FE002942E3 /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
- D198F98E177A31FE002942E3 /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
- D198F991177A31FE002942E3 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
- D198F992177A31FE002942E3 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
- D198F993177A31FE002942E3 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
- D198F994177A31FE002942E3 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
- D198F995177A31FE002942E3 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
- D198F996177A31FE002942E3 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
- D198F997177A31FE002942E3 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
- D198F998177A31FE002942E3 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
- D198F999177A31FE002942E3 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
- D198F99A177A31FE002942E3 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
- D198F99B177A31FE002942E3 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
- D198F99C177A31FE002942E3 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
- D198F99D177A31FE002942E3 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
- D198F99E177A31FE002942E3 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
- D198F9A0177A31FE002942E3 /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
- D198F9AA177A3200002942E3 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
- D198F9AB177A3200002942E3 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
- D198F9AC177A3200002942E3 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
- D198F9AD177A3200002942E3 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
- D198F9AE177A3200002942E3 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
- D198F9AF177A3200002942E3 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
- D198F9B0177A3200002942E3 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
- D198F9B1177A3200002942E3 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
- D198F9B2177A3200002942E3 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
- D198F9B3177A3200002942E3 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
- D198F9B4177A3200002942E3 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
- D198F9B5177A3200002942E3 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
- D198F9B6177A3200002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
- D198F9B8177A3200002942E3 /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
- D198F9B9177A3200002942E3 /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
- D198F9BA177A3200002942E3 /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
- D198F9BB177A3200002942E3 /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
- D198F9BE177A3200002942E3 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
- D198F9BF177A3200002942E3 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
- D198F9C0177A3200002942E3 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
- D198F9C1177A3200002942E3 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
- D198F9C2177A3200002942E3 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
- D198F9C3177A3200002942E3 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
- D198F9C4177A3200002942E3 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
- D198F9C5177A3200002942E3 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
- D198F9C6177A3200002942E3 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
- D198F9C7177A3200002942E3 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
- D198F9C8177A3200002942E3 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
- D198F9C9177A3200002942E3 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
- D198F9CA177A3200002942E3 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
- D198F9CB177A3200002942E3 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
- D198F9CD177A3200002942E3 /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
- D1BCE05A18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
- D1BCE05B18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
- D1BCE05C18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
- D1BCE05D18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
- D1BCE05E18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
- D1BCE05F18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
- D1BCE06018F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
- D1BCE06118F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
- D1BCE06218F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
- D1BCE06318F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
- D1BCE06418F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
- D1BCE06518F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
- D1BCE06618F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
- D1BCE06718F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
- D1BCE06818F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
- D1BCE06918F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
- D1BCE06A18F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
- D1BCE06B18F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
- D1C3D07217C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
- D1C3D07317C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
- D1C3D07417C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
- D1C3D07517C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
- D1C3D07617C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
- D1C3D07717C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
- D1C3D07817C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
- D1C3D07917C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
- D1C3D07A17C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
- D1C3D08117C157CD00CA0FD2 /* compare_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */; };
- D1C3D08217C157CD00CA0FD2 /* compare_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */; };
- D1C3D08317C157CD00CA0FD2 /* compare_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */; };
- D1C3D08417C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
- D1C3D08517C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
- D1C3D08617C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
- D1C3D08717C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
- D1C3D08817C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
- D1C3D08917C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
- D1C3D09617C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
- D1C3D09717C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
- D1C3D09817C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
- D1C3D09917C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
- D1C3D09A17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
- D1C3D09B17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
- D1C3D09C17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
- D1C3D09D17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
- D1C3D09E17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
- D1C3D09F17C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
- D1C3D0A017C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
- D1C3D0A117C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
- D1C3D0A217C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
- D1C3D0A317C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
- D1C3D0A417C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
- D1C3D0A517C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
- D1C3D0A617C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
- D1C3D0A717C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
- D1C3D0C317C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
- D1C3D0C417C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
- D1C3D0C517C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
- D1C3D0C617C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
- D1C3D0C717C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
- D1C3D0C817C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
- D1C3D0C917C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
- D1C3D0CA17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
- D1C3D0CB17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
- D1C3D0CC17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
- D1C3D0CD17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
- D1C3D0CE17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
- D1C3D0CF17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
- D1C3D0D017C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
- D1C3D0D117C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
- D1C3D0D217C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
- D1C3D0D317C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
- D1C3D0D417C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
- D1C3D0D517C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
- D1C3D0D617C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
- D1C3D0D717C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
- D1C3D0D817C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
- D1C3D0D917C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
- D1C3D0DA17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
- D1C3D0DB17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
- D1C3D0DC17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
- D1C3D0DD17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
- D1C3D0DE17C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
- D1C3D0DF17C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
- D1C3D0E017C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
- D1C3D0E117C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
- D1C3D0E217C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
- D1C3D0E317C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
- D1C3D0E417C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
- D1C3D0E517C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
- D1C3D0E617C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
- D1C3D0E717C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
- D1C3D0E817C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
- D1C3D0E917C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
- D1C3D0EA17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
- D1C3D0EB17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
- D1C3D0EC17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
- D1C3D0ED17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
- D1C3D0EE17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
- D1C3D0EF17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
- D1C3D10217C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
- D1C3D10317C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
- D1C3D10417C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
- D1C3D10517C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
- D1C3D10617C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
- D1C3D10717C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
- D1C3D10817C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
- D1C3D10917C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
- D1C3D10A17C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
- D1C3D12317C157CD00CA0FD2 /* rotate_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */; };
- D1C3D12417C157CD00CA0FD2 /* rotate_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */; };
- D1C3D12517C157CD00CA0FD2 /* rotate_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */; };
- D1C3D12617C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
- D1C3D12717C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
- D1C3D12817C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
- D1C3D12917C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
- D1C3D12A17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
- D1C3D12B17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
- D1C3D12C17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
- D1C3D12D17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
- D1C3D12E17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
- D1C3D12F17C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
- D1C3D13017C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
- D1C3D13117C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
- D1C3D13217C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
- D1C3D13317C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
- D1C3D13417C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
- D1C3D13517C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
- D1C3D13617C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
- D1C3D13717C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
- D1C3D13817C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
- D1C3D13917C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
- D1C3D13A17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
- D1C3D13B17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
- D1C3D13C17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
- D1C3D13D17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
- D1C3D13E17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
- D1C3D13F17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
- D1C3D14017C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
- D1C3D15017C157CD00CA0FD2 /* row_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06717C157CD00CA0FD2 /* row_neon.cc */; };
- D1C3D15117C157CD00CA0FD2 /* row_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06717C157CD00CA0FD2 /* row_neon.cc */; };
- D1C3D15217C157CD00CA0FD2 /* row_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06717C157CD00CA0FD2 /* row_neon.cc */; };
- D1C3D15317C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
- D1C3D15417C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
- D1C3D15517C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
- D1C3D15617C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
- D1C3D15717C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
- D1C3D15817C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
- D1C3D17417C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */; };
- D1C3D17517C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */; };
- D1C3D17617C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */; };
- D1C3D17717C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
- D1C3D17817C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
- D1C3D17917C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
- D1C3D17A17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
- D1C3D17B17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
- D1C3D17C17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
- D1C3D17D17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
- D1C3D17E17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
- D1C3D17F17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
- D1C3D18F17C157CD00CA0FD2 /* scale_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */; };
- D1C3D19017C157CD00CA0FD2 /* scale_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */; };
- D1C3D19117C157CD00CA0FD2 /* scale_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */; };
- D1C3D19B17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
- D1C3D19C17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
- D1C3D19D17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
- D1C3D19E17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
- D1C3D19F17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
- D1C3D1A017C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
- D1C3D1A117C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
- D1C3D1A217C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
- D1C3D1A317C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
- D1CD00001696FC0B00609AB0 /* Theora.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFFB1696FC0100609AB0 /* Theora.framework */; };
- D1CD00011696FC0B00609AB0 /* Vorbis.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF91696FBF700609AB0 /* Vorbis.framework */; };
- D1CD00021696FC0B00609AB0 /* Ogg.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF71696FBF400609AB0 /* Ogg.framework */; };
- D1CD00041696FF9400609AB0 /* CoreMedia.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CD00031696FF9400609AB0 /* CoreMedia.framework */; };
- D1CD00051696FF9600609AB0 /* CoreMedia.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CD00031696FF9400609AB0 /* CoreMedia.framework */; };
- D1CDFF241696C77A00609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
- D1CDFF251696C77A00609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
- D1CDFF261696C77A00609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
- D1CDFF271696C77A00609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
- D1CDFF281696C77A00609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
- D1CDFF291696C77A00609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
- D1CDFF2A1696C77A00609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
- D1CDFF2B1696C77A00609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
- D1CDFF2C1696C77A00609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
- D1CDFF2D1696C77A00609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
- D1CDFF2E1696C77A00609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
- D1CDFF341696C77A00609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF351696C77A00609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF361696C77A00609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF371696C77A00609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF381696C77A00609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF391696C77A00609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF3A1696C77A00609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF3B1696C77A00609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF3C1696C77A00609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF3D1696C77A00609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF3E1696C77A00609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF3F1696C77A00609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF401696C77A00609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF4C1696C79700609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
- D1CDFF4D1696C79700609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
- D1CDFF4E1696C79700609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
- D1CDFF4F1696C79700609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
- D1CDFF501696C79700609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
- D1CDFF511696C79700609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
- D1CDFF521696C79700609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
- D1CDFF531696C79700609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
- D1CDFF541696C79700609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
- D1CDFF551696C79700609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
- D1CDFF561696C79700609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
- D1CDFF5C1696C79700609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF5D1696C79700609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF5E1696C79700609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF5F1696C79700609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF601696C79700609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF611696C79700609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF621696C79700609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF631696C79700609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF641696C79700609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF651696C79700609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF661696C79700609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF671696C79700609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF681696C79700609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1CDFF961696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
- D1CDFF971696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
- D1CDFF981696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
- D1CDFF991696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
- D1CDFF9A1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
- D1CDFF9B1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
- D1CDFF9E1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
- D1CDFF9F1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */; };
- D1CDFFA21696E1CA00609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
- D1CDFFA31696E1CA00609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
- D1CDFFA41696E1CA00609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
- D1CDFFA51696E1CA00609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
- D1CDFFA61696E1CA00609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
- D1CDFFA71696E1CA00609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
- D1CDFFA81696E1CA00609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
- D1CDFFA91696E1CA00609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
- D1CDFFAA1696E1CA00609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
- D1CDFFAB1696E1CA00609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
- D1CDFFAC1696E1CA00609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
- D1CDFFB01696E1CA00609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
- D1CDFFB11696E1CA00609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
- D1CDFFB21696E1CA00609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
- D1CDFFB31696E1CA00609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
- D1CDFFB41696E1CA00609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
- D1CDFFB51696E1CA00609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
- D1CDFFB61696E1CA00609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
- D1CDFFB71696E1CA00609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
- D1CDFFB81696E1CA00609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
- D1CDFFB91696E1CA00609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
- D1CDFFBA1696E1CA00609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
- D1CDFFBB1696E1CA00609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
- D1CDFFBC1696E1CA00609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
- D1CDFFBD1696E1CA00609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
- D1CDFFC71696E1D700609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
- D1CDFFC81696E1D700609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
- D1CDFFC91696E1D700609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
- D1CDFFCA1696E1D700609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
- D1CDFFCB1696E1D700609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
- D1CDFFCC1696E1D700609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
- D1CDFFCD1696E1D700609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
- D1CDFFCE1696E1D700609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
- D1CDFFCF1696E1D700609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
- D1CDFFD01696E1D700609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
- D1CDFFD11696E1D700609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
- D1CDFFD21696E1D700609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
- D1CDFFD51696E1D700609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
- D1CDFFD61696E1D700609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
- D1CDFFD71696E1D700609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
- D1CDFFD81696E1D700609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
- D1CDFFD91696E1D700609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
- D1CDFFDA1696E1D700609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
- D1CDFFDB1696E1D700609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
- D1CDFFDC1696E1D700609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
- D1CDFFDD1696E1D700609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
- D1CDFFDE1696E1D700609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
- D1CDFFDF1696E1D700609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
- D1CDFFE01696E1D700609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
- D1CDFFE11696E1D700609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
- D1CDFFE21696E1D700609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
- D1CDFFEA1696E24B00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
- D1CDFFEB1696E24C00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
- D1CDFFEC1696E24F00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
- D1CDFFEE1696FB7200609AB0 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */; };
- D1CDFFF11696FB8900609AB0 /* CoreVideo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */; };
- D1CDFFF31696FBA800609AB0 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF21696FBA800609AB0 /* Foundation.framework */; };
- D1CDFFF41696FBB200609AB0 /* CoreVideo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */; };
- D1CDFFF51696FBB200609AB0 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */; };
- D1CDFFF61696FBB200609AB0 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF21696FBA800609AB0 /* Foundation.framework */; };
- D1CDFFFD1696FC0800609AB0 /* Theora.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFFB1696FC0100609AB0 /* Theora.framework */; };
- D1CDFFFE1696FC0800609AB0 /* Vorbis.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF91696FBF700609AB0 /* Vorbis.framework */; };
- D1CDFFFF1696FC0800609AB0 /* Ogg.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF71696FBF400609AB0 /* Ogg.framework */; };
- D1D465D616C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1D465D716C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1D465D816C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1D465DA16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
- D1D465DB16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
- D1D465DC16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
- D1D465DD16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
- D1D465DE16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
- D1D465DF16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
- D1E2719916B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
- D1E2719A16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
- D1E2719B16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
- D1E2719C16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
- D1E2719D16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
- D1E2719E16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
- D1E271A516B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
- D1E271A616B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
- D1E271A716B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
- D1E271A816B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
- D1E271A916B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
- D1E271AA16B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
- D1E271AC16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
- D1E271AD16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
- D1E271AE16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
- D1E271AF16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
- D1E271B016B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
- D1E271B116B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
- D1E271B316B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1E271B416B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1E271B516B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; settings = {ATTRIBUTES = (Public, ); }; };
- D1E271B616B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
- D1E271B716B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
- D1E271B816B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
- D1F09EB1169AFEFB00DEEC63 /* TheoraVideoClip_AVFoundation.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */; };
-/* End PBXBuildFile section */
-
-/* Begin PBXFileReference section */
- D12CA55517734B4200412E5B /* TheoraVideoClip_FFmpeg.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoClip_FFmpeg.cpp; path = src/FFmpeg/TheoraVideoClip_FFmpeg.cpp; sourceTree = "<group>"; };
- D12CA55617734B4200412E5B /* TheoraVideoClip_FFmpeg.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip_FFmpeg.h; path = src/FFmpeg/TheoraVideoClip_FFmpeg.h; sourceTree = "<group>"; };
- D1358BC218D7777200A36FDC /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
- D1358BC318D7777800A36FDC /* iOS.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = iOS.xcconfig; path = xcconfig/iOS.xcconfig; sourceTree = "<group>"; };
- D1358BC418D7777800A36FDC /* Mac.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = Mac.xcconfig; path = xcconfig/Mac.xcconfig; sourceTree = "<group>"; };
- D139462B17C0ED450091F4A4 /* yuv_libyuv.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv_libyuv.c; path = src/YUV/libyuv/yuv_libyuv.c; sourceTree = "<group>"; };
- D139462C17C0ED450091F4A4 /* yuv_libyuv.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = yuv_libyuv.h; path = src/YUV/libyuv/yuv_libyuv.h; sourceTree = "<group>"; };
- D13946CA17C119B30091F4A4 /* yuv_util.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv_util.c; path = src/YUV/yuv_util.c; sourceTree = "<group>"; };
- D13946CB17C119B30091F4A4 /* yuv_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = yuv_util.h; path = src/YUV/yuv_util.h; sourceTree = "<group>"; };
- D1473F2A150CA69B00B20490 /* theoraplayer.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = theoraplayer.framework; sourceTree = BUILT_PRODUCTS_DIR; };
- D159BCAB17C227940030FAB6 /* compare_win.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = compare_win.cc; path = src/YUV/libyuv/src/compare_win.cc; sourceTree = "<group>"; };
- D159BCAC17C227940030FAB6 /* row_win.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = row_win.cc; path = src/YUV/libyuv/src/row_win.cc; sourceTree = "<group>"; };
- D159BCAD17C227940030FAB6 /* row_x86.asm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm.asm; name = row_x86.asm; path = src/YUV/libyuv/src/row_x86.asm; sourceTree = "<group>"; };
- D159BCAE17C227940030FAB6 /* x86inc.asm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm.asm; name = x86inc.asm; path = src/YUV/libyuv/src/x86inc.asm; sourceTree = "<group>"; };
- D167759E155C501D0050EC64 /* TheoraAsync.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraAsync.cpp; path = src/TheoraAsync.cpp; sourceTree = "<group>"; };
- D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraAudioInterface.cpp; path = src/TheoraAudioInterface.cpp; sourceTree = "<group>"; };
- D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraDataSource.cpp; path = src/TheoraDataSource.cpp; sourceTree = "<group>"; };
- D16775A1155C501D0050EC64 /* TheoraException.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraException.cpp; path = src/TheoraException.cpp; sourceTree = "<group>"; };
- D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraFrameQueue.cpp; path = src/TheoraFrameQueue.cpp; sourceTree = "<group>"; };
- D16775A3155C501D0050EC64 /* TheoraTimer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraTimer.cpp; path = src/TheoraTimer.cpp; sourceTree = "<group>"; };
- D16775A4155C501D0050EC64 /* TheoraUtil.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraUtil.cpp; path = src/TheoraUtil.cpp; sourceTree = "<group>"; };
- D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoClip.cpp; path = src/TheoraVideoClip.cpp; sourceTree = "<group>"; };
- D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoFrame.cpp; path = src/TheoraVideoFrame.cpp; sourceTree = "<group>"; };
- D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoManager.cpp; path = src/TheoraVideoManager.cpp; sourceTree = "<group>"; };
- D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraWorkerThread.cpp; path = src/TheoraWorkerThread.cpp; sourceTree = "<group>"; };
- D16775C1155C50280050EC64 /* TheoraAsync.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraAsync.h; path = include/theoraplayer/TheoraAsync.h; sourceTree = "<group>"; };
- D16775C2155C50280050EC64 /* TheoraAudioInterface.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraAudioInterface.h; path = include/theoraplayer/TheoraAudioInterface.h; sourceTree = "<group>"; };
- D16775C3155C50280050EC64 /* TheoraDataSource.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraDataSource.h; path = include/theoraplayer/TheoraDataSource.h; sourceTree = "<group>"; };
- D16775C4155C50280050EC64 /* TheoraException.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraException.h; path = include/theoraplayer/TheoraException.h; sourceTree = "<group>"; };
- D16775C5155C50280050EC64 /* TheoraExport.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraExport.h; path = include/theoraplayer/TheoraExport.h; sourceTree = "<group>"; };
- D16775C6155C50280050EC64 /* TheoraFrameQueue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraFrameQueue.h; path = include/theoraplayer/TheoraFrameQueue.h; sourceTree = "<group>"; };
- D16775C7155C50280050EC64 /* TheoraPlayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraPlayer.h; path = include/theoraplayer/TheoraPlayer.h; sourceTree = "<group>"; };
- D16775C8155C50280050EC64 /* TheoraTimer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraTimer.h; path = include/theoraplayer/TheoraTimer.h; sourceTree = "<group>"; };
- D16775C9155C50280050EC64 /* TheoraUtil.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraUtil.h; path = include/theoraplayer/TheoraUtil.h; sourceTree = "<group>"; };
- D16775CA155C50280050EC64 /* TheoraVideoClip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip.h; path = include/theoraplayer/TheoraVideoClip.h; sourceTree = "<group>"; };
- D16775CB155C50280050EC64 /* TheoraVideoFrame.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoFrame.h; path = include/theoraplayer/TheoraVideoFrame.h; sourceTree = "<group>"; };
- D16775CC155C50280050EC64 /* TheoraVideoManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoManager.h; path = include/theoraplayer/TheoraVideoManager.h; sourceTree = "<group>"; };
- D16775CD155C50280050EC64 /* TheoraWorkerThread.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraWorkerThread.h; path = include/theoraplayer/TheoraWorkerThread.h; sourceTree = "<group>"; };
- D198F97B177A31FC002942E3 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; };
- D198F9A7177A31FE002942E3 /* libtheoraplayer_avfoundation.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer_avfoundation.a; sourceTree = BUILT_PRODUCTS_DIR; };
- D198F9D4177A3200002942E3 /* libtheoraplayer_theora_avfoundation.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer_theora_avfoundation.a; sourceTree = BUILT_PRODUCTS_DIR; };
- D1BB6FAE150E9E7100EF9400 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; };
- D1BCE05718F3F7D800C83470 /* scale_row.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scale_row.h; path = src/YUV/libyuv/include/libyuv/scale_row.h; sourceTree = "<group>"; };
- D1BCE05818F3F7FE00C83470 /* scale_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_common.cc; path = src/YUV/libyuv/src/scale_common.cc; sourceTree = "<group>"; };
- D1BCE05918F3F7FE00C83470 /* scale_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_posix.cc; path = src/YUV/libyuv/src/scale_posix.cc; sourceTree = "<group>"; };
- D1BCE06C18F3F80800C83470 /* scale_win.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = scale_win.cc; path = src/YUV/libyuv/src/scale_win.cc; sourceTree = "<group>"; };
- D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare_common.cc; path = src/YUV/libyuv/src/compare_common.cc; sourceTree = "<group>"; };
- D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare_neon.cc; path = src/YUV/libyuv/src/compare_neon.cc; sourceTree = "<group>"; };
- D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare_posix.cc; path = src/YUV/libyuv/src/compare_posix.cc; sourceTree = "<group>"; };
- D1C3D05317C157CD00CA0FD2 /* compare.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare.cc; path = src/YUV/libyuv/src/compare.cc; sourceTree = "<group>"; };
- D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_argb.cc; path = src/YUV/libyuv/src/convert_argb.cc; sourceTree = "<group>"; };
- D1C3D05517C157CD00CA0FD2 /* convert_from_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_from_argb.cc; path = src/YUV/libyuv/src/convert_from_argb.cc; sourceTree = "<group>"; };
- D1C3D05617C157CD00CA0FD2 /* convert_from.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_from.cc; path = src/YUV/libyuv/src/convert_from.cc; sourceTree = "<group>"; };
- D1C3D05717C157CD00CA0FD2 /* convert_jpeg.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_jpeg.cc; path = src/YUV/libyuv/src/convert_jpeg.cc; sourceTree = "<group>"; };
- D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_to_argb.cc; path = src/YUV/libyuv/src/convert_to_argb.cc; sourceTree = "<group>"; };
- D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_to_i420.cc; path = src/YUV/libyuv/src/convert_to_i420.cc; sourceTree = "<group>"; };
- D1C3D05A17C157CD00CA0FD2 /* convert.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert.cc; path = src/YUV/libyuv/src/convert.cc; sourceTree = "<group>"; };
- D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = cpu_id.cc; path = src/YUV/libyuv/src/cpu_id.cc; sourceTree = "<group>"; };
- D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = format_conversion.cc; path = src/YUV/libyuv/src/format_conversion.cc; sourceTree = "<group>"; };
- D1C3D05D17C157CD00CA0FD2 /* mjpeg_decoder.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mjpeg_decoder.cc; path = src/YUV/libyuv/src/mjpeg_decoder.cc; sourceTree = "<group>"; };
- D1C3D05E17C157CD00CA0FD2 /* mjpeg_validate.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mjpeg_validate.cc; path = src/YUV/libyuv/src/mjpeg_validate.cc; sourceTree = "<group>"; };
- D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = planar_functions.cc; path = src/YUV/libyuv/src/planar_functions.cc; sourceTree = "<group>"; };
- D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate_argb.cc; path = src/YUV/libyuv/src/rotate_argb.cc; sourceTree = "<group>"; };
- D1C3D06117C157CD00CA0FD2 /* rotate_mips.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate_mips.cc; path = src/YUV/libyuv/src/rotate_mips.cc; sourceTree = "<group>"; };
- D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate_neon.cc; path = src/YUV/libyuv/src/rotate_neon.cc; sourceTree = "<group>"; };
- D1C3D06317C157CD00CA0FD2 /* rotate.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate.cc; path = src/YUV/libyuv/src/rotate.cc; sourceTree = "<group>"; };
- D1C3D06417C157CD00CA0FD2 /* row_any.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_any.cc; path = src/YUV/libyuv/src/row_any.cc; sourceTree = "<group>"; };
- D1C3D06517C157CD00CA0FD2 /* row_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_common.cc; path = src/YUV/libyuv/src/row_common.cc; sourceTree = "<group>"; };
- D1C3D06617C157CD00CA0FD2 /* row_mips.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_mips.cc; path = src/YUV/libyuv/src/row_mips.cc; sourceTree = "<group>"; };
- D1C3D06717C157CD00CA0FD2 /* row_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_neon.cc; path = src/YUV/libyuv/src/row_neon.cc; sourceTree = "<group>"; };
- D1C3D06817C157CD00CA0FD2 /* row_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_posix.cc; path = src/YUV/libyuv/src/row_posix.cc; sourceTree = "<group>"; };
- D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_argb_neon.cc; path = src/YUV/libyuv/src/scale_argb_neon.cc; sourceTree = "<group>"; };
- D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_argb.cc; path = src/YUV/libyuv/src/scale_argb.cc; sourceTree = "<group>"; };
- D1C3D06D17C157CD00CA0FD2 /* scale_mips.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_mips.cc; path = src/YUV/libyuv/src/scale_mips.cc; sourceTree = "<group>"; };
- D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_neon.cc; path = src/YUV/libyuv/src/scale_neon.cc; sourceTree = "<group>"; };
- D1C3D06F17C157CD00CA0FD2 /* scale.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale.cc; path = src/YUV/libyuv/src/scale.cc; sourceTree = "<group>"; };
- D1C3D07017C157CD00CA0FD2 /* video_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = video_common.cc; path = src/YUV/libyuv/src/video_common.cc; sourceTree = "<group>"; };
- D1C3D1CE17C15BB400CA0FD2 /* libyuv.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = libyuv.h; path = src/YUV/libyuv/include/libyuv.h; sourceTree = "<group>"; };
- D1C3D1CF17C15BC100CA0FD2 /* basic_types.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = basic_types.h; path = src/YUV/libyuv/include/libyuv/basic_types.h; sourceTree = "<group>"; };
- D1C3D1D017C15BC100CA0FD2 /* compare.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = compare.h; path = src/YUV/libyuv/include/libyuv/compare.h; sourceTree = "<group>"; };
- D1C3D1D117C15BC100CA0FD2 /* convert_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert_argb.h; path = src/YUV/libyuv/include/libyuv/convert_argb.h; sourceTree = "<group>"; };
- D1C3D1D217C15BC100CA0FD2 /* convert_from_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert_from_argb.h; path = src/YUV/libyuv/include/libyuv/convert_from_argb.h; sourceTree = "<group>"; };
- D1C3D1D317C15BC100CA0FD2 /* convert_from.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert_from.h; path = src/YUV/libyuv/include/libyuv/convert_from.h; sourceTree = "<group>"; };
- D1C3D1D417C15BC100CA0FD2 /* convert.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert.h; path = src/YUV/libyuv/include/libyuv/convert.h; sourceTree = "<group>"; };
- D1C3D1D517C15BC100CA0FD2 /* cpu_id.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = cpu_id.h; path = src/YUV/libyuv/include/libyuv/cpu_id.h; sourceTree = "<group>"; };
- D1C3D1D617C15BC100CA0FD2 /* format_conversion.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = format_conversion.h; path = src/YUV/libyuv/include/libyuv/format_conversion.h; sourceTree = "<group>"; };
- D1C3D1D717C15BC100CA0FD2 /* mjpeg_decoder.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = mjpeg_decoder.h; path = src/YUV/libyuv/include/libyuv/mjpeg_decoder.h; sourceTree = "<group>"; };
- D1C3D1D817C15BC100CA0FD2 /* planar_functions.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = planar_functions.h; path = src/YUV/libyuv/include/libyuv/planar_functions.h; sourceTree = "<group>"; };
- D1C3D1D917C15BC100CA0FD2 /* rotate_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = rotate_argb.h; path = src/YUV/libyuv/include/libyuv/rotate_argb.h; sourceTree = "<group>"; };
- D1C3D1DA17C15BC100CA0FD2 /* rotate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = rotate.h; path = src/YUV/libyuv/include/libyuv/rotate.h; sourceTree = "<group>"; };
- D1C3D1DB17C15BC100CA0FD2 /* row.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = row.h; path = src/YUV/libyuv/include/libyuv/row.h; sourceTree = "<group>"; };
- D1C3D1DC17C15BC100CA0FD2 /* scale_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scale_argb.h; path = src/YUV/libyuv/include/libyuv/scale_argb.h; sourceTree = "<group>"; };
- D1C3D1DD17C15BC100CA0FD2 /* scale.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scale.h; path = src/YUV/libyuv/include/libyuv/scale.h; sourceTree = "<group>"; };
- D1C3D1DE17C15BC100CA0FD2 /* version.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = version.h; path = src/YUV/libyuv/include/libyuv/version.h; sourceTree = "<group>"; };
- D1C3D1DF17C15BC100CA0FD2 /* video_common.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = video_common.h; path = src/YUV/libyuv/include/libyuv/video_common.h; sourceTree = "<group>"; };
- D1CD00031696FF9400609AB0 /* CoreMedia.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreMedia.framework; path = System/Library/Frameworks/CoreMedia.framework; sourceTree = SDKROOT; };
- D1CDFF481696C77A00609AB0 /* theoraplayer.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = theoraplayer.framework; sourceTree = BUILT_PRODUCTS_DIR; };
- D1CDFF701696C79700609AB0 /* theoraplayer.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = theoraplayer.framework; sourceTree = BUILT_PRODUCTS_DIR; };
- D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoClip_Theora.cpp; path = src/Theora/TheoraVideoClip_Theora.cpp; sourceTree = "<group>"; };
- D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip_Theora.h; path = src/Theora/TheoraVideoClip_Theora.h; sourceTree = "<group>"; };
- D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = TheoraVideoClip_AVFoundation.mm; path = src/AVFoundation/TheoraVideoClip_AVFoundation.mm; sourceTree = "<group>"; };
- D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip_AVFoundation.h; path = src/AVFoundation/TheoraVideoClip_AVFoundation.h; sourceTree = "<group>"; };
- D1CDFFC41696E1CA00609AB0 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; };
- D1CDFFE91696E1D700609AB0 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; };
- D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVFoundation.framework; path = System/Library/Frameworks/AVFoundation.framework; sourceTree = SDKROOT; };
- D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreVideo.framework; path = System/Library/Frameworks/CoreVideo.framework; sourceTree = SDKROOT; };
- D1CDFFF21696FBA800609AB0 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
- D1CDFFF71696FBF400609AB0 /* Ogg.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Ogg.framework; path = ../__build__/products/Debug/Ogg.framework; sourceTree = "<group>"; };
- D1CDFFF91696FBF700609AB0 /* Vorbis.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Vorbis.framework; path = ../__build__/products/Debug/Vorbis.framework; sourceTree = "<group>"; };
- D1CDFFFB1696FC0100609AB0 /* Theora.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Theora.framework; path = ../__build__/products/Debug/Theora.framework; sourceTree = "<group>"; };
- D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraAudioPacketQueue.h; path = include/theoraplayer/TheoraAudioPacketQueue.h; sourceTree = "<group>"; };
- D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraAudioPacketQueue.cpp; path = src/TheoraAudioPacketQueue.cpp; sourceTree = "<group>"; };
- D1E2718A16B46F640046C00C /* yuv420_grey_c.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv420_grey_c.c; path = src/YUV/C/yuv420_grey_c.c; sourceTree = "<group>"; };
- D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv420_yuv_c.c; path = src/YUV/C/yuv420_yuv_c.c; sourceTree = "<group>"; };
- D1E271AB16B470210046C00C /* yuv420_rgb_c.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv420_rgb_c.c; path = src/YUV/C/yuv420_rgb_c.c; sourceTree = "<group>"; };
- D1E271B216B471E80046C00C /* TheoraPixelTransform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraPixelTransform.h; path = include/theoraplayer/TheoraPixelTransform.h; sourceTree = "<group>"; };
- D1F4DA1D18FECACE007C1968 /* cpu-features.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "cpu-features.c"; path = "src/YUV/android/cpu-features.c"; sourceTree = "<group>"; };
- D1F4DA1E18FECACE007C1968 /* cpu-features.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "cpu-features.h"; path = "src/YUV/android/cpu-features.h"; sourceTree = "<group>"; };
-/* End PBXFileReference section */
-
-/* Begin PBXFrameworksBuildPhase section */
- D1473F26150CA69B00B20490 /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CD00001696FC0B00609AB0 /* Theora.framework in Frameworks */,
- D1CD00011696FC0B00609AB0 /* Vorbis.framework in Frameworks */,
- D1CD00021696FC0B00609AB0 /* Ogg.framework in Frameworks */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D198F963177A31FC002942E3 /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D198F98F177A31FE002942E3 /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D198F9BC177A3200002942E3 /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1BB6FAB150E9E7100EF9400 /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFF2F1696C77A00609AB0 /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CDFFF41696FBB200609AB0 /* CoreVideo.framework in Frameworks */,
- D1CDFFF51696FBB200609AB0 /* AVFoundation.framework in Frameworks */,
- D1CDFFF61696FBB200609AB0 /* Foundation.framework in Frameworks */,
- D1CD00051696FF9600609AB0 /* CoreMedia.framework in Frameworks */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFF571696C79700609AB0 /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CD00041696FF9400609AB0 /* CoreMedia.framework in Frameworks */,
- D1CDFFF31696FBA800609AB0 /* Foundation.framework in Frameworks */,
- D1CDFFF11696FB8900609AB0 /* CoreVideo.framework in Frameworks */,
- D1CDFFEE1696FB7200609AB0 /* AVFoundation.framework in Frameworks */,
- D1CDFFFD1696FC0800609AB0 /* Theora.framework in Frameworks */,
- D1CDFFFE1696FC0800609AB0 /* Vorbis.framework in Frameworks */,
- D1CDFFFF1696FC0800609AB0 /* Ogg.framework in Frameworks */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFFAE1696E1CA00609AB0 /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFFD31696E1D700609AB0 /* Frameworks */ = {
- isa = PBXFrameworksBuildPhase;
- buildActionMask = 2147483647;
- files = (
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
-/* End PBXFrameworksBuildPhase section */
-
-/* Begin PBXGroup section */
- D12CA55417734B2400412E5B /* FFmpeg */ = {
- isa = PBXGroup;
- children = (
- D12CA55517734B4200412E5B /* TheoraVideoClip_FFmpeg.cpp */,
- D12CA55617734B4200412E5B /* TheoraVideoClip_FFmpeg.h */,
- );
- name = FFmpeg;
- sourceTree = "<group>";
- };
- D1358BC118D7776700A36FDC /* config */ = {
- isa = PBXGroup;
- children = (
- D1358BC318D7777800A36FDC /* iOS.xcconfig */,
- D1358BC418D7777800A36FDC /* Mac.xcconfig */,
- D1358BC218D7777200A36FDC /* Info.plist */,
- );
- name = config;
- sourceTree = "<group>";
- };
- D139462A17C0ED2F0091F4A4 /* libyuv */ = {
- isa = PBXGroup;
- children = (
- D1C3D04E17C157AC00CA0FD2 /* include */,
- D1C3D04D17C157A800CA0FD2 /* src */,
- D139462B17C0ED450091F4A4 /* yuv_libyuv.c */,
- D139462C17C0ED450091F4A4 /* yuv_libyuv.h */,
- );
- name = libyuv;
- sourceTree = "<group>";
- };
- D1473F1E150CA69B00B20490 = {
- isa = PBXGroup;
- children = (
- D1358BC118D7776700A36FDC /* config */,
- D147401F150CAE9600B20490 /* include */,
- D1473F42150CA6C000B20490 /* src */,
- D1473F2C150CA69B00B20490 /* Frameworks */,
- D1473F2B150CA69B00B20490 /* Products */,
- );
- sourceTree = "<group>";
- };
- D1473F2B150CA69B00B20490 /* Products */ = {
- isa = PBXGroup;
- children = (
- D1473F2A150CA69B00B20490 /* theoraplayer.framework */,
- D1BB6FAE150E9E7100EF9400 /* libtheoraplayer.a */,
- D1CDFF481696C77A00609AB0 /* theoraplayer.framework */,
- D1CDFF701696C79700609AB0 /* theoraplayer.framework */,
- D1CDFFC41696E1CA00609AB0 /* libtheoraplayer.a */,
- D1CDFFE91696E1D700609AB0 /* libtheoraplayer.a */,
- D198F97B177A31FC002942E3 /* libtheoraplayer.a */,
- D198F9A7177A31FE002942E3 /* libtheoraplayer_avfoundation.a */,
- D198F9D4177A3200002942E3 /* libtheoraplayer_theora_avfoundation.a */,
- );
- name = Products;
- sourceTree = "<group>";
- };
- D1473F2C150CA69B00B20490 /* Frameworks */ = {
- isa = PBXGroup;
- children = (
- D1473F82150CA7F300B20490 /* mac */,
- );
- name = Frameworks;
- sourceTree = "<group>";
- };
- D1473F42150CA6C000B20490 /* src */ = {
- isa = PBXGroup;
- children = (
- D1E2718516B46F370046C00C /* YUV */,
- D1CDFF921696CEFA00609AB0 /* Theora */,
- D1CDFF931696CF0000609AB0 /* AVFoundation */,
- D12CA55417734B2400412E5B /* FFmpeg */,
- D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */,
- D167759E155C501D0050EC64 /* TheoraAsync.cpp */,
- D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */,
- D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */,
- D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */,
- D16775A1155C501D0050EC64 /* TheoraException.cpp */,
- D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */,
- D16775A3155C501D0050EC64 /* TheoraTimer.cpp */,
- D16775A4155C501D0050EC64 /* TheoraUtil.cpp */,
- D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */,
- D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */,
- D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */,
- );
- name = src;
- sourceTree = "<group>";
- };
- D1473F82150CA7F300B20490 /* mac */ = {
- isa = PBXGroup;
- children = (
- D1CD00031696FF9400609AB0 /* CoreMedia.framework */,
- D1CDFFFB1696FC0100609AB0 /* Theora.framework */,
- D1CDFFF91696FBF700609AB0 /* Vorbis.framework */,
- D1CDFFF71696FBF400609AB0 /* Ogg.framework */,
- D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */,
- D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */,
- D1CDFFF21696FBA800609AB0 /* Foundation.framework */,
- );
- name = mac;
- sourceTree = "<group>";
- };
- D147401F150CAE9600B20490 /* include */ = {
- isa = PBXGroup;
- children = (
- D16775C1155C50280050EC64 /* TheoraAsync.h */,
- D16775C2155C50280050EC64 /* TheoraAudioInterface.h */,
- D16775C3155C50280050EC64 /* TheoraDataSource.h */,
- D16775C4155C50280050EC64 /* TheoraException.h */,
- D16775C5155C50280050EC64 /* TheoraExport.h */,
- D1E271B216B471E80046C00C /* TheoraPixelTransform.h */,
- D16775CB155C50280050EC64 /* TheoraVideoFrame.h */,
- D16775C6155C50280050EC64 /* TheoraFrameQueue.h */,
- D16775C7155C50280050EC64 /* TheoraPlayer.h */,
- D16775C8155C50280050EC64 /* TheoraTimer.h */,
- D16775C9155C50280050EC64 /* TheoraUtil.h */,
- D16775CA155C50280050EC64 /* TheoraVideoClip.h */,
- D16775CC155C50280050EC64 /* TheoraVideoManager.h */,
- D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */,
- D16775CD155C50280050EC64 /* TheoraWorkerThread.h */,
- );
- name = include;
- sourceTree = "<group>";
- };
- D1C3D04D17C157A800CA0FD2 /* src */ = {
- isa = PBXGroup;
- children = (
- D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */,
- D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */,
- D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */,
- D159BCAB17C227940030FAB6 /* compare_win.cc */,
- D1C3D05317C157CD00CA0FD2 /* compare.cc */,
- D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */,
- D1C3D05517C157CD00CA0FD2 /* convert_from_argb.cc */,
- D1C3D05617C157CD00CA0FD2 /* convert_from.cc */,
- D1C3D05717C157CD00CA0FD2 /* convert_jpeg.cc */,
- D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */,
- D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */,
- D1C3D05A17C157CD00CA0FD2 /* convert.cc */,
- D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */,
- D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */,
- D1C3D05D17C157CD00CA0FD2 /* mjpeg_decoder.cc */,
- D1C3D05E17C157CD00CA0FD2 /* mjpeg_validate.cc */,
- D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */,
- D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */,
- D1C3D06117C157CD00CA0FD2 /* rotate_mips.cc */,
- D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */,
- D1C3D06317C157CD00CA0FD2 /* rotate.cc */,
- D1C3D06417C157CD00CA0FD2 /* row_any.cc */,
- D1C3D06517C157CD00CA0FD2 /* row_common.cc */,
- D1C3D06617C157CD00CA0FD2 /* row_mips.cc */,
- D1C3D06717C157CD00CA0FD2 /* row_neon.cc */,
- D1C3D06817C157CD00CA0FD2 /* row_posix.cc */,
- D159BCAC17C227940030FAB6 /* row_win.cc */,
- D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */,
- D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */,
- D1C3D06D17C157CD00CA0FD2 /* scale_mips.cc */,
- D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */,
- D1BCE06C18F3F80800C83470 /* scale_win.cc */,
- D1C3D06F17C157CD00CA0FD2 /* scale.cc */,
- D1BCE05818F3F7FE00C83470 /* scale_common.cc */,
- D1BCE05918F3F7FE00C83470 /* scale_posix.cc */,
- D1C3D07017C157CD00CA0FD2 /* video_common.cc */,
- D159BCAD17C227940030FAB6 /* row_x86.asm */,
- D159BCAE17C227940030FAB6 /* x86inc.asm */,
- );
- name = src;
- sourceTree = "<group>";
- };
- D1C3D04E17C157AC00CA0FD2 /* include */ = {
- isa = PBXGroup;
- children = (
- D1C3D1CD17C15BA900CA0FD2 /* libyuv */,
- D1C3D1CE17C15BB400CA0FD2 /* libyuv.h */,
- );
- name = include;
- sourceTree = "<group>";
- };
- D1C3D1CD17C15BA900CA0FD2 /* libyuv */ = {
- isa = PBXGroup;
- children = (
- D1C3D1CF17C15BC100CA0FD2 /* basic_types.h */,
- D1C3D1D017C15BC100CA0FD2 /* compare.h */,
- D1C3D1D117C15BC100CA0FD2 /* convert_argb.h */,
- D1C3D1D217C15BC100CA0FD2 /* convert_from_argb.h */,
- D1C3D1D317C15BC100CA0FD2 /* convert_from.h */,
- D1C3D1D417C15BC100CA0FD2 /* convert.h */,
- D1C3D1D517C15BC100CA0FD2 /* cpu_id.h */,
- D1C3D1D617C15BC100CA0FD2 /* format_conversion.h */,
- D1C3D1D717C15BC100CA0FD2 /* mjpeg_decoder.h */,
- D1C3D1D817C15BC100CA0FD2 /* planar_functions.h */,
- D1C3D1D917C15BC100CA0FD2 /* rotate_argb.h */,
- D1C3D1DA17C15BC100CA0FD2 /* rotate.h */,
- D1C3D1DB17C15BC100CA0FD2 /* row.h */,
- D1C3D1DC17C15BC100CA0FD2 /* scale_argb.h */,
- D1BCE05718F3F7D800C83470 /* scale_row.h */,
- D1C3D1DD17C15BC100CA0FD2 /* scale.h */,
- D1C3D1DE17C15BC100CA0FD2 /* version.h */,
- D1C3D1DF17C15BC100CA0FD2 /* video_common.h */,
- );
- name = libyuv;
- sourceTree = "<group>";
- };
- D1CDFF921696CEFA00609AB0 /* Theora */ = {
- isa = PBXGroup;
- children = (
- D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */,
- D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */,
- );
- name = Theora;
- sourceTree = "<group>";
- };
- D1CDFF931696CF0000609AB0 /* AVFoundation */ = {
- isa = PBXGroup;
- children = (
- D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */,
- D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */,
- );
- name = AVFoundation;
- sourceTree = "<group>";
- };
- D1E2718516B46F370046C00C /* YUV */ = {
- isa = PBXGroup;
- children = (
- D1F4DA1C18FECABC007C1968 /* android */,
- D139462A17C0ED2F0091F4A4 /* libyuv */,
- D1E2718716B46F4F0046C00C /* C */,
- D13946CA17C119B30091F4A4 /* yuv_util.c */,
- D13946CB17C119B30091F4A4 /* yuv_util.h */,
- );
- name = YUV;
- sourceTree = "<group>";
- };
- D1E2718716B46F4F0046C00C /* C */ = {
- isa = PBXGroup;
- children = (
- D1E271AB16B470210046C00C /* yuv420_rgb_c.c */,
- D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */,
- D1E2718A16B46F640046C00C /* yuv420_grey_c.c */,
- );
- name = C;
- sourceTree = "<group>";
- };
- D1F4DA1C18FECABC007C1968 /* android */ = {
- isa = PBXGroup;
- children = (
- D1F4DA1D18FECACE007C1968 /* cpu-features.c */,
- D1F4DA1E18FECACE007C1968 /* cpu-features.h */,
- );
- name = android;
- sourceTree = "<group>";
- };
-/* End PBXGroup section */
-
-/* Begin PBXHeadersBuildPhase section */
- D1473F27150CA69B00B20490 /* Headers */ = {
- isa = PBXHeadersBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D16775CE155C50280050EC64 /* TheoraAsync.h in Headers */,
- D16775D0155C50280050EC64 /* TheoraAudioInterface.h in Headers */,
- D16775D2155C50280050EC64 /* TheoraDataSource.h in Headers */,
- D16775D4155C50280050EC64 /* TheoraException.h in Headers */,
- D16775D6155C50280050EC64 /* TheoraExport.h in Headers */,
- D16775D8155C50280050EC64 /* TheoraFrameQueue.h in Headers */,
- D16775DA155C50280050EC64 /* TheoraPlayer.h in Headers */,
- D16775DC155C50280050EC64 /* TheoraTimer.h in Headers */,
- D16775DE155C50280050EC64 /* TheoraUtil.h in Headers */,
- D16775E0155C50280050EC64 /* TheoraVideoClip.h in Headers */,
- D16775E2155C50280050EC64 /* TheoraVideoFrame.h in Headers */,
- D16775E4155C50280050EC64 /* TheoraVideoManager.h in Headers */,
- D16775E6155C50280050EC64 /* TheoraWorkerThread.h in Headers */,
- D1D465D616C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */,
- D1E271B316B471E80046C00C /* TheoraPixelTransform.h in Headers */,
- D1CDFF991696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */,
- D139463617C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
- D13946D517C119B40091F4A4 /* yuv_util.h in Headers */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D198F964177A31FC002942E3 /* Headers */ = {
- isa = PBXHeadersBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D198F965177A31FC002942E3 /* TheoraAsync.h in Headers */,
- D198F966177A31FC002942E3 /* TheoraAudioInterface.h in Headers */,
- D198F967177A31FC002942E3 /* TheoraDataSource.h in Headers */,
- D198F968177A31FC002942E3 /* TheoraException.h in Headers */,
- D198F969177A31FC002942E3 /* TheoraExport.h in Headers */,
- D198F96A177A31FC002942E3 /* TheoraFrameQueue.h in Headers */,
- D198F96B177A31FC002942E3 /* TheoraPlayer.h in Headers */,
- D198F96C177A31FC002942E3 /* TheoraTimer.h in Headers */,
- D198F96D177A31FC002942E3 /* TheoraUtil.h in Headers */,
- D198F96E177A31FC002942E3 /* TheoraVideoClip.h in Headers */,
- D198F96F177A31FC002942E3 /* TheoraVideoFrame.h in Headers */,
- D198F970177A31FC002942E3 /* TheoraVideoManager.h in Headers */,
- D198F971177A31FC002942E3 /* TheoraWorkerThread.h in Headers */,
- D198F972177A31FC002942E3 /* TheoraVideoClip_Theora.h in Headers */,
- D198F974177A31FC002942E3 /* TheoraPixelTransform.h in Headers */,
- D139463917C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
- D13946D817C119B40091F4A4 /* yuv_util.h in Headers */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D198F990177A31FE002942E3 /* Headers */ = {
- isa = PBXHeadersBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D198F991177A31FE002942E3 /* TheoraAsync.h in Headers */,
- D198F992177A31FE002942E3 /* TheoraAudioInterface.h in Headers */,
- D198F993177A31FE002942E3 /* TheoraDataSource.h in Headers */,
- D198F994177A31FE002942E3 /* TheoraException.h in Headers */,
- D198F995177A31FE002942E3 /* TheoraExport.h in Headers */,
- D198F996177A31FE002942E3 /* TheoraFrameQueue.h in Headers */,
- D198F997177A31FE002942E3 /* TheoraPlayer.h in Headers */,
- D198F998177A31FE002942E3 /* TheoraTimer.h in Headers */,
- D198F999177A31FE002942E3 /* TheoraUtil.h in Headers */,
- D198F99A177A31FE002942E3 /* TheoraVideoClip.h in Headers */,
- D198F99B177A31FE002942E3 /* TheoraVideoFrame.h in Headers */,
- D198F99C177A31FE002942E3 /* TheoraVideoManager.h in Headers */,
- D198F99D177A31FE002942E3 /* TheoraWorkerThread.h in Headers */,
- D198F99E177A31FE002942E3 /* TheoraVideoClip_Theora.h in Headers */,
- D198F9A0177A31FE002942E3 /* TheoraPixelTransform.h in Headers */,
- D139463A17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
- D13946D917C119B40091F4A4 /* yuv_util.h in Headers */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D198F9BD177A3200002942E3 /* Headers */ = {
- isa = PBXHeadersBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D198F9BE177A3200002942E3 /* TheoraAsync.h in Headers */,
- D198F9BF177A3200002942E3 /* TheoraAudioInterface.h in Headers */,
- D198F9C0177A3200002942E3 /* TheoraDataSource.h in Headers */,
- D198F9C1177A3200002942E3 /* TheoraException.h in Headers */,
- D198F9C2177A3200002942E3 /* TheoraExport.h in Headers */,
- D198F9C3177A3200002942E3 /* TheoraFrameQueue.h in Headers */,
- D198F9C4177A3200002942E3 /* TheoraPlayer.h in Headers */,
- D198F9C5177A3200002942E3 /* TheoraTimer.h in Headers */,
- D198F9C6177A3200002942E3 /* TheoraUtil.h in Headers */,
- D198F9C7177A3200002942E3 /* TheoraVideoClip.h in Headers */,
- D198F9C8177A3200002942E3 /* TheoraVideoFrame.h in Headers */,
- D198F9C9177A3200002942E3 /* TheoraVideoManager.h in Headers */,
- D198F9CA177A3200002942E3 /* TheoraWorkerThread.h in Headers */,
- D198F9CB177A3200002942E3 /* TheoraVideoClip_Theora.h in Headers */,
- D198F9CD177A3200002942E3 /* TheoraPixelTransform.h in Headers */,
- D139463B17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
- D13946DA17C119B40091F4A4 /* yuv_util.h in Headers */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1BB6FAC150E9E7100EF9400 /* Headers */ = {
- isa = PBXHeadersBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D16775CF155C50280050EC64 /* TheoraAsync.h in Headers */,
- D16775D1155C50280050EC64 /* TheoraAudioInterface.h in Headers */,
- D16775D3155C50280050EC64 /* TheoraDataSource.h in Headers */,
- D16775D5155C50280050EC64 /* TheoraException.h in Headers */,
- D16775D7155C50280050EC64 /* TheoraExport.h in Headers */,
- D16775D9155C50280050EC64 /* TheoraFrameQueue.h in Headers */,
- D16775DB155C50280050EC64 /* TheoraPlayer.h in Headers */,
- D16775DD155C50280050EC64 /* TheoraTimer.h in Headers */,
- D16775DF155C50280050EC64 /* TheoraUtil.h in Headers */,
- D16775E1155C50280050EC64 /* TheoraVideoClip.h in Headers */,
- D16775E3155C50280050EC64 /* TheoraVideoFrame.h in Headers */,
- D16775E5155C50280050EC64 /* TheoraVideoManager.h in Headers */,
- D16775E7155C50280050EC64 /* TheoraWorkerThread.h in Headers */,
- D1CDFF9B1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */,
- D1E271B616B471E80046C00C /* TheoraPixelTransform.h in Headers */,
- D139463C17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
- D13946DB17C119B40091F4A4 /* yuv_util.h in Headers */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFF331696C77A00609AB0 /* Headers */ = {
- isa = PBXHeadersBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CDFF341696C77A00609AB0 /* TheoraAsync.h in Headers */,
- D1CDFF351696C77A00609AB0 /* TheoraAudioInterface.h in Headers */,
- D1CDFF361696C77A00609AB0 /* TheoraDataSource.h in Headers */,
- D1CDFF371696C77A00609AB0 /* TheoraException.h in Headers */,
- D1CDFF381696C77A00609AB0 /* TheoraExport.h in Headers */,
- D1CDFF391696C77A00609AB0 /* TheoraFrameQueue.h in Headers */,
- D1CDFF3A1696C77A00609AB0 /* TheoraPlayer.h in Headers */,
- D1CDFF3B1696C77A00609AB0 /* TheoraTimer.h in Headers */,
- D1CDFF3C1696C77A00609AB0 /* TheoraUtil.h in Headers */,
- D1CDFF3D1696C77A00609AB0 /* TheoraVideoClip.h in Headers */,
- D1CDFF3E1696C77A00609AB0 /* TheoraVideoFrame.h in Headers */,
- D1CDFF3F1696C77A00609AB0 /* TheoraVideoManager.h in Headers */,
- D1CDFF401696C77A00609AB0 /* TheoraWorkerThread.h in Headers */,
- D1E271B416B471E80046C00C /* TheoraPixelTransform.h in Headers */,
- D1D465D716C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */,
- D1CDFF9F1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h in Headers */,
- D139463717C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
- D13946D617C119B40091F4A4 /* yuv_util.h in Headers */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFF5B1696C79700609AB0 /* Headers */ = {
- isa = PBXHeadersBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CDFF5C1696C79700609AB0 /* TheoraAsync.h in Headers */,
- D1CDFF5D1696C79700609AB0 /* TheoraAudioInterface.h in Headers */,
- D1CDFF5E1696C79700609AB0 /* TheoraDataSource.h in Headers */,
- D1CDFF5F1696C79700609AB0 /* TheoraException.h in Headers */,
- D1CDFF601696C79700609AB0 /* TheoraExport.h in Headers */,
- D1CDFF611696C79700609AB0 /* TheoraFrameQueue.h in Headers */,
- D1CDFF621696C79700609AB0 /* TheoraPlayer.h in Headers */,
- D1CDFF631696C79700609AB0 /* TheoraTimer.h in Headers */,
- D1CDFF641696C79700609AB0 /* TheoraUtil.h in Headers */,
- D1CDFF651696C79700609AB0 /* TheoraVideoClip.h in Headers */,
- D1CDFF661696C79700609AB0 /* TheoraVideoFrame.h in Headers */,
- D1CDFF671696C79700609AB0 /* TheoraVideoManager.h in Headers */,
- D1CDFF681696C79700609AB0 /* TheoraWorkerThread.h in Headers */,
- D1E271B516B471E80046C00C /* TheoraPixelTransform.h in Headers */,
- D1D465D816C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */,
- D1CDFF9A1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */,
- D1F09EB1169AFEFB00DEEC63 /* TheoraVideoClip_AVFoundation.h in Headers */,
- D139463817C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
- D13946D717C119B40091F4A4 /* yuv_util.h in Headers */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFFAF1696E1CA00609AB0 /* Headers */ = {
- isa = PBXHeadersBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CDFFB01696E1CA00609AB0 /* TheoraAsync.h in Headers */,
- D1CDFFB11696E1CA00609AB0 /* TheoraAudioInterface.h in Headers */,
- D1CDFFB21696E1CA00609AB0 /* TheoraDataSource.h in Headers */,
- D1CDFFB31696E1CA00609AB0 /* TheoraException.h in Headers */,
- D1CDFFB41696E1CA00609AB0 /* TheoraExport.h in Headers */,
- D1CDFFB51696E1CA00609AB0 /* TheoraFrameQueue.h in Headers */,
- D1CDFFB61696E1CA00609AB0 /* TheoraPlayer.h in Headers */,
- D1CDFFB71696E1CA00609AB0 /* TheoraTimer.h in Headers */,
- D1CDFFB81696E1CA00609AB0 /* TheoraUtil.h in Headers */,
- D1CDFFB91696E1CA00609AB0 /* TheoraVideoClip.h in Headers */,
- D1CDFFBA1696E1CA00609AB0 /* TheoraVideoFrame.h in Headers */,
- D1CDFFBB1696E1CA00609AB0 /* TheoraVideoManager.h in Headers */,
- D1CDFFBC1696E1CA00609AB0 /* TheoraWorkerThread.h in Headers */,
- D1CDFFBD1696E1CA00609AB0 /* TheoraVideoClip_Theora.h in Headers */,
- D1E271B716B471E80046C00C /* TheoraPixelTransform.h in Headers */,
- D139463D17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
- D13946DC17C119B40091F4A4 /* yuv_util.h in Headers */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFFD41696E1D700609AB0 /* Headers */ = {
- isa = PBXHeadersBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CDFFD51696E1D700609AB0 /* TheoraAsync.h in Headers */,
- D1CDFFD61696E1D700609AB0 /* TheoraAudioInterface.h in Headers */,
- D1CDFFD71696E1D700609AB0 /* TheoraDataSource.h in Headers */,
- D1CDFFD81696E1D700609AB0 /* TheoraException.h in Headers */,
- D1CDFFD91696E1D700609AB0 /* TheoraExport.h in Headers */,
- D1CDFFDA1696E1D700609AB0 /* TheoraFrameQueue.h in Headers */,
- D1CDFFDB1696E1D700609AB0 /* TheoraPlayer.h in Headers */,
- D1CDFFDC1696E1D700609AB0 /* TheoraTimer.h in Headers */,
- D1CDFFDD1696E1D700609AB0 /* TheoraUtil.h in Headers */,
- D1CDFFDE1696E1D700609AB0 /* TheoraVideoClip.h in Headers */,
- D1CDFFDF1696E1D700609AB0 /* TheoraVideoFrame.h in Headers */,
- D1CDFFE01696E1D700609AB0 /* TheoraVideoManager.h in Headers */,
- D1CDFFE11696E1D700609AB0 /* TheoraWorkerThread.h in Headers */,
- D1CDFFE21696E1D700609AB0 /* TheoraVideoClip_Theora.h in Headers */,
- D1E271B816B471E80046C00C /* TheoraPixelTransform.h in Headers */,
- D139463E17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
- D13946DD17C119B40091F4A4 /* yuv_util.h in Headers */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
-/* End PBXHeadersBuildPhase section */
-
-/* Begin PBXNativeTarget section */
- D1473F29150CA69B00B20490 /* theoraplayer (Theora) */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = D1473F3F150CA69B00B20490 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora)" */;
- buildPhases = (
- D1473F25150CA69B00B20490 /* Sources */,
- D1473F26150CA69B00B20490 /* Frameworks */,
- D1473F27150CA69B00B20490 /* Headers */,
- );
- buildRules = (
- );
- dependencies = (
- );
- name = "theoraplayer (Theora)";
- productName = theoraplayer;
- productReference = D1473F2A150CA69B00B20490 /* theoraplayer.framework */;
- productType = "com.apple.product-type.framework";
- };
- D198F950177A31FC002942E3 /* theoraplayer (Mac Theora) */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = D198F975177A31FC002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora)" */;
- buildPhases = (
- D198F951177A31FC002942E3 /* Sources */,
- D198F963177A31FC002942E3 /* Frameworks */,
- D198F964177A31FC002942E3 /* Headers */,
- );
- buildRules = (
- );
- dependencies = (
- );
- name = "theoraplayer (Mac Theora)";
- productName = libtheoraplayer;
- productReference = D198F97B177A31FC002942E3 /* libtheoraplayer.a */;
- productType = "com.apple.product-type.library.static";
- };
- D198F97C177A31FE002942E3 /* theoraplayer (Mac AVFoundation) */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = D198F9A1177A31FE002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac AVFoundation)" */;
- buildPhases = (
- D198F97D177A31FE002942E3 /* Sources */,
- D198F98F177A31FE002942E3 /* Frameworks */,
- D198F990177A31FE002942E3 /* Headers */,
- );
- buildRules = (
- );
- dependencies = (
- );
- name = "theoraplayer (Mac AVFoundation)";
- productName = libtheoraplayer;
- productReference = D198F9A7177A31FE002942E3 /* libtheoraplayer_avfoundation.a */;
- productType = "com.apple.product-type.library.static";
- };
- D198F9A8177A3200002942E3 /* theoraplayer (Mac Theora AVFoundation) */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = D198F9CE177A3200002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora AVFoundation)" */;
- buildPhases = (
- D198F9A9177A3200002942E3 /* Sources */,
- D198F9BC177A3200002942E3 /* Frameworks */,
- D198F9BD177A3200002942E3 /* Headers */,
- );
- buildRules = (
- );
- dependencies = (
- );
- name = "theoraplayer (Mac Theora AVFoundation)";
- productName = libtheoraplayer;
- productReference = D198F9D4177A3200002942E3 /* libtheoraplayer_theora_avfoundation.a */;
- productType = "com.apple.product-type.library.static";
- };
- D1BB6FAD150E9E7100EF9400 /* theoraplayer (iOS Theora) */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = D1BB6FBC150E9E7100EF9400 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora)" */;
- buildPhases = (
- D1BB6FAA150E9E7100EF9400 /* Sources */,
- D1BB6FAB150E9E7100EF9400 /* Frameworks */,
- D1BB6FAC150E9E7100EF9400 /* Headers */,
- );
- buildRules = (
- );
- dependencies = (
- );
- name = "theoraplayer (iOS Theora)";
- productName = libtheoraplayer;
- productReference = D1BB6FAE150E9E7100EF9400 /* libtheoraplayer.a */;
- productType = "com.apple.product-type.library.static";
- };
- D1CDFF221696C77A00609AB0 /* theoraplayer (AVFoundation) */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = D1CDFF421696C77A00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (AVFoundation)" */;
- buildPhases = (
- D1CDFF231696C77A00609AB0 /* Sources */,
- D1CDFF2F1696C77A00609AB0 /* Frameworks */,
- D1CDFF331696C77A00609AB0 /* Headers */,
- );
- buildRules = (
- );
- dependencies = (
- );
- name = "theoraplayer (AVFoundation)";
- productName = theoraplayer;
- productReference = D1CDFF481696C77A00609AB0 /* theoraplayer.framework */;
- productType = "com.apple.product-type.framework";
- };
- D1CDFF4A1696C79700609AB0 /* theoraplayer (Theora AVFoundation) */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = D1CDFF6A1696C79700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora AVFoundation)" */;
- buildPhases = (
- D1CDFF4B1696C79700609AB0 /* Sources */,
- D1CDFF571696C79700609AB0 /* Frameworks */,
- D1CDFF5B1696C79700609AB0 /* Headers */,
- );
- buildRules = (
- );
- dependencies = (
- );
- name = "theoraplayer (Theora AVFoundation)";
- productName = theoraplayer;
- productReference = D1CDFF701696C79700609AB0 /* theoraplayer.framework */;
- productType = "com.apple.product-type.framework";
- };
- D1CDFFA01696E1CA00609AB0 /* theoraplayer (iOS AVFoundation) */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = D1CDFFBE1696E1CA00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS AVFoundation)" */;
- buildPhases = (
- D1CDFFA11696E1CA00609AB0 /* Sources */,
- D1CDFFAE1696E1CA00609AB0 /* Frameworks */,
- D1CDFFAF1696E1CA00609AB0 /* Headers */,
- );
- buildRules = (
- );
- dependencies = (
- );
- name = "theoraplayer (iOS AVFoundation)";
- productName = libtheoraplayer;
- productReference = D1CDFFC41696E1CA00609AB0 /* libtheoraplayer.a */;
- productType = "com.apple.product-type.library.static";
- };
- D1CDFFC51696E1D700609AB0 /* theoraplayer (iOS Theora AVFoundation) */ = {
- isa = PBXNativeTarget;
- buildConfigurationList = D1CDFFE31696E1D700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora AVFoundation)" */;
- buildPhases = (
- D1CDFFC61696E1D700609AB0 /* Sources */,
- D1CDFFD31696E1D700609AB0 /* Frameworks */,
- D1CDFFD41696E1D700609AB0 /* Headers */,
- );
- buildRules = (
- );
- dependencies = (
- );
- name = "theoraplayer (iOS Theora AVFoundation)";
- productName = libtheoraplayer;
- productReference = D1CDFFE91696E1D700609AB0 /* libtheoraplayer.a */;
- productType = "com.apple.product-type.library.static";
- };
-/* End PBXNativeTarget section */
-
-/* Begin PBXProject section */
- D1473F20150CA69B00B20490 /* Project object */ = {
- isa = PBXProject;
- attributes = {
- LastUpgradeCheck = 0510;
- };
- buildConfigurationList = D1473F23150CA69B00B20490 /* Build configuration list for PBXProject "theoraplayer" */;
- compatibilityVersion = "Xcode 3.2";
- developmentRegion = English;
- hasScannedForEncodings = 0;
- knownRegions = (
- en,
- );
- mainGroup = D1473F1E150CA69B00B20490;
- productRefGroup = D1473F2B150CA69B00B20490 /* Products */;
- projectDirPath = "";
- projectRoot = "";
- targets = (
- D1473F29150CA69B00B20490 /* theoraplayer (Theora) */,
- D1CDFF221696C77A00609AB0 /* theoraplayer (AVFoundation) */,
- D1CDFF4A1696C79700609AB0 /* theoraplayer (Theora AVFoundation) */,
- D198F950177A31FC002942E3 /* theoraplayer (Mac Theora) */,
- D198F97C177A31FE002942E3 /* theoraplayer (Mac AVFoundation) */,
- D198F9A8177A3200002942E3 /* theoraplayer (Mac Theora AVFoundation) */,
- D1BB6FAD150E9E7100EF9400 /* theoraplayer (iOS Theora) */,
- D1CDFFA01696E1CA00609AB0 /* theoraplayer (iOS AVFoundation) */,
- D1CDFFC51696E1D700609AB0 /* theoraplayer (iOS Theora AVFoundation) */,
- );
- };
-/* End PBXProject section */
-
-/* Begin PBXSourcesBuildPhase section */
- D1473F25150CA69B00B20490 /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D16775AB155C501D0050EC64 /* TheoraAsync.cpp in Sources */,
- D16775AD155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */,
- D16775AF155C501D0050EC64 /* TheoraDataSource.cpp in Sources */,
- D16775B1155C501D0050EC64 /* TheoraException.cpp in Sources */,
- D16775B3155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */,
- D16775B5155C501D0050EC64 /* TheoraTimer.cpp in Sources */,
- D16775B7155C501D0050EC64 /* TheoraUtil.cpp in Sources */,
- D16775B9155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */,
- D16775BB155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */,
- D16775BD155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */,
- D16775BF155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */,
- D1CDFF961696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */,
- D1E2719916B46F640046C00C /* yuv420_grey_c.c in Sources */,
- D1E271A516B46F640046C00C /* yuv420_yuv_c.c in Sources */,
- D1E271AC16B470210046C00C /* yuv420_rgb_c.c in Sources */,
- D1BCE05A18F3F7FE00C83470 /* scale_common.cc in Sources */,
- D1D465DA16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
- D139462D17C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
- D13946CC17C119B40091F4A4 /* yuv_util.c in Sources */,
- D1C3D07217C157CD00CA0FD2 /* compare_common.cc in Sources */,
- D1C3D08417C157CD00CA0FD2 /* compare_posix.cc in Sources */,
- D1BCE06318F3F7FE00C83470 /* scale_posix.cc in Sources */,
- D1C3D09617C157CD00CA0FD2 /* compare.cc in Sources */,
- D1C3D09F17C157CD00CA0FD2 /* convert_argb.cc in Sources */,
- D1C3D0C317C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
- D1C3D0CC17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
- D1C3D0D517C157CD00CA0FD2 /* convert.cc in Sources */,
- D1C3D0DE17C157CD00CA0FD2 /* cpu_id.cc in Sources */,
- D1C3D0E717C157CD00CA0FD2 /* format_conversion.cc in Sources */,
- D1C3D10217C157CD00CA0FD2 /* planar_functions.cc in Sources */,
- D1C3D12617C157CD00CA0FD2 /* rotate.cc in Sources */,
- D1C3D12F17C157CD00CA0FD2 /* row_any.cc in Sources */,
- D1C3D13817C157CD00CA0FD2 /* row_common.cc in Sources */,
- D1C3D15317C157CD00CA0FD2 /* row_posix.cc in Sources */,
- D1C3D17717C157CD00CA0FD2 /* scale_argb.cc in Sources */,
- D1C3D19B17C157CD00CA0FD2 /* video_common.cc in Sources */,
- D159BCB017C227F30030FAB6 /* convert_from.cc in Sources */,
- D159BCB917C228310030FAB6 /* rotate_argb.cc in Sources */,
- D159BCC217C2286D0030FAB6 /* scale.cc in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D198F951177A31FC002942E3 /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D198F952177A31FC002942E3 /* TheoraAsync.cpp in Sources */,
- D198F953177A31FC002942E3 /* TheoraAudioInterface.cpp in Sources */,
- D198F954177A31FC002942E3 /* TheoraDataSource.cpp in Sources */,
- D198F955177A31FC002942E3 /* TheoraException.cpp in Sources */,
- D198F956177A31FC002942E3 /* TheoraFrameQueue.cpp in Sources */,
- D198F957177A31FC002942E3 /* TheoraTimer.cpp in Sources */,
- D198F958177A31FC002942E3 /* TheoraUtil.cpp in Sources */,
- D198F959177A31FC002942E3 /* TheoraVideoClip.cpp in Sources */,
- D198F95A177A31FC002942E3 /* TheoraVideoFrame.cpp in Sources */,
- D198F95B177A31FC002942E3 /* TheoraVideoManager.cpp in Sources */,
- D198F95C177A31FC002942E3 /* TheoraWorkerThread.cpp in Sources */,
- D198F95D177A31FC002942E3 /* TheoraVideoClip_Theora.cpp in Sources */,
- D198F95F177A31FC002942E3 /* yuv420_grey_c.c in Sources */,
- D198F960177A31FC002942E3 /* yuv420_yuv_c.c in Sources */,
- D198F961177A31FC002942E3 /* yuv420_rgb_c.c in Sources */,
- D1BCE05D18F3F7FE00C83470 /* scale_common.cc in Sources */,
- D198F962177A31FC002942E3 /* TheoraAudioPacketQueue.cpp in Sources */,
- D139463017C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
- D13946CF17C119B40091F4A4 /* yuv_util.c in Sources */,
- D1C3D07517C157CD00CA0FD2 /* compare_common.cc in Sources */,
- D1C3D08717C157CD00CA0FD2 /* compare_posix.cc in Sources */,
- D1BCE06618F3F7FE00C83470 /* scale_posix.cc in Sources */,
- D1C3D09917C157CD00CA0FD2 /* compare.cc in Sources */,
- D1C3D0A217C157CD00CA0FD2 /* convert_argb.cc in Sources */,
- D1C3D0C617C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
- D1C3D0CF17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
- D1C3D0D817C157CD00CA0FD2 /* convert.cc in Sources */,
- D1C3D0E117C157CD00CA0FD2 /* cpu_id.cc in Sources */,
- D1C3D0EA17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
- D1C3D10517C157CD00CA0FD2 /* planar_functions.cc in Sources */,
- D1C3D12917C157CD00CA0FD2 /* rotate.cc in Sources */,
- D1C3D13217C157CD00CA0FD2 /* row_any.cc in Sources */,
- D1C3D13B17C157CD00CA0FD2 /* row_common.cc in Sources */,
- D1C3D15617C157CD00CA0FD2 /* row_posix.cc in Sources */,
- D1C3D17A17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
- D1C3D19E17C157CD00CA0FD2 /* video_common.cc in Sources */,
- D159BCB317C227F40030FAB6 /* convert_from.cc in Sources */,
- D159BCBC17C228330030FAB6 /* rotate_argb.cc in Sources */,
- D159BCC517C2286E0030FAB6 /* scale.cc in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D198F97D177A31FE002942E3 /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D198F97E177A31FE002942E3 /* TheoraAsync.cpp in Sources */,
- D198F97F177A31FE002942E3 /* TheoraAudioInterface.cpp in Sources */,
- D198F980177A31FE002942E3 /* TheoraDataSource.cpp in Sources */,
- D198F981177A31FE002942E3 /* TheoraException.cpp in Sources */,
- D198F982177A31FE002942E3 /* TheoraFrameQueue.cpp in Sources */,
- D198F983177A31FE002942E3 /* TheoraTimer.cpp in Sources */,
- D198F984177A31FE002942E3 /* TheoraUtil.cpp in Sources */,
- D198F985177A31FE002942E3 /* TheoraVideoClip.cpp in Sources */,
- D198F986177A31FE002942E3 /* TheoraVideoFrame.cpp in Sources */,
- D198F987177A31FE002942E3 /* TheoraVideoManager.cpp in Sources */,
- D198F988177A31FE002942E3 /* TheoraWorkerThread.cpp in Sources */,
- D198F989177A31FE002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */,
- D198F98B177A31FE002942E3 /* yuv420_grey_c.c in Sources */,
- D198F98C177A31FE002942E3 /* yuv420_yuv_c.c in Sources */,
- D198F98D177A31FE002942E3 /* yuv420_rgb_c.c in Sources */,
- D1BCE05E18F3F7FE00C83470 /* scale_common.cc in Sources */,
- D198F98E177A31FE002942E3 /* TheoraAudioPacketQueue.cpp in Sources */,
- D139463117C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
- D13946D017C119B40091F4A4 /* yuv_util.c in Sources */,
- D1C3D07617C157CD00CA0FD2 /* compare_common.cc in Sources */,
- D1C3D08817C157CD00CA0FD2 /* compare_posix.cc in Sources */,
- D1BCE06718F3F7FE00C83470 /* scale_posix.cc in Sources */,
- D1C3D09A17C157CD00CA0FD2 /* compare.cc in Sources */,
- D1C3D0A317C157CD00CA0FD2 /* convert_argb.cc in Sources */,
- D1C3D0C717C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
- D1C3D0D017C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
- D1C3D0D917C157CD00CA0FD2 /* convert.cc in Sources */,
- D1C3D0E217C157CD00CA0FD2 /* cpu_id.cc in Sources */,
- D1C3D0EB17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
- D1C3D10617C157CD00CA0FD2 /* planar_functions.cc in Sources */,
- D1C3D12A17C157CD00CA0FD2 /* rotate.cc in Sources */,
- D1C3D13317C157CD00CA0FD2 /* row_any.cc in Sources */,
- D1C3D13C17C157CD00CA0FD2 /* row_common.cc in Sources */,
- D1C3D15717C157CD00CA0FD2 /* row_posix.cc in Sources */,
- D1C3D17B17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
- D1C3D19F17C157CD00CA0FD2 /* video_common.cc in Sources */,
- D159BCB417C227F50030FAB6 /* convert_from.cc in Sources */,
- D159BCBD17C228330030FAB6 /* rotate_argb.cc in Sources */,
- D159BCC617C2286E0030FAB6 /* scale.cc in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D198F9A9177A3200002942E3 /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D198F9AA177A3200002942E3 /* TheoraAsync.cpp in Sources */,
- D198F9AB177A3200002942E3 /* TheoraAudioInterface.cpp in Sources */,
- D198F9AC177A3200002942E3 /* TheoraDataSource.cpp in Sources */,
- D198F9AD177A3200002942E3 /* TheoraException.cpp in Sources */,
- D198F9AE177A3200002942E3 /* TheoraFrameQueue.cpp in Sources */,
- D198F9AF177A3200002942E3 /* TheoraTimer.cpp in Sources */,
- D198F9B0177A3200002942E3 /* TheoraUtil.cpp in Sources */,
- D198F9B1177A3200002942E3 /* TheoraVideoClip.cpp in Sources */,
- D198F9B2177A3200002942E3 /* TheoraVideoFrame.cpp in Sources */,
- D198F9B3177A3200002942E3 /* TheoraVideoManager.cpp in Sources */,
- D198F9B4177A3200002942E3 /* TheoraWorkerThread.cpp in Sources */,
- D198F9B5177A3200002942E3 /* TheoraVideoClip_Theora.cpp in Sources */,
- D1BCE06818F3F7FE00C83470 /* scale_posix.cc in Sources */,
- D198F9B6177A3200002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */,
- D198F9B8177A3200002942E3 /* yuv420_grey_c.c in Sources */,
- D198F9B9177A3200002942E3 /* yuv420_yuv_c.c in Sources */,
- D198F9BA177A3200002942E3 /* yuv420_rgb_c.c in Sources */,
- D198F9BB177A3200002942E3 /* TheoraAudioPacketQueue.cpp in Sources */,
- D139463217C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
- D13946D117C119B40091F4A4 /* yuv_util.c in Sources */,
- D1C3D07717C157CD00CA0FD2 /* compare_common.cc in Sources */,
- D1C3D08917C157CD00CA0FD2 /* compare_posix.cc in Sources */,
- D1C3D09B17C157CD00CA0FD2 /* compare.cc in Sources */,
- D1C3D0A417C157CD00CA0FD2 /* convert_argb.cc in Sources */,
- D1C3D0C817C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
- D1C3D0D117C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
- D1C3D0DA17C157CD00CA0FD2 /* convert.cc in Sources */,
- D1C3D0E317C157CD00CA0FD2 /* cpu_id.cc in Sources */,
- D1C3D0EC17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
- D1C3D10717C157CD00CA0FD2 /* planar_functions.cc in Sources */,
- D1C3D12B17C157CD00CA0FD2 /* rotate.cc in Sources */,
- D1C3D13417C157CD00CA0FD2 /* row_any.cc in Sources */,
- D1C3D13D17C157CD00CA0FD2 /* row_common.cc in Sources */,
- D1C3D15817C157CD00CA0FD2 /* row_posix.cc in Sources */,
- D1C3D17C17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
- D1C3D1A017C157CD00CA0FD2 /* video_common.cc in Sources */,
- D159BCB517C227F50030FAB6 /* convert_from.cc in Sources */,
- D159BCBE17C228340030FAB6 /* rotate_argb.cc in Sources */,
- D1BCE05F18F3F7FE00C83470 /* scale_common.cc in Sources */,
- D159BCC717C2286F0030FAB6 /* scale.cc in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1BB6FAA150E9E7100EF9400 /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D16775AC155C501D0050EC64 /* TheoraAsync.cpp in Sources */,
- D1BCE06018F3F7FE00C83470 /* scale_common.cc in Sources */,
- D16775AE155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */,
- D16775B0155C501D0050EC64 /* TheoraDataSource.cpp in Sources */,
- D16775B2155C501D0050EC64 /* TheoraException.cpp in Sources */,
- D16775B4155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */,
- D16775B6155C501D0050EC64 /* TheoraTimer.cpp in Sources */,
- D16775B8155C501D0050EC64 /* TheoraUtil.cpp in Sources */,
- D16775BA155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */,
- D16775BC155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */,
- D16775BE155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */,
- D16775C0155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */,
- D1BCE06918F3F7FE00C83470 /* scale_posix.cc in Sources */,
- D1CDFF981696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */,
- D1E2719C16B46F640046C00C /* yuv420_grey_c.c in Sources */,
- D1E271A816B46F640046C00C /* yuv420_yuv_c.c in Sources */,
- D1E271AF16B470210046C00C /* yuv420_rgb_c.c in Sources */,
- D139463317C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
- D1D465DD16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
- D13946D217C119B40091F4A4 /* yuv_util.c in Sources */,
- D1C3D07817C157CD00CA0FD2 /* compare_common.cc in Sources */,
- D1C3D08117C157CD00CA0FD2 /* compare_neon.cc in Sources */,
- D1C3D09C17C157CD00CA0FD2 /* compare.cc in Sources */,
- D1C3D0A517C157CD00CA0FD2 /* convert_argb.cc in Sources */,
- D1C3D0C917C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
- D1C3D0D217C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
- D1C3D0DB17C157CD00CA0FD2 /* convert.cc in Sources */,
- D1C3D0E417C157CD00CA0FD2 /* cpu_id.cc in Sources */,
- D1C3D0ED17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
- D1C3D10817C157CD00CA0FD2 /* planar_functions.cc in Sources */,
- D1C3D12317C157CD00CA0FD2 /* rotate_neon.cc in Sources */,
- D1C3D12C17C157CD00CA0FD2 /* rotate.cc in Sources */,
- D1C3D13517C157CD00CA0FD2 /* row_any.cc in Sources */,
- D1C3D13E17C157CD00CA0FD2 /* row_common.cc in Sources */,
- D1C3D15017C157CD00CA0FD2 /* row_neon.cc in Sources */,
- D1C3D17417C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */,
- D1C3D17D17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
- D1C3D18F17C157CD00CA0FD2 /* scale_neon.cc in Sources */,
- D1C3D1A117C157CD00CA0FD2 /* video_common.cc in Sources */,
- D159BCB617C227F60030FAB6 /* convert_from.cc in Sources */,
- D159BCBF17C228340030FAB6 /* rotate_argb.cc in Sources */,
- D159BCC817C2286F0030FAB6 /* scale.cc in Sources */,
- D15D361017C386A600F40439 /* row_posix.cc in Sources */,
- D15D361317C386B100F40439 /* compare_posix.cc in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFF231696C77A00609AB0 /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CDFF241696C77A00609AB0 /* TheoraAsync.cpp in Sources */,
- D1CDFF251696C77A00609AB0 /* TheoraAudioInterface.cpp in Sources */,
- D1CDFF261696C77A00609AB0 /* TheoraDataSource.cpp in Sources */,
- D1CDFF271696C77A00609AB0 /* TheoraException.cpp in Sources */,
- D1CDFF281696C77A00609AB0 /* TheoraFrameQueue.cpp in Sources */,
- D1CDFF291696C77A00609AB0 /* TheoraTimer.cpp in Sources */,
- D1CDFF2A1696C77A00609AB0 /* TheoraUtil.cpp in Sources */,
- D1CDFF2B1696C77A00609AB0 /* TheoraVideoClip.cpp in Sources */,
- D1CDFF2C1696C77A00609AB0 /* TheoraVideoFrame.cpp in Sources */,
- D1CDFF2D1696C77A00609AB0 /* TheoraVideoManager.cpp in Sources */,
- D1CDFF2E1696C77A00609AB0 /* TheoraWorkerThread.cpp in Sources */,
- D1CDFF9E1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */,
- D1E2719A16B46F640046C00C /* yuv420_grey_c.c in Sources */,
- D1E271A616B46F640046C00C /* yuv420_yuv_c.c in Sources */,
- D1E271AD16B470210046C00C /* yuv420_rgb_c.c in Sources */,
- D1BCE05B18F3F7FE00C83470 /* scale_common.cc in Sources */,
- D1D465DB16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
- D139462E17C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
- D13946CD17C119B40091F4A4 /* yuv_util.c in Sources */,
- D1C3D07317C157CD00CA0FD2 /* compare_common.cc in Sources */,
- D1C3D08517C157CD00CA0FD2 /* compare_posix.cc in Sources */,
- D1BCE06418F3F7FE00C83470 /* scale_posix.cc in Sources */,
- D1C3D09717C157CD00CA0FD2 /* compare.cc in Sources */,
- D1C3D0A017C157CD00CA0FD2 /* convert_argb.cc in Sources */,
- D1C3D0C417C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
- D1C3D0CD17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
- D1C3D0D617C157CD00CA0FD2 /* convert.cc in Sources */,
- D1C3D0DF17C157CD00CA0FD2 /* cpu_id.cc in Sources */,
- D1C3D0E817C157CD00CA0FD2 /* format_conversion.cc in Sources */,
- D1C3D10317C157CD00CA0FD2 /* planar_functions.cc in Sources */,
- D1C3D12717C157CD00CA0FD2 /* rotate.cc in Sources */,
- D1C3D13017C157CD00CA0FD2 /* row_any.cc in Sources */,
- D1C3D13917C157CD00CA0FD2 /* row_common.cc in Sources */,
- D1C3D15417C157CD00CA0FD2 /* row_posix.cc in Sources */,
- D1C3D17817C157CD00CA0FD2 /* scale_argb.cc in Sources */,
- D1C3D19C17C157CD00CA0FD2 /* video_common.cc in Sources */,
- D159BCB117C227F40030FAB6 /* convert_from.cc in Sources */,
- D159BCBA17C228320030FAB6 /* rotate_argb.cc in Sources */,
- D159BCC317C2286D0030FAB6 /* scale.cc in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFF4B1696C79700609AB0 /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CDFF4C1696C79700609AB0 /* TheoraAsync.cpp in Sources */,
- D1CDFF4D1696C79700609AB0 /* TheoraAudioInterface.cpp in Sources */,
- D1CDFF4E1696C79700609AB0 /* TheoraDataSource.cpp in Sources */,
- D1CDFF4F1696C79700609AB0 /* TheoraException.cpp in Sources */,
- D1CDFF501696C79700609AB0 /* TheoraFrameQueue.cpp in Sources */,
- D1CDFF511696C79700609AB0 /* TheoraTimer.cpp in Sources */,
- D1CDFF521696C79700609AB0 /* TheoraUtil.cpp in Sources */,
- D1CDFF531696C79700609AB0 /* TheoraVideoClip.cpp in Sources */,
- D1CDFF541696C79700609AB0 /* TheoraVideoFrame.cpp in Sources */,
- D1CDFF551696C79700609AB0 /* TheoraVideoManager.cpp in Sources */,
- D1CDFF561696C79700609AB0 /* TheoraWorkerThread.cpp in Sources */,
- D1CDFF971696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */,
- D1BCE06518F3F7FE00C83470 /* scale_posix.cc in Sources */,
- D1CDFFEC1696E24F00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */,
- D1E2719B16B46F640046C00C /* yuv420_grey_c.c in Sources */,
- D1E271A716B46F640046C00C /* yuv420_yuv_c.c in Sources */,
- D1E271AE16B470210046C00C /* yuv420_rgb_c.c in Sources */,
- D1D465DC16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
- D139462F17C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
- D13946CE17C119B40091F4A4 /* yuv_util.c in Sources */,
- D1C3D07417C157CD00CA0FD2 /* compare_common.cc in Sources */,
- D1C3D08617C157CD00CA0FD2 /* compare_posix.cc in Sources */,
- D1C3D09817C157CD00CA0FD2 /* compare.cc in Sources */,
- D1C3D0A117C157CD00CA0FD2 /* convert_argb.cc in Sources */,
- D1C3D0C517C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
- D1C3D0CE17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
- D1C3D0D717C157CD00CA0FD2 /* convert.cc in Sources */,
- D1C3D0E017C157CD00CA0FD2 /* cpu_id.cc in Sources */,
- D1C3D0E917C157CD00CA0FD2 /* format_conversion.cc in Sources */,
- D1C3D10417C157CD00CA0FD2 /* planar_functions.cc in Sources */,
- D1C3D12817C157CD00CA0FD2 /* rotate.cc in Sources */,
- D1C3D13117C157CD00CA0FD2 /* row_any.cc in Sources */,
- D1C3D13A17C157CD00CA0FD2 /* row_common.cc in Sources */,
- D1C3D15517C157CD00CA0FD2 /* row_posix.cc in Sources */,
- D1C3D17917C157CD00CA0FD2 /* scale_argb.cc in Sources */,
- D1C3D19D17C157CD00CA0FD2 /* video_common.cc in Sources */,
- D159BCB217C227F40030FAB6 /* convert_from.cc in Sources */,
- D159BCBB17C228320030FAB6 /* rotate_argb.cc in Sources */,
- D1BCE05C18F3F7FE00C83470 /* scale_common.cc in Sources */,
- D159BCC417C2286D0030FAB6 /* scale.cc in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFFA11696E1CA00609AB0 /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CDFFA21696E1CA00609AB0 /* TheoraAsync.cpp in Sources */,
- D1BCE06118F3F7FE00C83470 /* scale_common.cc in Sources */,
- D1CDFFA31696E1CA00609AB0 /* TheoraAudioInterface.cpp in Sources */,
- D1CDFFA41696E1CA00609AB0 /* TheoraDataSource.cpp in Sources */,
- D1CDFFA51696E1CA00609AB0 /* TheoraException.cpp in Sources */,
- D1CDFFA61696E1CA00609AB0 /* TheoraFrameQueue.cpp in Sources */,
- D1CDFFA71696E1CA00609AB0 /* TheoraTimer.cpp in Sources */,
- D1CDFFA81696E1CA00609AB0 /* TheoraUtil.cpp in Sources */,
- D1CDFFA91696E1CA00609AB0 /* TheoraVideoClip.cpp in Sources */,
- D1CDFFAA1696E1CA00609AB0 /* TheoraVideoFrame.cpp in Sources */,
- D1CDFFAB1696E1CA00609AB0 /* TheoraVideoManager.cpp in Sources */,
- D1CDFFAC1696E1CA00609AB0 /* TheoraWorkerThread.cpp in Sources */,
- D1BCE06A18F3F7FE00C83470 /* scale_posix.cc in Sources */,
- D1CDFFEA1696E24B00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */,
- D1E2719D16B46F640046C00C /* yuv420_grey_c.c in Sources */,
- D1E271A916B46F640046C00C /* yuv420_yuv_c.c in Sources */,
- D1E271B016B470210046C00C /* yuv420_rgb_c.c in Sources */,
- D139463417C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
- D1D465DE16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
- D13946D317C119B40091F4A4 /* yuv_util.c in Sources */,
- D1C3D07917C157CD00CA0FD2 /* compare_common.cc in Sources */,
- D1C3D08217C157CD00CA0FD2 /* compare_neon.cc in Sources */,
- D1C3D09D17C157CD00CA0FD2 /* compare.cc in Sources */,
- D1C3D0A617C157CD00CA0FD2 /* convert_argb.cc in Sources */,
- D1C3D0CA17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
- D1C3D0D317C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
- D1C3D0DC17C157CD00CA0FD2 /* convert.cc in Sources */,
- D1C3D0E517C157CD00CA0FD2 /* cpu_id.cc in Sources */,
- D1C3D0EE17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
- D1C3D10917C157CD00CA0FD2 /* planar_functions.cc in Sources */,
- D1C3D12417C157CD00CA0FD2 /* rotate_neon.cc in Sources */,
- D1C3D12D17C157CD00CA0FD2 /* rotate.cc in Sources */,
- D1C3D13617C157CD00CA0FD2 /* row_any.cc in Sources */,
- D1C3D13F17C157CD00CA0FD2 /* row_common.cc in Sources */,
- D1C3D15117C157CD00CA0FD2 /* row_neon.cc in Sources */,
- D1C3D17517C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */,
- D1C3D17E17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
- D1C3D19017C157CD00CA0FD2 /* scale_neon.cc in Sources */,
- D1C3D1A217C157CD00CA0FD2 /* video_common.cc in Sources */,
- D159BCB717C227F60030FAB6 /* convert_from.cc in Sources */,
- D159BCC017C228340030FAB6 /* rotate_argb.cc in Sources */,
- D159BCC917C2286F0030FAB6 /* scale.cc in Sources */,
- D15D361117C386A600F40439 /* row_posix.cc in Sources */,
- D15D361617C386B400F40439 /* compare_posix.cc in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
- D1CDFFC61696E1D700609AB0 /* Sources */ = {
- isa = PBXSourcesBuildPhase;
- buildActionMask = 2147483647;
- files = (
- D1CDFFC71696E1D700609AB0 /* TheoraAsync.cpp in Sources */,
- D1CDFFC81696E1D700609AB0 /* TheoraAudioInterface.cpp in Sources */,
- D1CDFFC91696E1D700609AB0 /* TheoraDataSource.cpp in Sources */,
- D1CDFFCA1696E1D700609AB0 /* TheoraException.cpp in Sources */,
- D1CDFFCB1696E1D700609AB0 /* TheoraFrameQueue.cpp in Sources */,
- D1CDFFCC1696E1D700609AB0 /* TheoraTimer.cpp in Sources */,
- D1CDFFCD1696E1D700609AB0 /* TheoraUtil.cpp in Sources */,
- D1CDFFCE1696E1D700609AB0 /* TheoraVideoClip.cpp in Sources */,
- D1CDFFCF1696E1D700609AB0 /* TheoraVideoFrame.cpp in Sources */,
- D1CDFFD01696E1D700609AB0 /* TheoraVideoManager.cpp in Sources */,
- D1CDFFD11696E1D700609AB0 /* TheoraWorkerThread.cpp in Sources */,
- D1CDFFD21696E1D700609AB0 /* TheoraVideoClip_Theora.cpp in Sources */,
- D1CDFFEB1696E24C00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */,
- D1E2719E16B46F640046C00C /* yuv420_grey_c.c in Sources */,
- D1E271AA16B46F640046C00C /* yuv420_yuv_c.c in Sources */,
- D1E271B116B470210046C00C /* yuv420_rgb_c.c in Sources */,
- D13946C617C110670091F4A4 /* yuv_libyuv.c in Sources */,
- D1D465DF16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
- D13946D417C119B40091F4A4 /* yuv_util.c in Sources */,
- D1C3D07A17C157CD00CA0FD2 /* compare_common.cc in Sources */,
- D1C3D08317C157CD00CA0FD2 /* compare_neon.cc in Sources */,
- D1C3D09E17C157CD00CA0FD2 /* compare.cc in Sources */,
- D1C3D0A717C157CD00CA0FD2 /* convert_argb.cc in Sources */,
- D1C3D0CB17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
- D1C3D0D417C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
- D1C3D0DD17C157CD00CA0FD2 /* convert.cc in Sources */,
- D1C3D0E617C157CD00CA0FD2 /* cpu_id.cc in Sources */,
- D1C3D0EF17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
- D1C3D10A17C157CD00CA0FD2 /* planar_functions.cc in Sources */,
- D1C3D12517C157CD00CA0FD2 /* rotate_neon.cc in Sources */,
- D1C3D12E17C157CD00CA0FD2 /* rotate.cc in Sources */,
- D1C3D13717C157CD00CA0FD2 /* row_any.cc in Sources */,
- D1C3D14017C157CD00CA0FD2 /* row_common.cc in Sources */,
- D1C3D15217C157CD00CA0FD2 /* row_neon.cc in Sources */,
- D1C3D17617C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */,
- D1C3D17F17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
- D1C3D19117C157CD00CA0FD2 /* scale_neon.cc in Sources */,
- D1C3D1A317C157CD00CA0FD2 /* video_common.cc in Sources */,
- D159BCB817C227F70030FAB6 /* convert_from.cc in Sources */,
- D1BCE06218F3F7FE00C83470 /* scale_common.cc in Sources */,
- D159BCC117C228350030FAB6 /* rotate_argb.cc in Sources */,
- D159BCCA17C228700030FAB6 /* scale.cc in Sources */,
- D1BCE06B18F3F7FE00C83470 /* scale_posix.cc in Sources */,
- D15D361217C386A700F40439 /* row_posix.cc in Sources */,
- D15D361517C386B300F40439 /* compare_posix.cc in Sources */,
- );
- runOnlyForDeploymentPostprocessing = 0;
- };
-/* End PBXSourcesBuildPhase section */
-
-/* Begin XCBuildConfiguration section */
- D1473F43150CA6CE00B20490 /* Debug */ = {
- isa = XCBuildConfiguration;
- buildSettings = {
- ALWAYS_SEARCH_USER_PATHS = NO;
- CLANG_WARN_BOOL_CONVERSION = YES;
- CLANG_WARN_CONSTANT_CONVERSION = YES;
- CLANG_WARN_EMPTY_BODY = YES;
- CLANG_WARN_ENUM_CONVERSION = YES;
- CLANG_WARN_INT_CONVERSION = YES;
- CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
- COPY_PHASE_STRIP = NO;
- GCC_DYNAMIC_NO_PIC = NO;
- GCC_ENABLE_OBJC_EXCEPTIONS = YES;
- GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
- GCC_INLINES_ARE_PRIVATE_EXTERN = YES;
- GCC_OPTIMIZATION_LEVEL = 0;
- GCC_PREPROCESSOR_DEFINITIONS = (
- "DEBUG=1",
- _DEBUG,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphoneos*]" = (
- "$(LIBYUV_PREPROCESSOR_IOS)",
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphonesimulator*]" = "$(inherited)";
- "GCC_PREPROCESSOR_DEFINITIONS[sdk=macosx*]" = (
- "$(LIBYUV_PREPROCESSOR_MAC)",
- "$(inherited)",
- );
- GCC_SYMBOLS_PRIVATE_EXTERN = YES;
- GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
- GCC_WARN_ABOUT_RETURN_TYPE = YES;
- GCC_WARN_UNDECLARED_SELECTOR = YES;
- GCC_WARN_UNINITIALIZED_AUTOS = YES;
- GCC_WARN_UNUSED_FUNCTION = YES;
- GCC_WARN_UNUSED_VARIABLE = YES;
- HEADER_SEARCH_PATHS = (
- "$(SRCROOT)/../ogg/include",
- "$(SRCROOT)/../vorbis/include",
- "$(SRCROOT)/../xal/lib/ogg/include",
- "$(SRCROOT)/../xal/lib/vorbis/include",
- "$(SRCROOT)/../theora/include",
- "$(SRCROOT)/src/YUV/libyuv/include",
- );
- LIBYUV_PREPROCESSOR_IOS = "LIBYUV_NEON __ARM_NEON__";
- LIBYUV_PREPROCESSOR_MAC = __SSSE3__;
- ONLY_ACTIVE_ARCH = YES;
- SKIP_INSTALL = YES;
- };
- name = Debug;
- };
- D1473F44150CA6CE00B20490 /* Debug */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- __THEORA,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- INFOPLIST_FILE = Info.plist;
- LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
- PRODUCT_NAME = theoraplayer;
- WRAPPER_EXTENSION = framework;
- };
- name = Debug;
- };
- D1473F45150CA6D600B20490 /* App Store */ = {
- isa = XCBuildConfiguration;
- buildSettings = {
- ALWAYS_SEARCH_USER_PATHS = NO;
- CLANG_WARN_BOOL_CONVERSION = YES;
- CLANG_WARN_CONSTANT_CONVERSION = YES;
- CLANG_WARN_EMPTY_BODY = YES;
- CLANG_WARN_ENUM_CONVERSION = YES;
- CLANG_WARN_INT_CONVERSION = YES;
- CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
- COPY_PHASE_STRIP = YES;
- GCC_ENABLE_OBJC_EXCEPTIONS = YES;
- GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
- GCC_INLINES_ARE_PRIVATE_EXTERN = YES;
- GCC_OPTIMIZATION_LEVEL = 3;
- "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphoneos*]" = (
- "$(LIBYUV_PREPROCESSOR_IOS)",
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphonesimulator*]" = "$(inherited)";
- "GCC_PREPROCESSOR_DEFINITIONS[sdk=macosx*]" = (
- "$(LIBYUV_PREPROCESSOR_MAC)",
- "$(inherited)",
- );
- GCC_SYMBOLS_PRIVATE_EXTERN = YES;
- GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
- GCC_WARN_ABOUT_RETURN_TYPE = YES;
- GCC_WARN_UNDECLARED_SELECTOR = YES;
- GCC_WARN_UNINITIALIZED_AUTOS = YES;
- GCC_WARN_UNUSED_FUNCTION = YES;
- GCC_WARN_UNUSED_VARIABLE = YES;
- HEADER_SEARCH_PATHS = (
- "$(SRCROOT)/../ogg/include",
- "$(SRCROOT)/../vorbis/include",
- "$(SRCROOT)/../xal/lib/ogg/include",
- "$(SRCROOT)/../xal/lib/vorbis/include",
- "$(SRCROOT)/../theora/include",
- "$(SRCROOT)/src/YUV/libyuv/include",
- );
- LIBYUV_PREPROCESSOR_IOS = "LIBYUV_NEON __ARM_NEON__";
- LIBYUV_PREPROCESSOR_MAC = __SSSE3__;
- SKIP_INSTALL = YES;
- };
- name = "App Store";
- };
- D1473F46150CA6D600B20490 /* App Store */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- __THEORA,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- INFOPLIST_FILE = Info.plist;
- LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
- PRODUCT_NAME = theoraplayer;
- WRAPPER_EXTENSION = framework;
- };
- name = "App Store";
- };
- D1473F47150CA6E200B20490 /* Release */ = {
- isa = XCBuildConfiguration;
- buildSettings = {
- ALWAYS_SEARCH_USER_PATHS = NO;
- CLANG_WARN_BOOL_CONVERSION = YES;
- CLANG_WARN_CONSTANT_CONVERSION = YES;
- CLANG_WARN_EMPTY_BODY = YES;
- CLANG_WARN_ENUM_CONVERSION = YES;
- CLANG_WARN_INT_CONVERSION = YES;
- CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
- COPY_PHASE_STRIP = YES;
- GCC_ENABLE_OBJC_EXCEPTIONS = YES;
- GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
- GCC_INLINES_ARE_PRIVATE_EXTERN = YES;
- GCC_OPTIMIZATION_LEVEL = 3;
- "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphoneos*]" = (
- "$(LIBYUV_PREPROCESSOR_IOS)",
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphonesimulator*]" = "$(inherited)";
- "GCC_PREPROCESSOR_DEFINITIONS[sdk=macosx*]" = (
- "$(LIBYUV_PREPROCESSOR_MAC)",
- "$(inherited)",
- );
- GCC_SYMBOLS_PRIVATE_EXTERN = YES;
- GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
- GCC_WARN_ABOUT_RETURN_TYPE = YES;
- GCC_WARN_UNDECLARED_SELECTOR = YES;
- GCC_WARN_UNINITIALIZED_AUTOS = YES;
- GCC_WARN_UNUSED_FUNCTION = YES;
- GCC_WARN_UNUSED_VARIABLE = YES;
- HEADER_SEARCH_PATHS = (
- "$(SRCROOT)/../ogg/include",
- "$(SRCROOT)/../vorbis/include",
- "$(SRCROOT)/../xal/lib/ogg/include",
- "$(SRCROOT)/../xal/lib/vorbis/include",
- "$(SRCROOT)/../theora/include",
- "$(SRCROOT)/src/YUV/libyuv/include",
- );
- LIBYUV_PREPROCESSOR_IOS = "LIBYUV_NEON __ARM_NEON__";
- LIBYUV_PREPROCESSOR_MAC = __SSSE3__;
- SKIP_INSTALL = YES;
- };
- name = Release;
- };
- D1473F48150CA6E200B20490 /* Release */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- __THEORA,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- INFOPLIST_FILE = Info.plist;
- LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
- PRODUCT_NAME = theoraplayer;
- WRAPPER_EXTENSION = framework;
- };
- name = Release;
- };
- D198F977177A31FC002942E3 /* Debug */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _MAC,
- __THEORA,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = Debug;
- };
- D198F979177A31FC002942E3 /* Release */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _MAC,
- __THEORA,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = Release;
- };
- D198F97A177A31FC002942E3 /* App Store */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _MAC,
- __THEORA,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = "App Store";
- };
- D198F9A3177A31FE002942E3 /* Debug */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _MAC,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer_avfoundation;
- SKIP_INSTALL = YES;
- };
- name = Debug;
- };
- D198F9A5177A31FE002942E3 /* Release */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _MAC,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer_avfoundation;
- SKIP_INSTALL = YES;
- };
- name = Release;
- };
- D198F9A6177A31FE002942E3 /* App Store */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _MAC,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer_avfoundation;
- SKIP_INSTALL = YES;
- };
- name = "App Store";
- };
- D198F9D0177A3200002942E3 /* Debug */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _MAC,
- __THEORA,
- __AVFOUNDATION,
- YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer_theora_avfoundation;
- SKIP_INSTALL = YES;
- };
- name = Debug;
- };
- D198F9D2177A3200002942E3 /* Release */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _MAC,
- __THEORA,
- __AVFOUNDATION,
- YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer_theora_avfoundation;
- SKIP_INSTALL = YES;
- };
- name = Release;
- };
- D198F9D3177A3200002942E3 /* App Store */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _MAC,
- __THEORA,
- __AVFOUNDATION,
- YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer_theora_avfoundation;
- SKIP_INSTALL = YES;
- };
- name = "App Store";
- };
- D1BB6FB8150E9E7100EF9400 /* Debug */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _IOS,
- __THEORA,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = Debug;
- };
- D1BB6FBA150E9E7100EF9400 /* Release */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _IOS,
- __THEORA,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = Release;
- };
- D1BB6FBB150E9E7100EF9400 /* App Store */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _IOS,
- __THEORA,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = "App Store";
- };
- D1CDFF441696C77A00609AB0 /* Debug */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- INFOPLIST_FILE = Info.plist;
- LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
- PRODUCT_NAME = theoraplayer;
- WRAPPER_EXTENSION = framework;
- };
- name = Debug;
- };
- D1CDFF461696C77A00609AB0 /* Release */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- INFOPLIST_FILE = Info.plist;
- LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
- PRODUCT_NAME = theoraplayer;
- WRAPPER_EXTENSION = framework;
- };
- name = Release;
- };
- D1CDFF471696C77A00609AB0 /* App Store */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- INFOPLIST_FILE = Info.plist;
- LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
- PRODUCT_NAME = theoraplayer;
- WRAPPER_EXTENSION = framework;
- };
- name = "App Store";
- };
- D1CDFF6C1696C79700609AB0 /* Debug */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- __THEORA,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- INFOPLIST_FILE = Info.plist;
- LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
- PRODUCT_NAME = theoraplayer;
- WRAPPER_EXTENSION = framework;
- };
- name = Debug;
- };
- D1CDFF6E1696C79700609AB0 /* Release */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- __THEORA,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- INFOPLIST_FILE = Info.plist;
- LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
- PRODUCT_NAME = theoraplayer;
- WRAPPER_EXTENSION = framework;
- };
- name = Release;
- };
- D1CDFF6F1696C79700609AB0 /* App Store */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- __THEORA,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- INFOPLIST_FILE = Info.plist;
- LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
- PRODUCT_NAME = theoraplayer;
- WRAPPER_EXTENSION = framework;
- };
- name = "App Store";
- };
- D1CDFFC01696E1CA00609AB0 /* Debug */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _IOS,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = Debug;
- };
- D1CDFFC21696E1CA00609AB0 /* Release */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _IOS,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = Release;
- };
- D1CDFFC31696E1CA00609AB0 /* App Store */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _IOS,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = "App Store";
- };
- D1CDFFE51696E1D700609AB0 /* Debug */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _IOS,
- __THEORA,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = Debug;
- };
- D1CDFFE71696E1D700609AB0 /* Release */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _IOS,
- __THEORA,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = Release;
- };
- D1CDFFE81696E1D700609AB0 /* App Store */ = {
- isa = XCBuildConfiguration;
- baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
- buildSettings = {
- GCC_PREPROCESSOR_DEFINITIONS = (
- _IOS,
- __THEORA,
- __AVFOUNDATION,
- _YUV_LIBYUV,
- "$(inherited)",
- );
- "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
- GCC_WARN_UNINITIALIZED_AUTOS = NO;
- GCC_WARN_UNUSED_VALUE = NO;
- GCC_WARN_UNUSED_VARIABLE = NO;
- PRODUCT_NAME = theoraplayer;
- SKIP_INSTALL = YES;
- };
- name = "App Store";
- };
-/* End XCBuildConfiguration section */
-
-/* Begin XCConfigurationList section */
- D1473F23150CA69B00B20490 /* Build configuration list for PBXProject "theoraplayer" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D1473F43150CA6CE00B20490 /* Debug */,
- D1473F47150CA6E200B20490 /* Release */,
- D1473F45150CA6D600B20490 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
- D1473F3F150CA69B00B20490 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora)" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D1473F44150CA6CE00B20490 /* Debug */,
- D1473F48150CA6E200B20490 /* Release */,
- D1473F46150CA6D600B20490 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
- D198F975177A31FC002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora)" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D198F977177A31FC002942E3 /* Debug */,
- D198F979177A31FC002942E3 /* Release */,
- D198F97A177A31FC002942E3 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
- D198F9A1177A31FE002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac AVFoundation)" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D198F9A3177A31FE002942E3 /* Debug */,
- D198F9A5177A31FE002942E3 /* Release */,
- D198F9A6177A31FE002942E3 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
- D198F9CE177A3200002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora AVFoundation)" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D198F9D0177A3200002942E3 /* Debug */,
- D198F9D2177A3200002942E3 /* Release */,
- D198F9D3177A3200002942E3 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
- D1BB6FBC150E9E7100EF9400 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora)" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D1BB6FB8150E9E7100EF9400 /* Debug */,
- D1BB6FBA150E9E7100EF9400 /* Release */,
- D1BB6FBB150E9E7100EF9400 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
- D1CDFF421696C77A00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (AVFoundation)" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D1CDFF441696C77A00609AB0 /* Debug */,
- D1CDFF461696C77A00609AB0 /* Release */,
- D1CDFF471696C77A00609AB0 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
- D1CDFF6A1696C79700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora AVFoundation)" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D1CDFF6C1696C79700609AB0 /* Debug */,
- D1CDFF6E1696C79700609AB0 /* Release */,
- D1CDFF6F1696C79700609AB0 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
- D1CDFFBE1696E1CA00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS AVFoundation)" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D1CDFFC01696E1CA00609AB0 /* Debug */,
- D1CDFFC21696E1CA00609AB0 /* Release */,
- D1CDFFC31696E1CA00609AB0 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
- D1CDFFE31696E1D700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora AVFoundation)" */ = {
- isa = XCConfigurationList;
- buildConfigurations = (
- D1CDFFE51696E1D700609AB0 /* Debug */,
- D1CDFFE71696E1D700609AB0 /* Release */,
- D1CDFFE81696E1D700609AB0 /* App Store */,
- );
- defaultConfigurationIsVisible = 0;
- defaultConfigurationName = Debug;
- };
-/* End XCConfigurationList section */
- };
- rootObject = D1473F20150CA69B00B20490 /* Project object */;
-}
diff --git a/drivers/theoraplayer/video_stream_theoraplayer.cpp b/drivers/theoraplayer/video_stream_theoraplayer.cpp
deleted file mode 100644
index 876cac3425..0000000000
--- a/drivers/theoraplayer/video_stream_theoraplayer.cpp
+++ /dev/null
@@ -1,556 +0,0 @@
-/*************************************************************************/
-/* video_stream.cpp */
-/*************************************************************************/
-/* This file is part of: */
-/* GODOT ENGINE */
-/* http://www.godotengine.org */
-/*************************************************************************/
-/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur. */
-/* */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the */
-/* "Software"), to deal in the Software without restriction, including */
-/* without limitation the rights to use, copy, modify, merge, publish, */
-/* distribute, sublicense, and/or sell copies of the Software, and to */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions: */
-/* */
-/* The above copyright notice and this permission notice shall be */
-/* included in all copies or substantial portions of the Software. */
-/* */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-/*************************************************************************/
-#include "video_stream_theoraplayer.h"
-
-#include "core/os/file_access.h"
-
-#include "include/theoraplayer/TheoraPlayer.h"
-#include "include/theoraplayer/TheoraTimer.h"
-#include "include/theoraplayer/TheoraAudioInterface.h"
-#include "include/theoraplayer/TheoraDataSource.h"
-#include "include/theoraplayer/TheoraException.h"
-
-#include "core/ring_buffer.h"
-#include "core/os/thread_safe.h"
-
-#include "core/globals.h"
-
-static TheoraVideoManager* mgr = NULL;
-
-class TPDataFA : public TheoraDataSource {
-
- FileAccess* fa;
- String data_name;
-
-public:
-
- int read(void* output,int nBytes) {
-
- if (!fa)
- return -1;
-
- return fa->get_buffer((uint8_t*)output, nBytes);
- };
-
- //! returns a string representation of the DataSource, eg 'File: source.ogg'
- virtual std::string repr() {
- return data_name.utf8().get_data();
- };
-
- //! position the source pointer to byte_index from the start of the source
- virtual void seek(unsigned long byte_index) {
-
- if (!fa)
- return;
-
- fa->seek(byte_index);
- };
-
-
- //! return the size of the stream in bytes
- virtual unsigned long size() {
-
- if (!fa)
- return 0;
-
- return fa->get_len();
- };
-
- //! return the current position of the source pointer
- virtual unsigned long tell() {
-
- if (!fa)
- return 0;
-
- return fa->get_pos();
- };
-
- TPDataFA(const String& p_path) {
-
- fa = FileAccess::open(p_path, FileAccess::READ);
- data_name = "File: " + p_path;
- };
-
- TPDataFA(FileAccess* p_fa, const String& p_path) {
-
- fa = p_fa;
- data_name = "File: " + p_path;
- };
-
- ~TPDataFA() {
-
- if (fa)
- memdelete(fa);
- };
-};
-
-class AudioStreamInput : public AudioStreamResampled {
-
- _THREAD_SAFE_CLASS_;
-
- int channels;
- int freq;
-
- RID stream_rid;
- mutable RingBuffer<float> rb;
- int rb_power;
- int total_wrote;
- bool playing;
- bool paused;
-
-public:
-
- virtual void play() {
-
- _THREAD_SAFE_METHOD_
- _setup(channels, freq, 256);
- stream_rid=AudioServer::get_singleton()->audio_stream_create(get_audio_stream());
- AudioServer::get_singleton()->stream_set_active(stream_rid,true);
- AudioServer::get_singleton()->stream_set_volume_scale(stream_rid,1);
- playing = true;
- paused = false;
- };
- virtual void stop() {
-
- _THREAD_SAFE_METHOD_
-
- AudioServer::get_singleton()->stream_set_active(stream_rid,false);
- //_clear_stream();
- playing=false;
- _clear();
- };
-
- virtual bool is_playing() const { return true; };
-
- virtual void set_paused(bool p_paused) { paused = p_paused; };
- virtual bool is_paused(bool p_paused) const { return paused; };
-
- virtual void set_loop(bool p_enable) {};
- virtual bool has_loop() const { return false; };
-
- virtual float get_length() const { return 0; };
-
- virtual String get_stream_name() const { return "Theora Audio Stream"; };
-
- virtual int get_loop_count() const { return 1; };
-
- virtual float get_pos() const { return 0; };
- virtual void seek_pos(float p_time) {};
-
- virtual UpdateMode get_update_mode() const { return UPDATE_THREAD; };
-
- virtual bool _can_mix() const { return true; };
-
- void input(float* p_data, int p_samples) {
-
-
- _THREAD_SAFE_METHOD_;
- //printf("input %i samples from %p\n", p_samples, p_data);
- if (rb.space_left() < p_samples) {
- rb_power += 1;
- rb.resize(rb_power);
- }
- rb.write(p_data, p_samples);
-
- update(); //update too here for less latency
- };
-
- void update() {
-
- _THREAD_SAFE_METHOD_;
- int todo = get_todo();
- int16_t* buffer = get_write_buffer();
- int frames = rb.data_left()/channels;
- const int to_write = MIN(todo, frames);
-
- for (int i=0; i<to_write*channels; i++) {
-
- int v = rb.read() * 32767;
- int16_t sample = CLAMP(v,-32768,32767);
- buffer[i] = sample;
- };
- write(to_write);
- total_wrote += to_write;
- };
-
- int get_pending() const {
- return rb.data_left();
- };
-
- int get_total_wrote() {
-
- return total_wrote - (get_total() - get_todo());
- };
-
- AudioStreamInput(int p_channels, int p_freq) {
-
- playing = false;
- paused = true;
- channels = p_channels;
- freq = p_freq;
- total_wrote = 0;
- rb_power = 22;
- rb.resize(rb_power);
- };
-
- ~AudioStreamInput() {
-
- stop();
- };
-};
-
-class TPAudioGodot : public TheoraAudioInterface, TheoraTimer {
-
- Ref<AudioStreamInput> stream;
- int sample_count;
- int channels;
- int freq;
-
-public:
-
- void insertData(float* data, int nSamples) {
-
- stream->input(data, nSamples);
- };
-
- TPAudioGodot(TheoraVideoClip* owner, int nChannels, int p_freq)
- : TheoraAudioInterface(owner, nChannels, p_freq), TheoraTimer() {
-
- printf("***************** audio interface constructor freq %i\n", p_freq);
- channels = nChannels;
- freq = p_freq;
- stream = Ref<AudioStreamInput>(memnew(AudioStreamInput(nChannels, p_freq)));
- stream->play();
- sample_count = 0;
- owner->setTimer(this);
- };
-
- void stop() {
-
- stream->stop();
- };
-
- void update(float time_increase)
- {
- float prev_time = mTime;
- //mTime = (float)(stream->get_total_wrote()) / freq;
- //mTime = MAX(0,mTime-AudioServer::get_singleton()->get_output_delay());
- //mTime = (float)sample_count / channels / freq;
- mTime += time_increase;
- if (mTime - prev_time > .02) printf("time increase %f secs\n", mTime - prev_time);
- //float duration=mClip->getDuration();
- //if (mTime > duration) mTime=duration;
- //printf("time at timer is %f, %f, samples %i\n", mTime, time_increase, sample_count);
- }
-};
-
-class TPAudioGodotFactory : public TheoraAudioInterfaceFactory {
-
-public:
- TheoraAudioInterface* createInstance(TheoraVideoClip* owner, int nChannels, int freq) {
-
- printf("************** creating audio output\n");
- TheoraAudioInterface* ta = new TPAudioGodot(owner, nChannels, freq);
- return ta;
- };
-};
-
-static TPAudioGodotFactory* audio_factory = NULL;
-
-void VideoStreamTheoraplayer::stop() {
-
- playing = false;
- if (clip) {
- clip->stop();
- clip->seek(0);
- };
- started = true;
-};
-
-void VideoStreamTheoraplayer::play() {
- if (clip)
- playing = true;
-};
-
-bool VideoStreamTheoraplayer::is_playing() const {
-
- return playing;
-};
-
-void VideoStreamTheoraplayer::set_paused(bool p_paused) {
-
- paused = p_paused;
- if (paused) {
- clip->pause();
- } else {
- if (clip && playing && !started)
- clip->play();
- }
-};
-
-bool VideoStreamTheoraplayer::is_paused(bool p_paused) const {
-
- return !playing;
-};
-
-void VideoStreamTheoraplayer::set_loop(bool p_enable) {
-
- loop = p_enable;
-};
-
-bool VideoStreamTheoraplayer::has_loop() const {
-
- return loop;
-};
-
-float VideoStreamTheoraplayer::get_length() const {
-
- if (!clip)
- return 0;
-
- return clip->getDuration();
-};
-
-
-float VideoStreamTheoraplayer::get_pos() const {
-
- if (!clip)
- return 0;
-
- return clip->getTimer()->getTime();
-};
-
-void VideoStreamTheoraplayer::seek_pos(float p_time) {
-
- if (!clip)
- return;
-
- clip->seek(p_time);
-};
-
-int VideoStreamTheoraplayer::get_pending_frame_count() const {
-
- if (!clip)
- return 0;
-
- TheoraVideoFrame* f = clip->getNextFrame();
- return f ? 1 : 0;
-};
-
-
-void VideoStreamTheoraplayer::pop_frame(Ref<ImageTexture> p_tex) {
-
- if (!clip)
- return;
-
- TheoraVideoFrame* f = clip->getNextFrame();
- if (!f) {
- return;
- };
-
-#ifdef GLES2_ENABLED
-// RasterizerGLES2* r = RasterizerGLES2::get_singleton();
-// r->_texture_set_data(p_tex, f->mBpp == 3 ? Image::Format_RGB : Image::Format_RGBA, f->mBpp, w, h, f->getBuffer());
-
-#endif
-
- float w=clip->getWidth(),h=clip->getHeight();
- int imgsize = w * h * f->mBpp;
-
- int size = f->getStride() * f->getHeight() * f->mBpp;
- data.resize(imgsize);
- {
- DVector<uint8_t>::Write wr = data.write();
- uint8_t* ptr = wr.ptr();
- copymem(ptr, f->getBuffer(), imgsize);
- }
- /*
- for (int i=0; i<h; i++) {
- int dstofs = i * w * f->mBpp;
- int srcofs = i * f->getStride() * f->mBpp;
- copymem(ptr + dstofs, f->getBuffer() + dstofs, w * f->mBpp);
- };
- */
- Image frame = Image();
- frame.create(w, h, 0, f->mBpp == 3 ? Image::FORMAT_RGB : Image::FORMAT_RGBA, data);
-
- clip->popFrame();
-
- if (p_tex->get_width() == 0) {
- p_tex->create(frame.get_width(),frame.get_height(),frame.get_format(),Texture::FLAG_VIDEO_SURFACE|Texture::FLAG_FILTER);
- p_tex->set_data(frame);
- } else {
-
- p_tex->set_data(frame);
- };
-};
-
-/*
-Image VideoStreamTheoraplayer::pop_frame() {
-
- Image ret = frame;
- frame = Image();
- return ret;
-};
-*/
-
-Image VideoStreamTheoraplayer::peek_frame() const {
-
- return Image();
-};
-
-void VideoStreamTheoraplayer::update(float p_time) {
-
- if (!mgr)
- return;
-
- if (!clip)
- return;
-
- if (!playing || paused)
- return;
-
- //printf("video update!\n");
- if (started) {
- if (clip->getNumReadyFrames() < 2) {
- printf("frames not ready, returning!\n");
- return;
- };
- started = false;
- //printf("playing clip!\n");
- clip->play();
- } else if (clip->isDone()) {
- playing = false;
- };
-
- mgr->update(p_time);
-};
-
-
-void VideoStreamTheoraplayer::set_audio_track(int p_idx) {
- audio_track=p_idx;
- if (clip)
- clip->set_audio_track(audio_track);
-}
-
-void VideoStreamTheoraplayer::set_file(const String& p_file) {
-
- FileAccess* f = FileAccess::open(p_file, FileAccess::READ);
- if (!f || !f->is_open())
- return;
-
- if (!audio_factory) {
- audio_factory = memnew(TPAudioGodotFactory);
- };
-
- if (mgr == NULL) {
- mgr = memnew(TheoraVideoManager);
- mgr->setAudioInterfaceFactory(audio_factory);
- };
-
- int track = GLOBAL_DEF("theora/audio_track", 0); // hack
-
- if (p_file.find(".mp4") != -1) {
-
- std::string file = p_file.replace("res://", "").utf8().get_data();
- clip = mgr->createVideoClip(file, TH_RGBX, 2, false, track);
- //clip->set_audio_track(audio_track);
- memdelete(f);
-
- } else {
-
- TheoraDataSource* ds = memnew(TPDataFA(f, p_file));
-
- try {
- clip = mgr->createVideoClip(ds);
- clip->set_audio_track(audio_track);
- } catch (_TheoraGenericException e) {
- printf("exception ocurred! %s\n", e.repr().c_str());
- clip = NULL;
- };
- };
-
- clip->pause();
- started = true;
-};
-
-VideoStreamTheoraplayer::~VideoStreamTheoraplayer() {
-
- stop();
- //if (mgr) { // this should be a singleton or static or something
- // memdelete(mgr);
- //};
- //mgr = NULL;
- if (clip) {
- mgr->destroyVideoClip(clip);
- clip = NULL;
- };
-};
-
-VideoStreamTheoraplayer::VideoStreamTheoraplayer() {
-
- //mgr = NULL;
- clip = NULL;
- started = false;
- playing = false;
- paused = false;
- loop = false;
- audio_track=0;
-};
-
-
-RES ResourceFormatLoaderVideoStreamTheoraplayer::load(const String &p_path, const String& p_original_path, Error *r_error) {
- if (r_error)
- *r_error=OK;
-
- VideoStreamTheoraplayer *stream = memnew(VideoStreamTheoraplayer);
- stream->set_file(p_path);
- return Ref<VideoStreamTheoraplayer>(stream);
-}
-
-void ResourceFormatLoaderVideoStreamTheoraplayer::get_recognized_extensions(List<String> *p_extensions) const {
-
- p_extensions->push_back("ogm");
- p_extensions->push_back("ogv");
- p_extensions->push_back("mp4");
-}
-bool ResourceFormatLoaderVideoStreamTheoraplayer::handles_type(const String& p_type) const {
- return p_type=="VideoStream" || p_type == "VideoStreamTheoraplayer";
-}
-
-String ResourceFormatLoaderVideoStreamTheoraplayer::get_resource_type(const String &p_path) const {
-
- String exl=p_path.extension().to_lower();
- if (exl=="ogm" || exl=="ogv" || exl=="mp4")
- return "VideoStream";
- return "";
-}
-
-
-
diff --git a/drivers/theoraplayer/video_stream_theoraplayer.h b/drivers/theoraplayer/video_stream_theoraplayer.h
deleted file mode 100644
index 69cae7c4a2..0000000000
--- a/drivers/theoraplayer/video_stream_theoraplayer.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef VIDEO_STREAM_THEORAPLAYER_H
-#define VIDEO_STREAM_THEORAPLAYER_H
-
-#include "scene/resources/video_stream.h"
-#include "io/resource_loader.h"
-#include "scene/resources/texture.h"
-
-class TheoraVideoManager;
-class TheoraVideoClip;
-
-class VideoStreamTheoraplayer : public VideoStream {
-
- OBJ_TYPE(VideoStreamTheoraplayer,VideoStream);
-
- mutable DVector<uint8_t> data;
- TheoraVideoClip* clip;
- bool started;
- bool playing;
- bool loop;
- bool paused;
-
- int audio_track;
-
-public:
-
- virtual void stop();
- virtual void play();
-
- virtual bool is_playing() const;
-
- virtual void set_paused(bool p_paused);
- virtual bool is_paused(bool p_paused) const;
-
- virtual void set_loop(bool p_enable);
- virtual bool has_loop() const;
-
- virtual float get_pos() const;
- virtual void seek_pos(float p_time);
-
- virtual float get_length() const;
-
- virtual int get_pending_frame_count() const;
- virtual void pop_frame(Ref<ImageTexture> p_tex);
- virtual Image peek_frame() const;
-
- void update(float p_time);
-
- void set_file(const String& p_file);
- void set_audio_track(int p_idx);
-
- ~VideoStreamTheoraplayer();
- VideoStreamTheoraplayer();
-};
-
-class ResourceFormatLoaderVideoStreamTheoraplayer : public ResourceFormatLoader {
-public:
- virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL);
- virtual void get_recognized_extensions(List<String> *p_extensions) const;
- virtual bool handles_type(const String& p_type) const;
- virtual String get_resource_type(const String &p_path) const;
-
-};
-
-
-#endif
-
diff --git a/drivers/unix/file_access_unix.cpp b/drivers/unix/file_access_unix.cpp
index 76042089ff..8e70ecc932 100644
--- a/drivers/unix/file_access_unix.cpp
+++ b/drivers/unix/file_access_unix.cpp
@@ -63,7 +63,7 @@ Error FileAccessUnix::_open(const String& p_path, int p_mode_flags) {
fclose(f);
f=NULL;
- String path=fix_path(p_path);
+ path=fix_path(p_path);
//printf("opening %ls, %i\n", path.c_str(), Memory::get_static_mem_usage());
ERR_FAIL_COND_V(f,ERR_ALREADY_IN_USE);
@@ -114,6 +114,9 @@ void FileAccessUnix::close() {
return;
fclose(f);
f = NULL;
+ if (close_notification_func) {
+ close_notification_func(path,flags);
+ }
if (save_path!="") {
//unlink(save_path.utf8().get_data());
@@ -240,6 +243,7 @@ FileAccess * FileAccessUnix::create_libc() {
return memnew( FileAccessUnix );
}
+CloseNotificationFunc FileAccessUnix::close_notification_func=NULL;
FileAccessUnix::FileAccessUnix() {
diff --git a/drivers/unix/file_access_unix.h b/drivers/unix/file_access_unix.h
index 5b0f0e7cb7..6c41a51ec5 100644
--- a/drivers/unix/file_access_unix.h
+++ b/drivers/unix/file_access_unix.h
@@ -38,6 +38,10 @@
/**
@author Juan Linietsky <reduzio@gmail.com>
*/
+
+
+typedef void (*CloseNotificationFunc)(const String& p_file,int p_flags);
+
class FileAccessUnix : public FileAccess {
FILE *f;
@@ -45,10 +49,13 @@ class FileAccessUnix : public FileAccess {
void check_errors() const;
mutable Error last_error;
String save_path;
+ String path;
- static FileAccess* create_libc();
+ static FileAccess* create_libc();
public:
+ static CloseNotificationFunc close_notification_func;
+
virtual Error _open(const String& p_path, int p_mode_flags); ///< open a file
virtual void close(); ///< close a file
virtual bool is_open() const; ///< true when file is open
diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp
index 314e13cee4..96f90e6be1 100644
--- a/drivers/unix/os_unix.cpp
+++ b/drivers/unix/os_unix.cpp
@@ -226,8 +226,9 @@ uint64_t OS_Unix::get_unix_time() const {
uint64_t OS_Unix::get_system_time_msec() const {
struct timeval tv_now;
gettimeofday(&tv_now, NULL);
- localtime(&tv_now.tv_usec);
- uint64_t msec = tv_now.tv_usec/1000;
+ //localtime(&tv_now.tv_usec);
+ //localtime((const long *)&tv_now.tv_usec);
+ uint64_t msec = uint64_t(tv_now.tv_sec)*1000+tv_now.tv_usec/1000;
return msec;
}
diff --git a/drivers/vorbis/audio_stream_ogg_vorbis.cpp b/drivers/vorbis/audio_stream_ogg_vorbis.cpp
index ee9ba8da4d..ca055c8b62 100644
--- a/drivers/vorbis/audio_stream_ogg_vorbis.cpp
+++ b/drivers/vorbis/audio_stream_ogg_vorbis.cpp
@@ -30,7 +30,7 @@
-size_t AudioStreamOGGVorbis::_ov_read_func(void *p_dst,size_t p_data, size_t p_count, void *_f) {
+size_t AudioStreamPlaybackOGGVorbis::_ov_read_func(void *p_dst,size_t p_data, size_t p_count, void *_f) {
//printf("read to %p, %i bytes, %i nmemb, %p\n",p_dst,p_data,p_count,_f);
FileAccess *fa=(FileAccess*)_f;
@@ -46,7 +46,7 @@ size_t AudioStreamOGGVorbis::_ov_read_func(void *p_dst,size_t p_data, size_t p_c
return read;
}
-int AudioStreamOGGVorbis::_ov_seek_func(void *_f,ogg_int64_t offs, int whence) {
+int AudioStreamPlaybackOGGVorbis::_ov_seek_func(void *_f,ogg_int64_t offs, int whence) {
//printf("seek to %p, offs %i, whence %i\n",_f,(int)offs,whence);
@@ -76,7 +76,7 @@ int AudioStreamOGGVorbis::_ov_seek_func(void *_f,ogg_int64_t offs, int whence) {
#endif
}
-int AudioStreamOGGVorbis::_ov_close_func(void *_f) {
+int AudioStreamPlaybackOGGVorbis::_ov_close_func(void *_f) {
// printf("close %p\n",_f);
if (!_f)
@@ -86,7 +86,7 @@ int AudioStreamOGGVorbis::_ov_close_func(void *_f) {
fa->close();
return 0;
}
-long AudioStreamOGGVorbis::_ov_tell_func(void *_f) {
+long AudioStreamPlaybackOGGVorbis::_ov_tell_func(void *_f) {
//printf("close %p\n",_f);
@@ -95,38 +95,32 @@ long AudioStreamOGGVorbis::_ov_tell_func(void *_f) {
}
-bool AudioStreamOGGVorbis::_can_mix() const {
- return /*playing &&*/ !paused;
-}
-
-
-void AudioStreamOGGVorbis::update() {
+int AudioStreamPlaybackOGGVorbis::mix(int16_t* p_bufer,int p_frames) {
- _THREAD_SAFE_METHOD_
-
- if (!playing && !setting_up)
- return;
+ if (!playing)
+ return 0;
+ int total=p_frames;
while (true) {
- int todo = get_todo();
+ int todo = p_frames;
- if (todo==0 || todo<MIN_MIX)
+ if (todo==0 || todo<MIN_MIX) {
break;
+ }
//printf("to mix %i - mix me %i bytes\n",to_mix,to_mix*stream_channels*sizeof(int16_t));
#ifdef BIG_ENDIAN_ENABLED
- long ret=ov_read(&vf,(char*)get_write_buffer(),todo*stream_channels*sizeof(int16_t), 1, 2, 1, &current_section);
+ long ret=ov_read(&vf,(char*)p_bufer,todo*stream_channels*sizeof(int16_t), 1, 2, 1, &current_section);
#else
- long ret=ov_read(&vf,(char*)get_write_buffer(),todo*stream_channels*sizeof(int16_t), 0, 2, 1, &current_section);
+ long ret=ov_read(&vf,(char*)p_bufer,todo*stream_channels*sizeof(int16_t), 0, 2, 1, &current_section);
#endif
+
if (ret<0) {
playing = false;
- setting_up=false;
-
ERR_EXPLAIN("Error reading OGG Vorbis File: "+file);
ERR_BREAK(ret<0);
} else if (ret==0) { // end of song, reload?
@@ -138,9 +132,8 @@ void AudioStreamOGGVorbis::update() {
if (!has_loop()) {
playing=false;
- setting_up=false;
repeats=1;
- return;
+ break;
}
f=FileAccess::open(file,FileAccess::READ);
@@ -148,11 +141,22 @@ void AudioStreamOGGVorbis::update() {
int errv = ov_open_callbacks(f,&vf,NULL,0,_ov_callbacks);
if (errv!=0) {
playing=false;
- setting_up=false;
- return; // :(
+ break;; // :(
}
- frames_mixed=0;
+ if (loop_restart_time) {
+ bool ok = ov_time_seek(&vf,loop_restart_time)==0;
+ if (!ok) {
+ playing=false;
+ //ERR_EXPLAIN("loop restart time rejected");
+ ERR_PRINT("loop restart time rejected")
+ }
+
+ frames_mixed=stream_srate*loop_restart_time;
+ } else {
+
+ frames_mixed=0;
+ }
repeats++;
continue;
@@ -162,16 +166,19 @@ void AudioStreamOGGVorbis::update() {
ret/=sizeof(int16_t);
frames_mixed+=ret;
- write(ret);
+
+ p_bufer+=ret*stream_channels;
+ p_frames-=ret;
+
}
-}
+ return total-p_frames;
+}
-void AudioStreamOGGVorbis::play() {
- _THREAD_SAFE_METHOD_
+void AudioStreamPlaybackOGGVorbis::play(float p_from) {
if (playing)
stop();
@@ -179,56 +186,46 @@ void AudioStreamOGGVorbis::play() {
if (_load_stream()!=OK)
return;
+
frames_mixed=0;
- playing=false;
- setting_up=true;
- update();
- if (!setting_up)
- return;
- setting_up=false;
playing=true;
+ if (p_from>0) {
+ seek_pos(p_from);
+ }
}
-void AudioStreamOGGVorbis::_close_file() {
+void AudioStreamPlaybackOGGVorbis::_close_file() {
if (f) {
+
memdelete(f);
f=NULL;
}
}
-void AudioStreamOGGVorbis::stop() {
-
- _THREAD_SAFE_METHOD_
+bool AudioStreamPlaybackOGGVorbis::is_playing() const {
+ return playing;
+}
+void AudioStreamPlaybackOGGVorbis::stop() {
_clear_stream();
playing=false;
- _clear();
-}
-
-AudioStreamOGGVorbis::UpdateMode AudioStreamOGGVorbis::get_update_mode() const {
-
- return UPDATE_THREAD;
+ //_clear();
}
-bool AudioStreamOGGVorbis::is_playing() const {
+float AudioStreamPlaybackOGGVorbis::get_pos() const {
- return playing || (get_total() - get_todo() -1 > 0);
-}
-
-float AudioStreamOGGVorbis::get_pos() const {
-
- int32_t frames = int32_t(frames_mixed) - (int32_t(get_total()) - get_todo());
+ int32_t frames = int32_t(frames_mixed);
if (frames < 0)
frames=0;
return double(frames) / stream_srate;
}
-void AudioStreamOGGVorbis::seek_pos(float p_time) {
+void AudioStreamPlaybackOGGVorbis::seek_pos(float p_time) {
+
- _THREAD_SAFE_METHOD_
if (!playing)
return;
@@ -237,85 +234,107 @@ void AudioStreamOGGVorbis::seek_pos(float p_time) {
frames_mixed=stream_srate*p_time;
}
-String AudioStreamOGGVorbis::get_stream_name() const {
+String AudioStreamPlaybackOGGVorbis::get_stream_name() const {
return "";
}
-void AudioStreamOGGVorbis::set_loop(bool p_enable) {
+void AudioStreamPlaybackOGGVorbis::set_loop(bool p_enable) {
loops=p_enable;
}
-bool AudioStreamOGGVorbis::has_loop() const {
+bool AudioStreamPlaybackOGGVorbis::has_loop() const {
return loops;
}
-int AudioStreamOGGVorbis::get_loop_count() const {
+int AudioStreamPlaybackOGGVorbis::get_loop_count() const {
return repeats;
}
-void AudioStreamOGGVorbis::set_file(const String& p_file) {
+Error AudioStreamPlaybackOGGVorbis::set_file(const String& p_file) {
file=p_file;
-}
-
-Error AudioStreamOGGVorbis::_load_stream() {
-
- _clear_stream();
- if (file=="")
- return ERR_INVALID_DATA;
-
+ stream_valid=false;
Error err;
f=FileAccess::open(file,FileAccess::READ,&err);
-
if (err) {
ERR_FAIL_COND_V( err, err );
}
int errv = ov_open_callbacks(f,&vf,NULL,0,_ov_callbacks);
+ switch(errv) {
-
+ case OV_EREAD: { // - A read from media returned an error.
+ memdelete(f); f=NULL;
+ ERR_FAIL_V( ERR_FILE_CANT_READ );
+ } break;
+ case OV_EVERSION: // - Vorbis version mismatch.
+ case OV_ENOTVORBIS: { // - Bitstream is not Vorbis data.
+ memdelete(f); f=NULL;
+ ERR_FAIL_V( ERR_FILE_UNRECOGNIZED );
+ } break;
+ case OV_EBADHEADER: { // - Invalid Vorbis bitstream header.
+ memdelete(f); f=NULL;
+ ERR_FAIL_V( ERR_FILE_CORRUPT );
+ } break;
+ case OV_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption.
+ memdelete(f); f=NULL;
+ ERR_FAIL_V( ERR_BUG );
+ } break;
+ }
const vorbis_info *vinfo=ov_info(&vf,-1);
stream_channels=vinfo->channels;
stream_srate=vinfo->rate;
- Error serr = _setup(stream_channels,stream_srate);
+ ogg_int64_t len = ov_time_total(&vf,-1);
+ length=len/1000.0;
+ ov_clear(&vf);
+ memdelete(f);
+ f=NULL;
+ stream_valid=true;
+
+
+ return OK;
+}
+
+Error AudioStreamPlaybackOGGVorbis::_load_stream() {
+
+ ERR_FAIL_COND_V(!stream_valid,ERR_UNCONFIGURED);
- if (serr) {
- _close_file();
- ERR_FAIL_V( ERR_INVALID_DATA );
+ _clear_stream();
+ if (file=="")
+ return ERR_INVALID_DATA;
+
+ Error err;
+ f=FileAccess::open(file,FileAccess::READ,&err);
+ if (err) {
+ ERR_FAIL_COND_V( err, err );
}
+ int errv = ov_open_callbacks(f,&vf,NULL,0,_ov_callbacks);
switch(errv) {
case OV_EREAD: { // - A read from media returned an error.
- _close_file();
+ memdelete(f); f=NULL;
ERR_FAIL_V( ERR_FILE_CANT_READ );
} break;
case OV_EVERSION: // - Vorbis version mismatch.
case OV_ENOTVORBIS: { // - Bitstream is not Vorbis data.
- _close_file();
+ memdelete(f); f=NULL;
ERR_FAIL_V( ERR_FILE_UNRECOGNIZED );
} break;
case OV_EBADHEADER: { // - Invalid Vorbis bitstream header.
- _close_file();
+ memdelete(f); f=NULL;
ERR_FAIL_V( ERR_FILE_CORRUPT );
} break;
case OV_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption.
-
- _close_file();
+ memdelete(f); f=NULL;
ERR_FAIL_V( ERR_BUG );
} break;
}
-
-
- ogg_int64_t len = ov_time_total(&vf,-1);
-
- length=len/1000.0;
-
repeats=0;
stream_loaded=true;
@@ -324,16 +343,16 @@ Error AudioStreamOGGVorbis::_load_stream() {
}
-float AudioStreamOGGVorbis::get_length() const {
+float AudioStreamPlaybackOGGVorbis::get_length() const {
if (!stream_loaded) {
- if (const_cast<AudioStreamOGGVorbis*>(this)->_load_stream()!=OK)
+ if (const_cast<AudioStreamPlaybackOGGVorbis*>(this)->_load_stream()!=OK)
return 0;
}
return length;
}
-void AudioStreamOGGVorbis::_clear_stream() {
+void AudioStreamPlaybackOGGVorbis::_clear_stream() {
if (!stream_loaded)
return;
@@ -346,18 +365,18 @@ void AudioStreamOGGVorbis::_clear_stream() {
playing=false;
}
-void AudioStreamOGGVorbis::set_paused(bool p_paused) {
+void AudioStreamPlaybackOGGVorbis::set_paused(bool p_paused) {
paused=p_paused;
}
-bool AudioStreamOGGVorbis::is_paused(bool p_paused) const {
+bool AudioStreamPlaybackOGGVorbis::is_paused(bool p_paused) const {
return paused;
}
-AudioStreamOGGVorbis::AudioStreamOGGVorbis() {
+AudioStreamPlaybackOGGVorbis::AudioStreamPlaybackOGGVorbis() {
loops=false;
playing=false;
@@ -367,17 +386,18 @@ AudioStreamOGGVorbis::AudioStreamOGGVorbis() {
_ov_callbacks.tell_func=_ov_tell_func;
f = NULL;
stream_loaded=false;
- repeats=0;
- setting_up=false;
+ stream_valid=false;
+ repeats=0;
paused=true;
stream_channels=0;
stream_srate=0;
current_section=0;
length=0;
+ loop_restart_time=0;
}
-AudioStreamOGGVorbis::~AudioStreamOGGVorbis() {
+AudioStreamPlaybackOGGVorbis::~AudioStreamPlaybackOGGVorbis() {
_clear_stream();
diff --git a/drivers/vorbis/audio_stream_ogg_vorbis.h b/drivers/vorbis/audio_stream_ogg_vorbis.h
index 5e3649d980..827d8b0be3 100644
--- a/drivers/vorbis/audio_stream_ogg_vorbis.h
+++ b/drivers/vorbis/audio_stream_ogg_vorbis.h
@@ -29,17 +29,16 @@
#ifndef AUDIO_STREAM_OGG_VORBIS_H
#define AUDIO_STREAM_OGG_VORBIS_H
-#include "scene/resources/audio_stream_resampled.h"
+#include "scene/resources/audio_stream.h"
#include "vorbis/vorbisfile.h"
#include "os/file_access.h"
#include "io/resource_loader.h"
#include "os/thread_safe.h"
-class AudioStreamOGGVorbis : public AudioStreamResampled {
- OBJ_TYPE(AudioStreamOGGVorbis,AudioStreamResampled);
- _THREAD_SAFE_CLASS_
+class AudioStreamPlaybackOGGVorbis : public AudioStreamPlayback {
+ OBJ_TYPE(AudioStreamPlaybackOGGVorbis,AudioStreamPlayback);
enum {
MIN_MIX=1024
@@ -54,9 +53,6 @@ class AudioStreamOGGVorbis : public AudioStreamResampled {
static int _ov_close_func(void *_f);
static long _ov_tell_func(void *_f);
-
- virtual bool _can_mix() const;
-
String file;
int64_t frames_mixed;
@@ -67,7 +63,7 @@ class AudioStreamOGGVorbis : public AudioStreamResampled {
int stream_srate;
int current_section;
- volatile bool setting_up;
+
bool paused;
bool loops;
int repeats;
@@ -76,17 +72,21 @@ class AudioStreamOGGVorbis : public AudioStreamResampled {
void _clear_stream();
void _close_file();
+ bool stream_valid;
+ float loop_restart_time;
-public:
+public:
- void set_file(const String& p_file);
+ Error set_file(const String& p_file);
- virtual void play();
+ virtual void play(float p_from=0);
virtual void stop();
virtual bool is_playing() const;
+ virtual void set_loop_restart_time(float p_time) { loop_restart_time=0; }
+
virtual void set_paused(bool p_paused);
virtual bool is_paused(bool p_paused) const;
@@ -102,11 +102,32 @@ public:
virtual float get_pos() const;
virtual void seek_pos(float p_time);
- virtual UpdateMode get_update_mode() const;
- virtual void update();
+ virtual int get_channels() const { return stream_channels; }
+ virtual int get_mix_rate() const { return stream_srate; }
+
+ virtual int get_minimum_buffer_size() const { return 0; }
+ virtual int mix(int16_t* p_bufer,int p_frames);
+
+ AudioStreamPlaybackOGGVorbis();
+ ~AudioStreamPlaybackOGGVorbis();
+};
+
+
+class AudioStreamOGGVorbis : public AudioStream {
+
+ OBJ_TYPE(AudioStreamOGGVorbis,AudioStream);
+
+ String file;
+public:
+
+ Ref<AudioStreamPlayback> instance_playback() {
+ Ref<AudioStreamPlaybackOGGVorbis> pb = memnew( AudioStreamPlaybackOGGVorbis );
+ pb->set_file(file);
+ return pb;
+ }
+
+ void set_file(const String& p_file) { file=p_file; }
- AudioStreamOGGVorbis();
- ~AudioStreamOGGVorbis();
};
class ResourceFormatLoaderAudioStreamOGGVorbis : public ResourceFormatLoader {
diff --git a/drivers/webp/dsp/dsp.h b/drivers/webp/dsp/dsp.h
index afe30413c6..fd686a8532 100644
--- a/drivers/webp/dsp/dsp.h
+++ b/drivers/webp/dsp/dsp.h
@@ -29,7 +29,7 @@ extern "C" {
#define WEBP_USE_SSE2
#endif
-#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
+#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__)
#define WEBP_ANDROID_NEON // Android targets that might support NEON
#endif